# AI Job Board Scraper

Code authored by: Siva

In [None]:
# Create the output directory for the scraped CSV.
# exist_ok=True keeps this cell re-runnable: a shell `mkdir data` reports an
# error on every run after the first, and behaves differently across platforms.
from pathlib import Path

Path("data").mkdir(exist_ok=True)

In [3]:
# %pip installs into the environment of the running kernel, whereas !pip uses
# whichever pip is first on the shell PATH.
# gradio is included because the dashboard cell at the end of the notebook
# runs a Gradio app.
%pip install ipykernel jupyterlab numpy pandas plotly streamlit requests beautifulsoup4 pyngrok gradio --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m94.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m89.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.7/76.7 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.7/59.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m94.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
[?25h

### imports

In [4]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import re
import pandas as pd

### Grab job URLs from the listing page

In [5]:
# Listing page to scrape; also used as the base when resolving relative job links.
base_url = "https://aijobs.ai/united-states"

In [6]:
# Fetch and parse the listing page.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of letting an error
# page be parsed and silently produce an empty list of job links.
res = requests.get(base_url, timeout=30)
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")

In [7]:
# Step 1: find all <a> tags whose href contains "/job/"
a_tags = soup.select('a[href*="/job/"]')

# Step 2: resolve each href against base_url (relative -> absolute) and drop
# repeats, so a posting that is linked more than once on the page is only
# scraped once. dict.fromkeys keeps the first occurrence and preserves the
# order in which links appear on the page.
job_links = list(
    dict.fromkeys(
        urljoin(base_url, tag.get("href"))
        for tag in a_tags
        if tag.get("href")  # skip tags with a missing or empty href
    )
)

In [8]:
# Inspect the absolute job-posting URLs collected from the listing page.
job_links

['https://aijobs.ai/job/bioacoustics-machine-learning-post-doctoral-associate',
 'https://aijobs.ai/job/ai-fellows-12-month-lte',
 'https://aijobs.ai/job/senior-software-engineer-agents',
 'https://aijobs.ai/job/lead-software-engineer-ml-backend',
 'https://aijobs.ai/job/staff-engineer-software-autonomy-applications-r3166-3',
 'https://aijobs.ai/job/ml-application-security-engineer',
 'https://aijobs.ai/job/solutions-engineering-senior-manager',
 'https://aijobs.ai/job/ml-engineer-llm-evaluation',
 'https://aijobs.ai/job/ml-engineer-llm-safety',
 'https://aijobs.ai/job/ml-engineer-llm-privacy',
 'https://aijobs.ai/job/ml-research-scientist-llm-safety',
 'https://aijobs.ai/job/staff-software-engineer-34',
 'https://aijobs.ai/job/ml-research-engineer-llm-safety',
 'https://aijobs.ai/job/technical-product-manager-47',
 'https://aijobs.ai/job/study-participant-on-site-research-study',
 'https://aijobs.ai/job/machine-learning-engineer-graduate-e-commerce-supply-chain-logistics-2026-start-ph

### Scrape job data from each URL

In [9]:
def get_job_data(job_link, timeout=30):
    """
    Scrape job details from a given job posting URL.

    Args:
        job_link (str): The full URL of the job posting page.
        timeout (float or tuple): Seconds `requests` waits for the server
            before giving up. Defaults to 30. Without a timeout a single
            stalled connection would block the whole scraping loop.

    Returns:
        dict: A dictionary containing:
            - "Job Title" (str or None): The job title text, if found.
            - "Job Description" (str or None): The job description text,
              with line breaks preserved, if found.
            - "Salary Range:" (str or None): The salary range as a formatted string
              (e.g., "$50,000 - $70,000"), or a single value if only one number is found,
              or None if no salary is listed. Note the trailing colon in the
              key: it is part of the column name written to the CSV.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
            The HTTP status code is not checked; any field whose element is
            missing from the returned page is reported as None.
    """

    # create soup object
    res = requests.get(job_link, timeout=timeout)
    soup = BeautifulSoup(res.text, "html.parser")

    # get job title
    title_el = soup.select_one(".post-main-title2")
    job_title = title_el.get_text(strip=True) if title_el else None

    # get job description (one line of text per HTML text node)
    desc_el = soup.select_one(".job-description-container")
    job_description = desc_el.get_text(separator="\n", strip=True) if desc_el else None

    # get salary
    salary_el = soup.select_one(".salery h2")   # note: the site spells it "salery"
    salary_range = None
    if salary_el:
        text = salary_el.get_text(strip=True)
        # A "number" is a digit followed by any run of digits and commas,
        # e.g. "87,264". Only the first two numbers found are used.
        numbers = re.findall(r"\d[\d,]*", text)
        if len(numbers) >= 2:
            salary_range = f"${numbers[0]} - ${numbers[1]}"
        elif len(numbers) == 1:
            salary_range = f"${numbers[0]}"

    return {
        "Job Title": job_title,
        "Job Description": job_description,
        "Salary Range:": salary_range
    }

In [10]:
# Scrape every collected posting, one request per URL, keeping page order.
job_data_list = [get_job_data(url) for url in job_links]

In [11]:
# Preview only the first few records: each one carries a full job description,
# so displaying the whole list floods the notebook output.
job_data_list[:3]

[{'Job Title': 'Bioacoustics Machine Learning Post Doctoral Associate',
  'Job Description': "HOW YOU WILL IMPACT OUR MISSION\nThe Post-Doctoral Associate oversees research projects and performs a variety of assigned research and diagnostic activities, while completing post-doctoral training in a specialized area of scientific research. This position reports to Scientist level or higher in the Conservation Science & Wildlife Health department.\nWHAT YOU WILL DO\nOversee assigned research projects.\nCollect, process, and enters research data.\nPerform experiments and measurements.\nInterpret and analyze research results and revises procedures as necessary.\nEstablish and maintain databases.\nDocument research methodology and results.\nPrepare progress reports.\nConduct and oversee investigations of study subjects.\nWHAT WE ARE LOOKING FOR\nThe San Diego Zoo Wildlife Alliance Conservation Technology Lab seeks a postdoctoral associate for a three-year position applying their machine learn

### save job data to file

In [12]:
# One row per scraped posting; columns come from the keys of each record dict.
df = pd.DataFrame(job_data_list)

In [13]:
# Sanity-check the first ten scraped rows.
df.head(10)

Unnamed: 0,Job Title,Job Description,Salary Range:
0,Bioacoustics Machine Learning Post Doctoral As...,HOW YOU WILL IMPACT OUR MISSION\nThe Post-Doct...,"$87,264"
1,AI Fellows (12-month LTE),AI Fellows (12-month LTE*)\nLocation:\nSeattle...,
2,Senior Software Engineer - Agents,About xAI\nxAI’s mission is to create AI syste...,"$180,000 - $440,000"
3,"Lead Software Engineer, ML Backend","At Dynamo AI, our mission is to empower every ...",
4,"Staff Engineer, Software Autonomy Applications...","Founded in 2015, Shield AI is a venture-backed...",
5,ML Application Security Engineer,"At\nDynamo AI\n, we believe that LLMs must be ...",
6,Solutions Engineering Senior Manager,We are seeking a strategic and innovative Solu...,
7,ML Engineer — LLM Evaluation,"At\nDynamo AI\n, we believe that LLMs must be ...",
8,ML Engineer – LLM Safety,"At\nDynamo AI\n, we believe that LLMs must be ...",
9,ML Engineer — LLM Privacy,"At\nDynamo AI\n, we believe that LLMs must be ...",


In [14]:
# Save the scraped postings. index=False omits the DataFrame index, which would
# otherwise be written as an extra unnamed first column; this matches the other
# cells in this notebook that write the same file.
df.to_csv('data/job_data.csv', index=False)

In [20]:
import pandas as pd

# Minimal placeholder dataset with the same three columns as the scraped data.
# NOTE(review): this writes to the same path as the scraped CSV and therefore
# replaces it — confirm that is intended.
sample_data = [
    dict(
        zip(
            ("Job Title", "Job Description", "Salary Range:"),
            ("AI Engineer", "Build AI models.", "$100,000 - $400,000"),
        )
    ),
    # Add more rows as needed
]
sample_frame = pd.DataFrame(sample_data)
sample_frame.to_csv('data/job_data.csv', index=False)

In [22]:
import pandas as pd

# Hand-written sample postings with the same three columns that get_job_data
# produces (including the trailing colon in "Salary Range:").
# NOTE(review): this writes to the same path as the scraped CSV and therefore
# replaces it — confirm that is intended.
sample_data = [
    {"Job Title": "AI Engineer", "Job Description": "Build AI models using Python, TensorFlow, and Pandas for predictive analytics in e-commerce.", "Salary Range:": "$100,000 - $200,000"},
    {"Job Title": "Machine Learning Specialist", "Job Description": "Develop ML algorithms with PyTorch and deploy on AWS for real-time fraud detection.", "Salary Range:": "$120,000 - $180,000"},
    {"Job Title": "NLP Researcher", "Job Description": "Research advanced NLP models using BERT, GPT, and LangChain for conversational AI.", "Salary Range:": "$110,000 - $160,000"},
    {"Job Title": "Computer Vision Engineer", "Job Description": "Implement computer vision solutions with OpenCV and deep learning for autonomous vehicles.", "Salary Range:": "$130,000 - $190,000"},
    {"Job Title": "Data Scientist", "Job Description": "Analyze large datasets with SQL, Pandas, and NumPy to derive business insights.", "Salary Range:": "$90,000 - $150,000"},
    {"Job Title": "MLOps Engineer", "Job Description": "Manage ML pipelines using Docker, Kubernetes, and MLOps tools for scalable deployments.", "Salary Range:": "$140,000 - $210,000"},
    {"Job Title": "AI Ethics Specialist", "Job Description": "Evaluate AI models for bias using statistical methods and ethical frameworks.", "Salary Range:": "$95,000 - $145,000"},
    {"Job Title": "Reinforcement Learning Expert", "Job Description": "Design RL agents with Python and TensorFlow for optimization in robotics.", "Salary Range:": "$125,000 - $185,000"},
    {"Job Title": "Bioinformatics AI Analyst", "Job Description": "Apply ML to genomic data using BioPython and deep learning for drug discovery.", "Salary Range:": "$105,000 - $155,000"},
    {"Job Title": "Edge Computing AI Developer", "Job Description": "Optimize AI models for edge devices with TensorFlow Lite and embedded systems.", "Salary Range:": "$115,000 - $170,000"},
    {"Job Title": "Generative AI Artist", "Job Description": "Create generative models for art using Stable Diffusion, GANs, and Python.", "Salary Range:": "$85,000 - $135,000"},
    {"Job Title": "AI Security Engineer", "Job Description": "Secure AI systems against adversarial attacks using robust ML techniques.", "Salary Range:": "$135,000 - $195,000"},
    {"Job Title": "Healthcare AI Consultant", "Job Description": "Develop predictive models for patient outcomes using EHR data and scikit-learn.", "Salary Range:": "$100,000 - $160,000"},
    {"Job Title": "Autonomous Systems Engineer", "Job Description": "Build AI for self-driving tech using sensor fusion, computer vision, and ROS.", "Salary Range:": "$145,000 - $220,000"},
    {"Job Title": "Financial AI Modeler", "Job Description": "Model financial risks with time-series forecasting and LSTM networks in Python.", "Salary Range:": "$110,000 - $165,000"},
    {"Job Title": "Environmental AI Analyst", "Job Description": "Analyze satellite data with ML to predict climate patterns using Python and GCP.", "Salary Range:": "$95,000 - $140,000"},
    {"Job Title": "Educational AI Developer", "Job Description": "Create adaptive learning systems with recommendation engines and NLP.", "Salary Range:": "$90,000 - $130,000"},
    {"Job Title": "Retail Analytics Specialist", "Job Description": "Optimize inventory with demand forecasting using Prophet and regression models.", "Salary Range:": "$105,000 - $150,000"},
    {"Job Title": "Game AI Programmer", "Job Description": "Implement NPC behaviors using behavior trees and reinforcement learning in C++.", "Salary Range:": "$120,000 - $175,000"},
    {"Job Title": "Aerospace AI Engineer", "Job Description": "Develop AI for satellite navigation and anomaly detection using Python and AWS.", "Salary Range:": "$130,000 - $190,000"}
]

pd.DataFrame(sample_data).to_csv('data/job_data.csv', index=False)
# Report the real row count so the message stays correct when rows are added
# or removed.
print(f"Sample job_data.csv created with {len(sample_data)} entries")

Sample job_data.csv created with 20 entries


In [23]:
# Run the dashboard script as a subprocess. Per the recorded output below, it
# starts a Gradio server (local URL plus a public share link) and keeps the
# cell busy until it is interrupted.
!python ai-job-dashboard-gradio.py

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://cb23610fea222db355.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
Keyboard interruption in main thread... closing server.
Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/gradio/blocks.py", line 2958, in block_thread
    time.sleep(0.1)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/content/ai-job-dashboard-gradio.py", line 242, in <module>
    gradio_interface().launch(share=True)
  File "/usr/local/lib/python3.12/dist-packages/gradio/blocks.py", line 2865, in launch
    self.block_thread()
  File "/usr/local/lib/python3.12/dist-packages/gradio/blocks.py", line 2962, in block_thread
    self.server.close()
  File "/usr