# Scraping AI Job Board

Code authored by: Shaw Talebi

### imports

In [8]:
import requests
from bs4 import BeautifulSoup
import json
import csv

### request html page

In [2]:
base_url = "https://aijobs.net"

# Send a request to the page
response = requests.get(base_url)
soup = BeautifulSoup(response.text, 'html.parser')

In [3]:
# Find all job links inside <a> tags with hrefs that start with /job/
job_links = []
for a in soup.select('li.list-group-item a.col'):
    href = a.get('href')
    if href and href.startswith('/job/'):
        job_links.append(href)

# Optional: add base URL if needed
full_links = [base_url + link for link in job_links]

In [4]:
full_links

['https://aijobs.net/job/1066507-senior-ai-engineer/',
 'https://aijobs.net/job/1142271-data-engineering-expert-for-power-management/',
 'https://aijobs.net/job/1142270-principal-software-qa-engineer-systems-test/',
 'https://aijobs.net/job/1142269-research-scientist-of-geosciences-attribute-assisted-seismic-processing-interpretation/',
 'https://aijobs.net/job/1142268-senior-software-engineer/',
 'https://aijobs.net/job/1142267-field-geomechanics-ii/',
 'https://aijobs.net/job/1142266-bangalore-sap-basis-technology-consultant-migrationconversion-2-to-15-years-btp-ds/',
 'https://aijobs.net/job/1142265-pre-doctoral-technical-associate-johan-chu/',
 'https://aijobs.net/job/1142264-application-engineer-internship-fm/',
 'https://aijobs.net/job/1142263-research-fellow-diagnostic-imaging-felix-lab/',
 'https://aijobs.net/job/1142262-lead-software-engineer/',
 'https://aijobs.net/job/1142261-data-co-op/',
 'https://aijobs.net/job/1142260-business-inteligence-developer-mf/',
 'https://aijobs

### Extract job details from urls

In [5]:
job_info_list = []
for link in full_links:
    # Send a request to the page
    response = requests.get(link)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find the embedded JSON-LD script
    json_ld_script = soup.find("script", type="application/ld+json")
    try:
        job_data = json.loads(json_ld_script.string)
    except:
        print(link)
    
    # Store extracted fields in a dictionary
    job_info = {
        "title": job_data.get("title"),
        "company": job_data.get("hiringOrganization", {}).get("name"),
        "location": job_data.get("jobLocation", {}).get("address", {}).get("addressLocality"),
        "salary_min": job_data.get("baseSalary", {}).get("value", {}).get("minValue"),
        "salary_max": job_data.get("baseSalary", {}).get("value", {}).get("maxValue"),
        "salary_currency": job_data.get("baseSalary", {}).get("currency"),
        "description": job_data.get("description")
    }

    job_info_list.append(job_info)

https://aijobs.net/job/1142266-bangalore-sap-basis-technology-consultant-migrationconversion-2-to-15-years-btp-ds/
https://aijobs.net/job/1142253-power-market-analyst/
https://aijobs.net/job/1142220-ingenieure-fiabilite/
https://aijobs.net/job/1142206-senior-business-technology-analyst/


In [6]:
job_info

{'title': 'Staff Software Engineer, TPU Performance',
 'company': 'Google',
 'location': 'Sunnyvale, CA, USA',
 'salary_min': 197000,
 'salary_max': 291000,
 'salary_currency': 'USD',
 'description': 'Minimum qualifications: Bachelor&#x27;s degree or equivalent practical experience. 8 years of experience testing, and launching software products. 5 years of experience with software development in one or more programming languages (e.g., Python, C, C++). Experience in performance analysis including system architecture, performance modeling, benchmarking, machine learning infrastructure, or other similar experience. Preferred qualifications: Master’s degree or PhD in Engineering, Computer Science, or a related technical field. 3 years of experience working in an organization including technical leadership, leading project teams, and setting technical direction. Experience with accelerator hardware architectures (e.g., TPUs, GPUs). Experience with ML performance modeling, optimizations, an

### write to .csv

In [12]:
# Define the CSV file name
output_file = "job_listings.csv"

# Write to CSV
with open(output_file, mode="w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=job_info_list[0].keys())
    writer.writeheader()
    writer.writerows(job_info_list)

print(f"Saved {len(job_info_list)} job(s) to {output_file}")

Saved 51 job(s) to job_listings.csv
