# AI Job Scraper
### ABB #8 - Session 1

Code authored by: Shaw Talebi

In [1]:
# Imports
import requests
from bs4 import BeautifulSoup
import csv
import time
import re

In [2]:
# Constants
BASE_URL = "https://aijobs.ai"
LISTING_URL = "https://aijobs.ai/united-states"
NUM_PAGES = 5
REQUEST_DELAY = 2  # seconds between requests

# Headers to avoid being blocked
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
}

In [3]:
# Step 1: Collect all job links from listing pages
job_links = []

for page in range(1, NUM_PAGES + 1):
    url = f"{LISTING_URL}?page={page}"
    print(f"Fetching page {page}...")
    
    # Fetch the page
    response = requests.get(url, headers=HEADERS)
    html = response.text
    
    # Parse with BeautifulSoup
    soup = BeautifulSoup(html, "html.parser")
    
    # Find all job cards (they have class 'jobcardStyle1')
    job_cards = soup.find_all("a", class_="jobcardStyle1")
    
    # Extract links
    for card in job_cards:
        href = card.get("href")
        if href:
            full_url = BASE_URL + href if href.startswith("/") else href
            job_links.append(full_url)
    
    print(f"  Found {len(job_cards)} jobs")
    time.sleep(REQUEST_DELAY)

# Remove duplicates
job_links = list(dict.fromkeys(job_links))
print(f"\nTotal unique jobs: {len(job_links)}")

Fetching page 1...
  Found 20 jobs
Fetching page 2...
  Found 20 jobs
Fetching page 3...
  Found 20 jobs
Fetching page 4...
  Found 20 jobs
Fetching page 5...
  Found 20 jobs

Total unique jobs: 100


In [4]:
# Step 2: Scrape details from each job page
jobs = []

for i, job_url in enumerate(job_links, 1):
    print(f"[{i}/{len(job_links)}] Scraping {job_url}")
    
    try:
        # Fetch job detail page
        response = requests.get(job_url, headers=HEADERS)
        soup = BeautifulSoup(response.text, "html.parser")
        
        # Extract title
        title_elem = soup.find("div", class_="post-main-title2")
        title = title_elem.get_text(strip=True) if title_elem else ""
        
        # Extract company (from post-info2, after "at")
        company = ""
        post_info = soup.find("div", class_="post-info2")
        if post_info:
            info_text = post_info.get_text()
            if " at " in info_text:
                company = info_text.split(" at ")[1].split("\n")[0].strip()
        
        # Extract salary (from div.salery h2)
        salary_text = ""
        salary_elem = soup.find("div", class_="salery")
        if salary_elem:
            h2 = salary_elem.find("h2")
            if h2:
                salary_text = h2.get_text(strip=True)
        
        # Parse salary into min/max
        min_salary, max_salary = None, None
        amounts = re.findall(r"\$[\d,]+", salary_text)
        if amounts:
            parsed = [int(a.replace("$", "").replace(",", "")) for a in amounts]
            min_salary = min(parsed)
            max_salary = max(parsed)
        
        # Extract description
        desc_elem = soup.find("div", class_="job-description-container")
        description = desc_elem.get_text(strip=True) if desc_elem else ""
        
        # Store job data
        jobs.append({
            "title": title,
            "company": company,
            "min_salary": min_salary,
            "max_salary": max_salary,
            "url": job_url,
            "description": description
        })
        
    except Exception as e:
        print(f"  Error: {e}")
    
    time.sleep(REQUEST_DELAY)

print(f"\nScraped {len(jobs)} jobs successfully")

[1/100] Scraping https://aijobs.ai/job/data-scientist-cheminformatics
[2/100] Scraping https://aijobs.ai/job/data-scientist-rna-biology
[3/100] Scraping https://aijobs.ai/job/scientist-machine-learning
[4/100] Scraping https://aijobs.ai/job/chief-operating-officer-coo
[5/100] Scraping https://aijobs.ai/job/associate-transfer-pricing-practice
[6/100] Scraping https://aijobs.ai/job/principal-recruiter
[7/100] Scraping https://aijobs.ai/job/software-engineer-i-ai-platform
[8/100] Scraping https://aijobs.ai/job/machine-learning-scientist-156
[9/100] Scraping https://aijobs.ai/job/director-data-and-ai-alliances
[10/100] Scraping https://aijobs.ai/job/senior-finance-manager-us
[11/100] Scraping https://aijobs.ai/job/data-science-analyst
[12/100] Scraping https://aijobs.ai/job/director-data-and-ai-alliances-2
[13/100] Scraping https://aijobs.ai/job/data-scientist-7117
[14/100] Scraping https://aijobs.ai/job/software-engineer-ii-ai-enablement
[15/100] Scraping https://aijobs.ai/job/director-da

In [5]:
# Step 3: Save to CSV
filename = "jobs.csv"
fieldnames = ["title", "company", "min_salary", "max_salary", "url", "description"]

with open(filename, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(jobs)

print(f"Saved {len(jobs)} jobs to {filename}")

# Show summary
with_salary = sum(1 for j in jobs if j["min_salary"] is not None)
print(f"Jobs with salary info: {with_salary}/{len(jobs)}")

Saved 100 jobs to jobs.csv
Jobs with salary info: 35/100


In [6]:
# Preview the first few jobs
for job in jobs[:3]:
    print(f"Title: {job['title']}")
    print(f"Company: {job['company']}")
    if job['min_salary']:
        print(f"Salary: ${job['min_salary']:,} - ${job['max_salary']:,}")
    else:
        print("Salary: Not specified")
    print("-" * 50)

Title: Data Scientist, Cheminformatics
Company: 
Salary: $135,000 - $180,000
--------------------------------------------------
Title: Data Scientist, RNA Biology
Company: 
Salary: $135,000 - $180,000
--------------------------------------------------
Title: Scientist, Machine Learning
Company: 
Salary: $170,000 - $230,000
--------------------------------------------------
