# Scraping AI Job Board (Live Session)

Code authored by: Shaw Talebi

### imports

In [1]:
import requests
from bs4 import BeautifulSoup

### 1) extract job page urls

In [2]:
# Define the base URL of the job board
base_url = "https://aijobs.net"

# Fetch the landing page. The timeout keeps this cell from hanging forever if
# the server never answers, and raise_for_status() stops the notebook on an
# HTTP error instead of letting later cells parse an error page.
response = requests.get(base_url, timeout=10)
response.raise_for_status()

In [5]:
# Parse the landing page HTML
soup = BeautifulSoup(response.text, "html.parser")

# Build the absolute URL of every anchor that points at a job posting
job_links = [
    base_url + anchor["href"]
    for anchor in soup.find_all("a", class_='col py-2', href=True)
    if anchor["href"].startswith("/job/")  # keep only job posting links
]

### 2) extract job data from each url

Data: job title, company name, salary, and location.

In [18]:
def extract_job_data(url, timeout=10):
    """
        Function to extract job data from an aijobs.net job page

        Args:
            url (str): address of a single job posting page
            timeout (float): seconds to wait for the server before giving up

        Returns:
            dict: keys "Job Title", "Company", "Salary" and "Location"; a field
                whose element is not found on the page is set to "N/A"

        Raises:
            requests.RequestException: if the request fails, times out, or the
                server answers with an HTTP error status
    """

    # Send a GET request to fetch the webpage content
    response = requests.get(url, timeout=timeout)

    # Fail loudly on 4xx/5xx responses; parsing an error page would otherwise
    # produce a row of "N/A" values that looks like real data
    response.raise_for_status()

    # create soup object
    soup = BeautifulSoup(response.text, "html.parser")

    def text_or_na(tag_name, css_class):
        """Return the stripped text of the first matching tag, or "N/A" if none."""
        tag = soup.find(tag_name, class_=css_class)
        return tag.text.strip() if tag else "N/A"

    # Return results
    return {"Job Title": text_or_na("h1", "display-5 mt-4 text-break"),
        "Company": text_or_na("h2", "h5"),
        "Salary": text_or_na("span", "badge rounded-pill text-bg-success my-1"),
        "Location": text_or_na("h3", "lead py-3")
       }

In [20]:
# Scrape every job page. A page that fails to download is reported and skipped
# so that one bad URL does not throw away the data already collected.
job_data_list = []
for url in job_links:
    try:
        job_data_list.append(extract_job_data(url))
    except requests.RequestException as err:
        print(f"Skipping {url}: {err}")

### 3) write data to .csv

In [23]:
import csv

In [29]:
csv_file_path = "job_data.csv"

# Stop with a clear message when scraping produced nothing; indexing the empty
# list below would otherwise fail with an unhelpful IndexError
if not job_data_list:
    raise ValueError("job_data_list is empty - no job data to write")

# Extract column names from the first dictionary (assuming all have the same keys)
fieldnames = job_data_list[0].keys()

# Write data to CSV (newline="" lets the csv module control line endings)
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)

    # Write header row
    writer.writeheader()

    # Write job data rows
    writer.writerows(job_data_list)

print(f"CSV file saved successfully at: {csv_file_path}")

CSV file saved successfully at: job_data.csv


In [26]:
# Display the column names that were used for the CSV header
fieldnames

dict_keys(['Job Title', 'Company', 'Salary', 'Location'])