In [1]:
import json
import os
import requests
from datetime import datetime
import math
from loguru import logger
import pandas as pd

In [3]:
# Adzuna credentials are read from the environment instead of being embedded
# in the notebook, so they cannot leak through version control or sharing.
# Export ADZUNA_APP_ID and ADZUNA_APP_KEY before starting the kernel.
# NOTE(review): any credentials that were previously committed in this cell
# should be treated as compromised and rotated.
try:
    ADZUNA_APP_ID = os.environ['ADZUNA_APP_ID']
    ADZUNA_APP_KEY = os.environ['ADZUNA_APP_KEY']
except KeyError as missing:
    # Fail fast with an actionable message rather than sending empty
    # credentials to the API and debugging an authentication error later.
    raise RuntimeError(
        f"Missing required environment variable {missing}; "
        "set ADZUNA_APP_ID and ADZUNA_APP_KEY before running this notebook."
    ) from None

# Define the API endpoint and base parameters.
# The page number is appended to `url` by the request code (e.g. f"{url}1").
url = "https://api.adzuna.com/v1/api/jobs/ca/search/"
base_params = {
    'app_id': ADZUNA_APP_ID,
    'app_key': ADZUNA_APP_KEY,
    'results_per_page': 50,  # Maximum allowed results per page
    'what_phrase': "data engineer", # an entire phrase which must be found in the description or title
    'max_days_old': 2,
    'sort_by': "date"
}

# Initialize a list to store all job postings
all_job_postings = []

# Seconds to wait on connect/read before giving up. Without an explicit
# timeout, requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30


def _fetch_page(page, error_prefix):
    """Request one page of search results and return its decoded JSON body.

    Args:
        page: Page number appended to ``url`` (the first page is 1).
        error_prefix: Text placed before the failure details in the log line.

    Returns:
        The parsed JSON payload, or ``None`` if the request could not be
        completed, returned a non-200 status, or the body was not valid JSON.
        Every failure is logged; none is raised to the caller.
    """
    try:
        response = requests.get(
            f"{url}{page}", params=base_params, timeout=REQUEST_TIMEOUT_SECONDS
        )
        if response.status_code != 200:
            logger.error(f"{error_prefix}: {response.status_code}, {response.text}")
            return None
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a body that cannot be decoded as JSON.
        logger.error(f"{error_prefix}: {exc}")
        return None


# Make the first request to determine the total number of pages
logger.info("Making the first request to determine the total number of pages")
data = _fetch_page(1, "Error")

if data is not None:
    # Fall back to safe defaults if the payload lacks the expected keys.
    total_results = data.get('count', 0)
    results_per_page = base_params['results_per_page']

    # Calculate the total number of pages
    total_pages = math.ceil(total_results / results_per_page)
    logger.info(f"Total number of page = {total_pages}")

    # Store the results from the first page
    all_job_postings.extend(data.get('results', []))

    # Loop through the remaining pages and request data from each.
    # A failed page is logged and skipped so the other pages are still collected.
    logger.info("Looping through the remaining pages and request data from each")
    for page in range(2, total_pages + 1):  # Start from page 2 to total_pages
        page_data = _fetch_page(page, f"Error fetching page {page}")
        if page_data is not None:
            all_job_postings.extend(page_data.get('results', []))

# Now all_job_postings contains data from all pages that were fetched successfully
logger.info(f"Total jobs retrieved: {len(all_job_postings)}")


# Transformation: picking up only necessary fields

# Column order of the resulting frame. Passing it explicitly keeps the schema
# stable even when no postings were retrieved (an empty record list would
# otherwise yield a frame with no columns and break the 'job_created' lookup).
JOB_COLUMNS = [
    'job_id',
    'job_title',
    'job_location',
    'job_company',
    'job_category',
    'job_description',
    'job_url',
    'job_created',
]


def _parse_job(job):
    """Flatten one raw job posting dict into a record keyed by JOB_COLUMNS.

    Args:
        job: A single entry from ``all_job_postings``.

    Returns:
        A dict with one value per column in ``JOB_COLUMNS``. Any field that
        is absent from the posting (including a missing or null nested
        object) becomes ``None`` instead of raising ``KeyError``.
    """
    def nested(key, field):
        # `or {}` also covers the case where the nested object is present but null.
        return (job.get(key) or {}).get(field)

    return dict(
        job_id=job.get('id'),
        job_title=job.get('title'),
        job_location=nested('location', 'display_name'),
        job_company=nested('company', 'display_name'),
        job_category=nested('category', 'label'),
        job_description=job.get('description'),
        job_url=job.get('redirect_url'),
        job_created=job.get('created'),
    )


parsed_jobs = [_parse_job(job) for job in all_job_postings]

jobs_df = pd.DataFrame(parsed_jobs, columns=JOB_COLUMNS)
# Convert the creation timestamp strings to datetimes; missing values become NaT.
jobs_df['job_created'] = pd.to_datetime(jobs_df['job_created'])

logger.info("Done")

[32m2024-09-29 19:51:58.791[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m19[0m - [1mMaking the first request to determine the total number of pages[0m
[32m2024-09-29 19:51:59.309[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m30[0m - [1mTotal number of page = 2[0m
[32m2024-09-29 19:51:59.310[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m36[0m - [1mLooping through the remaining pages and request data from each[0m
[32m2024-09-29 19:51:59.621[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m51[0m - [1mTotal jobs retrieved: 57[0m
[32m2024-09-29 19:51:59.644[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m73[0m - [1mDone[0m


In [4]:
# Preview the first rows of the parsed job postings (head() shows 5 rows by default).
jobs_df.head()

Unnamed: 0,job_id,job_title,job_location,job_company,job_category,job_description,job_url,job_created
0,4881546130,Java Team Lead,"Toronto, Ontario",Harvey Nash,IT Jobs,"Job Title: Java Team LeadLocation: Toronto, On...",https://www.adzuna.ca/details/4881546130?utm_m...,2024-09-29 17:14:44+00:00
1,4881545435,Data Architect,"Fulford, Brome-Missisquoi",BRP,IT Jobs,We are looking for a data architect to join ou...,https://www.adzuna.ca/details/4881545435?utm_m...,2024-09-29 17:14:38+00:00
2,4881543942,Senior Data Engineer 3[Hudi must]_Toronto,"Toronto, Ontario",Luxoft,IT Jobs,"Project Description:Applying to the position, ...",https://www.adzuna.ca/details/4881543942?utm_m...,2024-09-29 17:13:33+00:00
3,4881538032,Azure & Power BI Data Engineer,"Toronto, Ontario",Procom Labs,IT Jobs,Azure & Power BI Data EngineerThe role: In col...,https://www.adzuna.ca/details/4881538032?utm_m...,2024-09-29 17:11:12+00:00
4,4881537272,Senior Python Developer with AWS skills,"Toronto, Ontario",Luxoft,IT Jobs,"Project Description:Applying to the position, ...",https://www.adzuna.ca/details/4881537272?utm_m...,2024-09-29 17:10:59+00:00
