In [0]:
%sql
-- Create the bronze (raw ingestion) schema. IF NOT EXISTS keeps this cell
-- idempotent so the notebook can be re-run without failing on an existing schema.
CREATE SCHEMA IF NOT EXISTS bronze;

In [0]:
# Import necessary modules
import os

import pandas as pd
import requests
from pyspark.sql import SparkSession

# API Configuration
API_URL = "https://findwork.dev/api/jobs/"
# The token is read from the FINDWORK_API_TOKEN environment variable so that the
# credential never lives in the notebook source or its version history.
# NOTE(review): a token was previously hard-coded on this line; treat it as
# leaked and rotate it with the API provider.
# Falls back to an empty string when the variable is not set.
API_TOKEN = os.environ.get("FINDWORK_API_TOKEN", "")

In [0]:
# Define necessary functions, first for fetching jobs from Findwork API, then for loading them into the bronze schema

def fetch_all_jobs(timeout=30):
    """
    Fetch all jobs from the API, following pagination until no page is left.

    Every response body is parsed as JSON and must contain a 'results' list
    (the jobs of that page) and a 'next' entry (URL of the following page).
    The loop ends when 'next' is falsy.

    Args:
        timeout: Seconds to wait on each HTTP request before `requests`
            raises a timeout error. Defaults to 30.

    Returns:
        list: The 'results' entries of all pages, concatenated in page order.

    Raises:
        RuntimeError: If API_TOKEN is empty, so no authenticated call is possible.
        requests.HTTPError: If any page answers with a status other than 200.
            Failing here (instead of returning the pages fetched so far) keeps
            a truncated extract from being written over a complete table.
        requests.RequestException: On connection errors or timeouts.
    """
    if not API_TOKEN:
        raise RuntimeError(
            "API_TOKEN is empty; configure the API token before fetching jobs"
        )

    all_jobs = []
    url = API_URL

    # A single session reuses the underlying connection across all pages
    with requests.Session() as session:
        session.headers.update({'Authorization': f'Token {API_TOKEN}'})

        while url:
            # Without a timeout a stalled connection would block the cell forever
            response = session.get(url, timeout=timeout)

            if response.status_code != 200:
                print(f"Error: {response.status_code}")
                print(response.text)
                raise requests.HTTPError(
                    f"Request to {url} failed with status {response.status_code}",
                    response=response,
                )

            data = response.json()
            all_jobs.extend(data['results'])
            # Get next page URL (falsy on the last page, which ends the loop)
            url = data['next']
            print(f"Fetched {len(all_jobs)} jobs so far...")

    return all_jobs


def load_to_database(jobs_data, table_name):
    """
    Load jobs data into a table of the bronze schema, replacing its contents.

    Args:
        jobs_data: Sized collection of job records; handed to `pd.DataFrame`
            as-is, so it must be something pandas can build a frame from.
        table_name: Unqualified table name. The data is written to
            `bronze.<table_name>`.

    Returns:
        The Spark DataFrame that was written.

    Raises:
        ValueError: If `jobs_data` is empty. Raised before any Spark call so
            the existing table is left untouched and the error is explicit.
    """
    # Fail early on empty input, before any pandas or Spark work is attempted
    if len(jobs_data) == 0:
        raise ValueError("jobs_data is empty; nothing to load")

    # Convert to pandas DataFrame
    df_pandas = pd.DataFrame(jobs_data)

    # Convert pandas df to Spark DataFrame
    # NOTE(review): `spark` is not defined in this notebook; presumably it is the
    # session object provided by the notebook runtime — confirm.
    df_spark = spark.createDataFrame(df_pandas)

    # Specify database to load the data into
    full_table_name = f'bronze.{table_name}'

    # Write to table with mode "overwrite": each run replaces whatever the
    # table held before with the new data (this is NOT an append).
    df_spark.write.mode("overwrite").saveAsTable(full_table_name)

    print(f"Successfully loaded {len(jobs_data)} records to table '{full_table_name}'")

    return df_spark

In [0]:
# Entry point: pull every job from the API, then persist the result
if __name__ == "__main__":
    # Step 1 - extract
    print("Fetching jobs from API...")
    jobs = fetch_all_jobs()

    # Step 2 - load into the bronze table
    print("Loading data to Database...")
    df = load_to_database(jobs, table_name="jobs_table")

Fetching jobs from API...
Fetched 100 jobs so far...
Fetched 200 jobs so far...
Fetched 300 jobs so far...
Fetched 400 jobs so far...
Fetched 500 jobs so far...
Fetched 600 jobs so far...
Fetched 700 jobs so far...
Fetched 800 jobs so far...
Fetched 900 jobs so far...
Fetched 1000 jobs so far...
Fetched 1100 jobs so far...
Fetched 1200 jobs so far...
Fetched 1300 jobs so far...
Fetched 1400 jobs so far...
Fetched 1492 jobs so far...
Loading data to Database...
Successfully loaded 1492 records to table 'bronze.jobs_table'
