In [10]:
import os
import requests
import jwt
import time
import sqlite3
import json


def save_to_file(data, file_path):
    """Serialize ``data`` as JSON into ``file_path``.

    The file is opened in write mode, so any existing content is replaced.
    """
    with open(file_path, 'w') as target:
        json.dump(data, fp=target)

def load_from_file(file_path):
    """Return the full text content of ``file_path``.

    The content is returned as a raw string; it is not parsed as JSON.
    """
    source = open(file_path, 'r')
    try:
        content = source.read()
    finally:
        source.close()
    return content

def generate_jwt_token(client_id, client_secret):
    """Create an HS256-signed JWT for ``client_id``.

    The token names ``client_id`` as both issuer and subject, uses the
    job-search site URL as audience, and expires one hour after it is issued.

    Args:
        client_id: OAuth client identifier placed in the ``iss``/``sub`` claims.
        client_secret: Shared secret used as the signing key.

    Returns:
        The encoded token as produced by ``jwt.encode``.
    """
    # Replace with the appropriate token endpoint URL for your OAuth server.
    audience = "https://www.arbeitsagentur.de/jobsuche/"
    issued_at = int(time.time())
    one_hour = 3600

    claims = dict(
        iss=client_id,
        sub=client_id,
        aud=audience,
        iat=issued_at,
        exp=issued_at + one_hour,
    )
    return jwt.encode(claims, client_secret, algorithm='HS256')

def get_access_token(client_id, client_secret, timeout=30):
    """Obtain an OAuth access token via the client-credentials grant.

    The credentials are posted form-encoded to the token endpoint. No JWT is
    needed for this request, so none is generated here.

    Args:
        client_id: OAuth client identifier.
        client_secret: OAuth client secret.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout the call could block indefinitely.

    Returns:
        The value of ``access_token`` from the JSON response.

    Raises:
        Exception: If the endpoint answers with a status code other than 200.
    """
    token_endpoint = "https://rest.arbeitsagentur.de/oauth/gettoken_cc"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret,
    }

    response = requests.post(
        token_endpoint, headers=headers, data=data, timeout=timeout
    )

    if response.status_code != 200:
        raise Exception(f"Failed to get access token. Status code: {response.status_code}")
    return response.json()["access_token"]

def get_api_data(api_url, access_token, timeout=30):
    """Fetch ``api_url`` with a bearer token and return the decoded JSON body.

    Args:
        api_url: Full URL of the API resource to request.
        access_token: Token sent in the ``Authorization: Bearer`` header.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout the call could block indefinitely.

    Returns:
        The decoded JSON payload, or ``None`` when the payload is empty
        (in which case "No more data" is printed).

    Raises:
        Exception: If the server answers with a status code other than 200.
    """
    headers = {
        "Authorization": f"Bearer {access_token}",
    }

    response = requests.get(api_url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to fetch data from API. Status code: {response.status_code}")

    data = response.json()
    print(data)

    if not data:
        print("No more data")
        return None
    return data

def create_table(db_connection):
    """Create the ``job_offers`` table if it does not exist yet and commit.

    ``plz`` is declared as TEXT: postal codes are identifiers, not numbers,
    and a REAL column would make SQLite convert a value such as '07745' to
    7745.0, dropping the leading zero. Because of ``IF NOT EXISTS``, a
    database file that already contains the table keeps its existing schema.

    Args:
        db_connection: Open ``sqlite3`` connection.
    """
    cursor = db_connection.cursor()
    cursor.execute('''CREATE TABLE IF NOT EXISTS job_offers (
    beruf TEXT NULL,
    titel TEXT NULL,
    refnr TEXT NULL,
    plz TEXT NULL,
    ort TEXT NULL,
    strasse TEXT NULL,
    region TEXT NULL,
    land TEXT NULL,
    lat REAL NULL,
    lon REAL NULL,
    arbeitgeber TEXT NULL,
    aktuelleVeroeffentlichungsdatum DATE,
    modifikationsTimestamp TIMESTAMP,
    eintrittsdatum DATE,
    logoHashId TEXT NULL,
    hashId TEXT NULL )''')
    db_connection.commit()


def insert_data_into_db(data, db_connection):
    """Insert every job offer in ``data["stellenangebote"]`` into ``job_offers``.

    Only ``refnr`` is required on each entry. The text fields ``beruf``,
    ``titel``, ``arbeitgeber``, ``logoHashId`` and ``hashId`` default to an
    empty string; the date/timestamp fields and all location fields default
    to NULL. ``arbeitsort`` and its ``koordinaten`` may be missing or null.

    All rows are built before anything is written, so a malformed entry
    raises before the database is touched.

    Args:
        data: Mapping with a ``"stellenangebote"`` list of offer dicts.
        db_connection: Open ``sqlite3`` connection; committed after the insert.

    Raises:
        KeyError: If ``"stellenangebote"`` is missing, or an entry lacks ``refnr``.
    """
    print(len(data))

    rows = []
    for entry in data["stellenangebote"]:
        # "or {}" also covers keys that are present but explicitly null.
        arbeitsort = entry.get("arbeitsort") or {}
        koordinaten = arbeitsort.get("koordinaten") or {}
        rows.append((
            entry.get("beruf", ""),
            entry.get("titel", ""),
            entry["refnr"],
            arbeitsort.get("plz"),
            arbeitsort.get("ort"),
            arbeitsort.get("strasse"),
            arbeitsort.get("region"),
            arbeitsort.get("land"),
            koordinaten.get("lat"),
            koordinaten.get("lon"),
            entry.get("arbeitgeber", ""),
            entry.get("aktuelleVeroeffentlichungsdatum"),
            entry.get("modifikationsTimestamp"),
            entry.get("eintrittsdatum"),
            entry.get("logoHashId", ""),
            entry.get("hashId", ""),
        ))

    cursor = db_connection.cursor()
    cursor.executemany('''INSERT INTO job_offers (
                    beruf,
                    titel,
                    refnr,
                    plz,
                    ort,
                    strasse,
                    region,
                    land,
                    lat,
                    lon,
                    arbeitgeber,
                    aktuelleVeroeffentlichungsdatum,
                    modifikationsTimestamp,
                    eintrittsdatum,
                    logoHashId,
                    hashId
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', rows)
    db_connection.commit()
    print("insert data")


def _fetch_and_store_offers(api_url, access_token, db_path):
    """Fetch the job offers at ``api_url`` and store them in the SQLite file ``db_path``.

    A single request is made; no page/size parameters are appended, so only
    what the API returns for the bare query is stored.

    Returns:
        True if offers were fetched and inserted, False if the API returned
        no data or an error occurred (the error is printed, not raised).
    """
    print(api_url)
    # Connect outside the try block so the cleanup code below never sees an
    # unbound connection if opening the database fails.
    db_connection = sqlite3.connect(db_path)
    try:
        create_table(db_connection)
        api_data = get_api_data(api_url, access_token)
        if not api_data:
            return False
        insert_data_into_db(api_data, db_connection)
    except Exception as e:
        print(f"Error: {e}")
        db_connection.rollback()  # Discard any uncommitted changes
        return False
    else:
        # Reported only when the insert really succeeded.
        print("Data fetched and saved to SQLite database successfully.")
        return True
    finally:
        db_connection.close()


def main():
    """Download job offers for each offer type and store them in SQLite.

    One access token is requested and reused for all queries. The offer
    types (``angebotsart``) 1, 2, 4 and 34 are fetched in that order and
    written to ``weiterbildung_sample6.db``. A failure for one offer type is
    printed and does not stop the remaining ones.

    The OAuth credentials can be overridden with the ``BA_CLIENT_ID`` and
    ``BA_CLIENT_SECRET`` environment variables; the previously hard-coded
    values remain the defaults.
    """
    base_url = "https://rest.arbeitsagentur.de/jobboerse/jobsuche-service/pc/v4/jobs"
    db_path = 'weiterbildung_sample6.db'
    client_id = os.environ.get("BA_CLIENT_ID", "c003a37f-024f-462a-b36d-b001be4cd24a")
    client_secret = os.environ.get("BA_CLIENT_SECRET", "32a39620-32b3-4307-9aa1-511e3d7f48a8")

    access_token = get_access_token(client_id, client_secret)

    for angebotsart in (1, 2, 4, 34):
        api_url = f"{base_url}?angebotsart={angebotsart}"
        _fetch_and_store_offers(api_url, access_token, db_path)


# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()


https://rest.arbeitsagentur.de/jobboerse/jobsuche-service/pc/v4/jobs?angebotsart=1
{'stellenangebote': [{'beruf': 'Master of Business Administration', 'titel': 'Junior Spezialist HR Strategie - Arbeitszeit (m/w/d)', 'refnr': '11850-225621-1522481-0-S', 'arbeitsort': {'plz': '80788', 'ort': 'München', 'region': 'Bayern', 'land': 'Deutschland', 'koordinaten': {'lat': 48.1521045, 'lon': 11.5502023}}, 'arbeitgeber': 'Orizon GmbH München-Süd', 'aktuelleVeroeffentlichungsdatum': '2023-08-13', 'modifikationsTimestamp': '2023-08-11T18:00:07.596', 'eintrittsdatum': '2023-10-01', 'logoHashId': '9VLJtbiu3WLfmalceI1z_El7pkOmGL0HZn3cmF0wQPg=', 'hashId': '0Q3Jsp9XIBAXIEAiZ9llmeTbzBrAcUHEpcdUWJKWbbE='}, {'beruf': 'Betriebswirt/in (Fachschule) - Produktionswirtschaft', 'titel': 'Fertigungsplaner (m/w/d)', 'refnr': '11850-225507-1522348-0-S', 'arbeitsort': {'plz': '07745', 'ort': 'Jena', 'region': 'Thüringen', 'land': 'Deutschland', 'koordinaten': {'lat': 50.9034275, 'lon': 11.5717884}}, 'arbeitgeber':