In [1]:
import os
import requests
import jwt
import time
import sqlite3
import json

def save_to_file(data, file_path):
    """Serialize ``data`` as JSON and write it to ``file_path``.

    The file is opened in text write mode, so existing content is replaced.

    Args:
        data: Any object that ``json.dump`` can serialize.
        file_path: Destination path of the JSON file.
    """
    with open(file_path, 'w') as sink:
        json.dump(data, sink)

def load_from_file(file_path):
    """Return the complete text content of ``file_path``.

    The content is returned as a plain string; it is not parsed as JSON.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file content as ``str``.
    """
    with open(file_path, 'r') as source:
        content = source.read()
    return content

def generate_jwt_token(client_id, client_secret):
    """Create an HS256-signed JWT whose issuer and subject are ``client_id``.

    The token carries ``iss``, ``sub``, ``aud``, ``iat`` and ``exp`` claims and
    expires one hour after it was issued.

    Args:
        client_id: Value used for both the ``iss`` and ``sub`` claims.
        client_secret: Shared secret used as the HMAC signing key.

    Returns:
        Whatever ``jwt.encode`` returns for the claim set.
    """
    # Replace with the appropriate token endpoint URL for your OAuth server
    # NOTE(review): this looks like an unformatted search-page URL template
    # (it still contains "{}") rather than a token endpoint - confirm.
    audience = "https://web.arbeitsagentur.de/ausbildungssuche/berufsausbildung-suche?sty=0&seite={}&atyp=102&kat=1"

    issued_at = int(time.time())
    one_hour = 3600  # token lifetime in seconds
    claims = dict(
        iss=client_id,
        sub=client_id,
        aud=audience,
        iat=issued_at,
        exp=issued_at + one_hour,
    )

    # The client secret doubles as the symmetric signing key.
    return jwt.encode(claims, client_secret, algorithm='HS256')

def get_access_token(client_id, client_secret, timeout=30):
    """Obtain an OAuth access token via the client-credentials grant.

    The credentials are sent form-encoded to the token endpoint. No JWT is
    involved in this request: the previous implementation generated one but
    never transmitted it, so that dead step has been removed.

    Args:
        client_id: OAuth client identifier.
        client_secret: OAuth client secret.
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so a stalled connection cannot block the notebook forever.

    Returns:
        The ``access_token`` value from the JSON response.

    Raises:
        Exception: If the endpoint answers with a status code other than 200.
    """
    token_endpoint = "https://rest.arbeitsagentur.de/oauth/gettoken_cc"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret
    }

    response = requests.post(token_endpoint, headers=headers, data=data, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to get access token. Status code: {response.status_code}")
    return response.json()["access_token"]

def get_api_data(api_url, access_token, timeout=30):
    """Fetch one JSON document from ``api_url`` using bearer-token authentication.

    Args:
        api_url: Fully built request URL, including any query string.
        access_token: OAuth access token sent as ``Authorization: Bearer ...``.
        timeout: Seconds to wait for the server before ``requests`` gives up,
            so a stalled connection cannot block the notebook forever.

    Returns:
        The decoded JSON payload, or ``None`` when the payload is empty
        (in which case "No more data" is printed).

    Raises:
        Exception: If the server answers with a status code other than 200.
    """
    headers = {
        "Authorization": f"Bearer {access_token}",
    }

    response = requests.get(api_url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to fetch data from API. Status code: {response.status_code}")

    data = response.json()
    if not data:
        print("No more data")
        # Explicit so callers can see that "empty" is a real return path.
        return None
    return data

def create_table(db_connection):
    """Create the ``umschulung_data`` table if it does not exist yet, then commit.

    Every column except the ``event_id`` primary key is nullable.

    The two postal-code columns (``provider_ort_plz`` and ``t_ort_plz``) are
    declared ``TEXT``: with an ``INTEGER`` declaration SQLite's type affinity
    converts a value such as "01067" to the number 1067 and the leading zero
    is lost.

    Because of ``IF NOT EXISTS`` a table that already exists in the database
    file keeps its previous column declarations.

    Args:
        db_connection: Open ``sqlite3`` connection.
    """
    cursor = db_connection.cursor()
    # Create a table if it doesn't exist
    cursor.execute('''CREATE TABLE IF NOT EXISTS umschulung_data
                            (
                                event_id INTEGER PRIMARY KEY,
                                title TEXT NULL,
                                degree_type TEXT NULL,
                                degree_designation TEXT NULL,
                                event_link TEXT NULL,
                                duration TEXT NULL,
                                min_participants INTEGER NULL,
                                max_participants INTEGER NULL,
                                form_of_teaching TEXT NULL,
                                cost_remark TEXT NULL,
                                financial_support TEXT NULL,
                                education_type TEXT NULL,
                                begin TEXT NULL,
                                end TEXT NULL,
                                examining_body TEXT NULL,
                                lesson_timings TEXT NULL,
                                currency TEXT NULL,
                                individual_entry BOOLEAN NULL,
                                type_of_school TEXT NULL,
                                provider_id INTEGER NULL,
                                provider_name TEXT NULL,
                                provider_homepage TEXT NULL,
                                provider_email TEXT NULL,
                                provider_address_id INTEGER NULL,
                                provider_add_name TEXT NULL,
                                provider_address_street TEXT NULL,
                                provider_ort_plz TEXT NULL,
                                provider_ort_name TEXT NULL,
                                provider_lat DOUBLE NULL,
                                provider_lon DOUBLE NULL,
                                address_id INTEGER NULL,
                                address_name TEXT NULL,
                                training_street TEXT NULL,
                                t_ort_plz TEXT NULL,
                                t_ort_name TEXT NULL,
                                training_lat DOUBLE NULL,
                                training_lon DOUBLE NULL,
                                update_time INTEGER NULL
                            )
                ''')
    db_connection.commit()

In [2]:
def _dig(node, *keys):
    """Follow ``keys`` through nested dicts; return None once a level is absent or not a dict."""
    for key in keys:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _entry_to_row(entry):
    """Flatten one API entry into the 38 values of an ``umschulung_data`` row, in column order."""
    angebot = entry.get("angebot")
    anbieter = _dig(angebot, "bildungsanbieter")
    anbieter_adresse = _dig(anbieter, "adresse")
    anbieter_ort = _dig(anbieter_adresse, "ortStrasse")
    adresse = entry.get("adresse")
    ort = _dig(adresse, "ortStrasse")

    return (
        entry["id"],  # primary key: deliberately strict, a missing id is an error
        _dig(angebot, "titel"),
        _dig(angebot, "abschlussart"),
        _dig(angebot, "abschlussbezeichnung"),
        _dig(angebot, "link"),
        _dig(entry, "dauer", "bezeichnung"),
        entry.get("teilnehmerMin"),
        entry.get("teilnehmerMax"),
        _dig(entry, "unterrichtsform", "bezeichnung"),
        entry.get("kostenBemerkung"),
        _dig(angebot, "foerderung"),
        _dig(angebot, "bildungsart", "bezeichnung"),
        entry.get("beginn"),
        entry.get("ende"),
        entry.get("pruefendeStelle"),
        entry.get("unterrichtszeiten"),
        entry.get("kostenWaehrung"),
        entry.get("individuellerEinstieg"),
        _dig(angebot, "schulart", "bezeichnung"),
        _dig(anbieter, "id"),
        _dig(anbieter, "name"),
        _dig(anbieter, "homepage"),
        _dig(anbieter, "email"),
        _dig(anbieter_adresse, "id"),
        _dig(anbieter_adresse, "bezeichnung"),
        _dig(anbieter_adresse, "strasse"),
        _dig(anbieter_ort, "plz"),
        _dig(anbieter_ort, "name"),
        _dig(anbieter_ort, "koordinatenPlz", "lat"),
        _dig(anbieter_ort, "koordinatenPlz", "lon"),
        _dig(adresse, "id"),
        _dig(adresse, "bezeichnung"),
        _dig(adresse, "strasse"),
        _dig(ort, "plz"),
        _dig(ort, "name"),
        _dig(ort, "koordinatenPlz", "lat"),
        _dig(ort, "koordinatenPlz", "lon"),
        entry.get("aktualisierungsdatum"),
    )


def insert_data_into_db(data, db_connection):
    """Store API entries in ``umschulung_data`` and commit.

    Behaviour:
      * Entries whose ``adresse`` is missing or ``None`` are skipped.
      * Any other missing or null (sub-)object becomes NULL in the affected
        columns instead of aborting the whole batch.
      * ``INSERT OR REPLACE`` is used, so an ``event_id`` that is already
        stored is refreshed rather than raising a UNIQUE-constraint error;
        this makes re-running the import safe.

    Args:
        data: Iterable of entry dicts as delivered by the API
            (``None`` is treated as an empty batch).
        db_connection: Open ``sqlite3`` connection with the table created.

    Raises:
        KeyError: If an entry that should be stored has no ``id``.
    """
    rows = [
        _entry_to_row(entry)
        for entry in (data or ())
        if entry.get("adresse") is not None
    ]

    cursor = db_connection.cursor()
    cursor.executemany('''INSERT OR REPLACE INTO umschulung_data (
                event_id, title, degree_type, degree_designation,
                event_link, duration, min_participants, max_participants,
                form_of_teaching, cost_remark, financial_support, education_type,
                begin, end, examining_body, lesson_timings, currency, individual_entry,
                type_of_school,
                provider_id, provider_name, provider_homepage, provider_email,
                provider_address_id, provider_add_name, provider_address_street,
                provider_ort_plz, provider_ort_name, provider_lat, provider_lon,
                address_id, address_name,
                training_street, t_ort_plz, t_ort_name, training_lat, training_lon,
                update_time
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                      ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
            )
        ''', rows)

    db_connection.commit()
    print("insert data")

In [3]:
def main():
    """Download all pages of training offers and store them in a local SQLite database.

    Steps:
      1. Obtain an access token (client-credentials flow).
      2. Open ``weiterbildung_analysis.db`` and make sure the table exists.
      3. Request page after page (20 entries each) until the page count
         reported by the API is reached or an empty response comes back,
         inserting each page as it arrives.

    The OAuth credentials can be overridden with the environment variables
    ``BA_CLIENT_ID`` and ``BA_CLIENT_SECRET``.

    Errors raised while fetching or inserting are printed and the open
    transaction is rolled back; the success message is only printed when the
    loop finished without an error. The connection is always closed.
    """
    api_url = "https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot"  # Replace with the API endpoint URL
    # NOTE(review): the literal defaults are kept so the notebook still runs
    # unchanged; if these credentials are private, remove them from source
    # and supply them through the environment only.
    client_id = os.environ.get("BA_CLIENT_ID", "1c852184-1944-4a9e-a093-5cc078981294")
    client_secret = os.environ.get("BA_CLIENT_SECRET", "777f9915-9f0d-4982-9c33-07b5810a3e79")

    # Get the access token using the client credentials flow
    access_token = get_access_token(client_id, client_secret)

    # Connect outside the try block: if connecting itself fails there is
    # nothing to roll back or close, and the error should surface as-is.
    db_connection = sqlite3.connect('weiterbildung_analysis.db')
    try:
        # Create the table if not exists
        create_table(db_connection)

        # Pagination: the real page count is only known after the first response.
        page = 0
        total_pages = 1
        while page < total_pages:
            paginated_api_url = f"{api_url}?page={page}&size=20"
            print(paginated_api_url)
            api_data = get_api_data(paginated_api_url, access_token)
            if not api_data:
                # Empty response: nothing further to fetch.
                break
            total_pages = api_data["page"]["totalPages"]
            # A page may come without embedded entries; treat that as an empty batch.
            termine = (api_data.get("_embedded") or {}).get("termine") or []
            insert_data_into_db(list(termine), db_connection)
            page += 1

    except Exception as e:
        print(f"Error: {e}")
        db_connection.rollback()  # Rollback changes in case of an error

    else:
        print("Data fetched and saved to SQLite database successfully.")

    finally:
        # Close the connection
        db_connection.close()


# Entry point: start the import only when this code is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=0&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=1&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=2&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=3&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=4&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=5&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=6&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=7&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?page=8&size=20
insert data
 https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangeb