<font size= "4">***CODE FOR FETCHING DATA FROM API FOR RETRAININGS (Umschulung)***</font>

In [3]:
import os
import requests
import jwt
import time
import sqlite3
import json
import re
from datetime import datetime , timedelta

def save_to_file(data, file_path):
    """Write ``data`` to ``file_path`` as JSON, replacing any existing file.

    Args:
        data: Any object the ``json`` module can serialize.
        file_path: Destination path; opened in text write mode.
    """
    with open(file_path, 'w') as out_file:
        json.dump(data, out_file)

def load_from_file(file_path):
    """Return the complete text content of ``file_path``.

    The content is returned as a raw string; no JSON decoding is performed.
    """
    with open(file_path, 'r') as in_file:
        content = in_file.read()
    return content

def generate_jwt_token(client_id, client_secret):
    """Build an HS256-signed JWT whose issuer and subject are ``client_id``.

    Args:
        client_id: Used for both the ``iss`` and ``sub`` claims.
        client_secret: Shared secret used as the signing key.

    Returns:
        The encoded token as produced by ``jwt.encode``.
    """
    # Replace with the appropriate token endpoint URL for your OAuth server
    audience = "https://web.arbeitsagentur.de/ausbildungssuche/berufsausbildung-suche?sty=0&atyp=109&kat=1"

    issued_at = int(time.time())
    claims = dict(
        iss=client_id,
        sub=client_id,
        aud=audience,
        iat=issued_at,
        exp=issued_at + 3600,  # valid for one hour
    )

    # Sign the claims with the client secret
    return jwt.encode(claims, client_secret, algorithm='HS256')

def get_access_token(client_id, client_secret, timeout=30):
    """Obtain an OAuth access token using the client-credentials grant.

    The previous implementation also generated a JWT here, but the token was
    never included in the request, so that dead step has been removed.

    Args:
        client_id: OAuth client ID sent in the form body.
        client_secret: OAuth client secret sent in the form body.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block the run indefinitely.

    Returns:
        The ``access_token`` value from the JSON response.

    Raises:
        Exception: If the token endpoint answers with a status other than 200.
    """
    token_endpoint = "https://rest.arbeitsagentur.de/oauth/gettoken_cc"
    headers = {
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {
        "grant_type": "client_credentials",
        "client_id": client_id,
        "client_secret": client_secret
    }

    response = requests.post(token_endpoint, headers=headers, data=data, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to get access token. Status code: {response.status_code}")
    return response.json()["access_token"]

def get_api_data(api_url, access_token, timeout=120):
    """Fetch ``api_url`` with a bearer token and return the decoded JSON body.

    Args:
        api_url: Fully built request URL (including query parameters).
        access_token: Token placed in the ``Authorization: Bearer`` header.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot block the run indefinitely.

    Returns:
        The decoded JSON payload. When the payload is empty, a notice is
        printed and an empty dict is returned (instead of an implicit
        ``None``) so callers can safely use ``in`` / ``.get`` on the result.

    Raises:
        Exception: If the API answers with a status other than 200.
    """
    headers = {
        "Authorization": f"Bearer {access_token}",
    }

    response = requests.get(api_url, headers=headers, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to fetch data from API. Status code: {response.status_code}")

    data = response.json()
    if not data:
        print("No more data")
        return {}
    return data
    
def create_table(db_connection):
    """Create the ``umschulung`` table on ``db_connection`` if it is missing.

    The statement uses ``IF NOT EXISTS``, so calling this repeatedly is safe.
    ``veranstaltungs_ID`` is declared as the primary key. The change is
    committed before returning.
    """
    schema_sql = '''CREATE TABLE IF NOT EXISTS umschulung
                            (
                                veranstaltungs_ID INT PRIMARY KEY,
                                trainingstitel TEXT NULL,
                                dauer TEXT,
                                unterrichtsform TEXT,
                                abschlussart TEXT,
                                abschlussbezeichnung TEXT,
                                finanzielle_unterstützung  TEXT,
                                zugangsinformationen TEXT,
                                zielgruppe TEXT,
                                kredit TEXT,
                                anbieter_ID INT,
                                bildungsanbieter TEXT,
                                telefonVorwahl INT,
                                telefonDurchwahl INT,
                                mobilVorwahl INT,
                                mobilDurchwahl  INT,
                                faxVorwahl INT,
                                faxDurchwahl INT,
                                provider_homepage TEXT,
                                provider_email TEXT,
                                latitude REAL,
                                longitude REAL,
                                stadt TEXT,
                                land TEXT,
                                land_code TEXT ,
                                training_type TEXT,
                                schulart TEXT,
                                unterrichtszeiten TEXT,
                                kostenWert TEXT,
                                kostenWaehrung TEXT,
                                individuellerEinstieg TEXT,
                                foerderung TEXT,
                                link TEXT,
                                beginn DATE,
                                ende DATE,
                                teilnehmerMin INT,
                                teilnehmerMax INT,
                                aktualisierungsdatum DATE
                                )
                '''
    db_cursor = db_connection.cursor()
    db_cursor.execute(schema_sql)
    db_connection.commit()

    
# Compiled once; used to drop HTML tags from free-text API fields.
_HTML_TAG_RE = re.compile(r'<[^>]+>')

# Column order of the ``umschulung`` table; _entry_to_row() must match it.
_UMSCHULUNG_COLUMNS = (
    "veranstaltungs_ID",
    "trainingstitel",
    "dauer",
    "unterrichtsform",
    "abschlussart",
    "abschlussbezeichnung",
    "finanzielle_unterstützung",
    "zugangsinformationen",
    "zielgruppe",
    "kredit",
    "anbieter_ID",
    "bildungsanbieter",
    "telefonVorwahl",
    "telefonDurchwahl",
    "mobilVorwahl",
    "mobilDurchwahl",
    "faxVorwahl",
    "faxDurchwahl",
    "provider_homepage",
    "provider_email",
    "latitude",
    "longitude",
    "stadt",
    "land",
    "land_code",
    "training_type",
    "schulart",
    "unterrichtszeiten",
    "kostenWert",
    "kostenWaehrung",
    "individuellerEinstieg",
    "foerderung",
    "link",
    "beginn",
    "ende",
    "teilnehmerMin",
    "teilnehmerMax",
    "aktualisierungsdatum",
)

# OR IGNORE skips rows whose primary key already exists, replacing the former
# SELECT COUNT(*) round trip per entry.
_INSERT_SQL = "INSERT OR IGNORE INTO umschulung ({}) VALUES ({})".format(
    ", ".join(_UMSCHULUNG_COLUMNS),
    ", ".join("?" for _ in _UMSCHULUNG_COLUMNS),
)


def _dig(mapping, *keys):
    """Follow ``keys`` through nested dicts; return None if any level is absent."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def _strip_html(value):
    """Return ``value`` as text without HTML tags, or None when it is None."""
    if value is None:
        return None
    return _HTML_TAG_RE.sub('', str(value))


def _ms_to_date(timestamp_ms):
    """Convert a millisecond epoch timestamp to 'YYYY-MM-DD' (None stays None)."""
    if timestamp_ms is None:
        return None
    # The one-day shift is kept from the original implementation.
    # NOTE(review): presumably it compensates for timestamps that denote local
    # midnight but are expressed in UTC - confirm against the API.
    moment = datetime(1970, 1, 1) + timedelta(milliseconds=timestamp_ms, days=1)
    return moment.strftime('%Y-%m-%d')


def _entry_to_row(entry):
    """Flatten one API entry into a tuple ordered like _UMSCHULUNG_COLUMNS."""
    angebot = entry.get("angebot") or {}
    anbieter = angebot.get("bildungsanbieter") or {}
    ort = _dig(anbieter, "adresse", "ortStrasse") or {}
    teilnehmer_min = entry.get("teilnehmerMin")
    teilnehmer_max = entry.get("teilnehmerMax")

    return (
        entry["id"],  # the only mandatory field: it is the primary key
        angebot.get("titel"),
        _dig(entry, "dauer", "bezeichnung"),
        _dig(entry, "unterrichtsform", "bezeichnung"),
        _strip_html(angebot.get("abschlussart")),
        _strip_html(angebot.get("abschlussbezeichnung")),
        _strip_html(angebot.get("foerderung")),
        _strip_html(angebot.get("zugang")),
        _strip_html(angebot.get("zielgruppe")),
        _strip_html(angebot.get("anrechnung")),
        anbieter.get("id"),
        anbieter.get("name"),
        anbieter.get("telefonVorwahl"),
        anbieter.get("telefonDurchwahl"),
        anbieter.get("mobilVorwahl"),
        anbieter.get("mobilDurchwahl"),
        anbieter.get("faxVorwahl"),
        anbieter.get("faxDurchwahl"),
        anbieter.get("homepage"),
        anbieter.get("email"),
        _dig(ort, "koordinatenPlz", "lat"),
        _dig(ort, "koordinatenPlz", "lon"),
        ort.get("name"),
        _dig(ort, "land", "name"),
        _dig(ort, "land", "code"),
        _dig(angebot, "bildungsart", "bezeichnung"),
        _dig(angebot, "schulart", "bezeichnung"),
        _strip_html(entry.get("unterrichtszeiten")),
        entry.get("kostenWert"),
        entry.get("kostenWaehrung"),
        "ja" if entry.get("individuellerEinstieg") == 1 else "nein",
        "ja" if entry.get("foerderung") == 1 else "nein",
        entry.get("link"),
        _ms_to_date(entry.get("beginn")),
        _ms_to_date(entry.get("ende")),
        teilnehmer_min if teilnehmer_min is not None else 0,
        teilnehmer_max if teilnehmer_max is not None else 0,
        _ms_to_date(entry.get("aktualisierungsdatum")),
    )


def insert_data_into_db(data, db_connection):
    """Insert API entries into the ``umschulung`` table and commit.

    Entries whose ``id`` is already stored are skipped. Optional fields that
    are missing or null in an entry are stored as NULL; previously a missing
    nested object aborted the whole import and a null HTML field was stored
    as the literal text 'None'.

    Args:
        data: Iterable of entry dicts as found under ``_embedded.termine``.
        db_connection: Open ``sqlite3`` connection holding the table.

    Raises:
        KeyError: If an entry has no ``id``.
    """
    cursor = db_connection.cursor()
    cursor.executemany(_INSERT_SQL, (_entry_to_row(entry) for entry in data))
    db_connection.commit()
    #print("insert data")
    
def main():
    """Download all retraining offers and store them in the SQLite database.

    For every ``dauer`` filter value the paginated API endpoint is read page
    by page, and the collected entries are inserted into
    ``weiterbildung_analysis.db``. Errors raised while fetching or inserting
    are printed and the open transaction is rolled back; the success message
    is only printed when no error occurred.
    """
    # Environment variables (if set) override the built-in client credentials,
    # so they do not have to be edited in the source.
    client_id = os.environ.get("BA_CLIENT_ID", "1c852184-1944-4a9e-a093-5cc078981294")
    client_secret = os.environ.get("BA_CLIENT_SECRET", "777f9915-9f0d-4982-9c33-07b5810a3e79")

    # Get the access token using the client credentials flow
    access_token = get_access_token(client_id, client_secret)

    # List of values for the 'dauer' parameter
    dauer_values = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

    # Connect outside the try block: if connecting itself fails there is no
    # connection to roll back or close, and the handlers below would otherwise
    # hit an unbound name.
    db_connection = sqlite3.connect('weiterbildung_analysis.db')

    try:
        # Create the table if not exists
        create_table(db_connection)

        for dauer_value in dauer_values:
            api_url = f"https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer={dauer_value}"
            page = 0
            total_pages = 1

            # Pagination: collect the entries of every page for this filter
            all_data = []

            while page < total_pages:
                paginated_api_url = f"{api_url}&page={page}&size=10000"
                print(paginated_api_url)
                api_data = get_api_data(paginated_api_url, access_token)

                if not api_data:
                    # Empty response; get_api_data has already reported it.
                    break
                if "_embedded" not in api_data:
                    print("No more data")
                    break

                all_data.extend(api_data["_embedded"]["termine"])

                # The first response tells us how many pages exist in total
                total_pages = api_data["page"]["totalPages"]
                page += 1

            # Insert data into the database
            insert_data_into_db(all_data, db_connection)

    except Exception as e:
        print(f"Error: {e}")
        db_connection.rollback()  # Rollback changes in case of an error

    else:
        print("Data fetched and saved to SQLite database successfully.")

    finally:
        # Close the connection
        db_connection.close()


if __name__ == "__main__":
    main()


https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=0&page=0&size=10000
https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=1&page=0&size=10000
https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=2&page=0&size=10000
No more data
https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=3&page=0&size=10000
No more data
https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=4&page=0&size=10000
https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=5&page=0&size=10000
https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=6&page=0&size=10000
https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=7&page=0&size=10000
https://rest.arbeitsagentur.de/infosysbub/absuche/pc/v1/ausbildungsangebot?bart=109&dauer=7&pa

<font size= "4">***PIE CHART FOR SHOWING THE DISTRIBUTION OF DAUER ACROSS RETRAININGS***</font>

In [109]:
import sqlite3
from contextlib import closing

import plotly.express as px

# Number of retraining offers per "dauer" (duration) category.
query = '''
    SELECT dauer, COUNT(*) as count
    FROM umschulung
    GROUP BY dauer
    ORDER BY dauer
'''

# closing() releases the connection even when the query raises.
with closing(sqlite3.connect('weiterbildung_analysis.db')) as db_connection:
    cursor = db_connection.cursor()
    cursor.execute(query)
    results = cursor.fetchall()

dauer_values = [row[0] for row in results]
counts = [row[1] for row in results]

total_counts = sum(counts)
percentages = [(count / total_counts) * 100 for count in counts]

# Define custom colors for the pie chart
colors = px.colors.qualitative.Set3

# The former labels={'dauer_values': ...} mapping was dropped: it matched no
# column of the figure and therefore had no effect.
fig = px.pie(
    values=percentages,
    names=dauer_values,
    title='Distribution of Dauer Across ReTraining Courses (Percentage)',
    color_discrete_sequence=colors,  # Use custom colors
)

# Show percentage and category on each slice. A scalar pull separates every
# slice; the previous four-element list only affected the first four.
fig.update_traces(textinfo='percent+label', pull=0.1)

fig.update_layout(
    title_x=0.5,  # Center the title above the chart
    showlegend=True,
    legend_title_text='Dauer',
    margin=dict(l=0, r=0, t=50, b=0),  # Leave room for the title only
    font=dict(size=12),  # Font size for labels
    width=800,
    height=600,
)

fig.show()


<font size= "4">***BAR CHART FOR SHOWING THE DISTRIBUTION OF RETRAININGS WITH FOERDERUNG***</font>

In [110]:
import sqlite3
import plotly.graph_objects as go

# Count retrainings with and without funding ("foerderung") in the database.
db_connection = sqlite3.connect('weiterbildung_analysis.db')
cursor = db_connection.cursor()

query_ja = "SELECT COUNT(*) FROM umschulung WHERE foerderung = 'ja'"
count_ja = cursor.execute(query_ja).fetchone()[0]

query_nein = "SELECT COUNT(*) FROM umschulung WHERE foerderung = 'nein'"
count_nein = cursor.execute(query_nein).fetchone()[0]

db_connection.close()

categories = ['ja', 'nein']
counts = [count_ja, count_nein]

# One bar per category, each in its own colour
bar_trace = go.Bar(x=categories, y=counts, marker_color=['darkcyan', 'lightseagreen'])
fig = go.Figure(data=[bar_trace])

# Annotate every bar with its count, shifted slightly above the bar top
for category, total in zip(categories, counts):
    fig.add_annotation(
        text=str(total),
        x=category,
        y=total,
        showarrow=True,
        font=dict(size=12),
        yshift=10,
    )

# Titles, fonts, margins and figure size in a single layout update
fig.update_layout(
    xaxis_title='foerderung',
    yaxis_title='Number of ReTrainings',
    title='Number of ReTrainings with foerderung',
    title_x=0.5,
    xaxis=dict(tickfont=dict(size=12)),
    yaxis=dict(tickfont=dict(size=12)),
    font=dict(size=14),
    margin=dict(l=80, r=40, t=60, b=80),
    width=600,
    height=400,
)

fig.show()


<font size= "4">***DOUGHNUT PIE CHART FOR SHOWING THE DISTRIBUTION OF RETRAININGS WITH FOERDERUNG***</font>

In [111]:
import sqlite3
import plotly.graph_objects as go

# Count retrainings with and without funding ("foerderung").
db_connection = sqlite3.connect('weiterbildung_analysis.db')
cursor = db_connection.cursor()

query_ja = "SELECT COUNT(*) FROM umschulung WHERE foerderung = 'ja'"
cursor.execute(query_ja)
count_ja = cursor.fetchone()[0]

query_nein = "SELECT COUNT(*) FROM umschulung WHERE foerderung = 'nein'"
cursor.execute(query_nein)
count_nein = cursor.fetchone()[0]

db_connection.close()

# Original labels and values
original_labels = ['ja', 'nein']
original_values = [count_ja, count_nein]

# The manual percentage calculation was removed: its result was never used
# (the pie trace derives percentages itself) and it raised ZeroDivisionError
# when the table was empty.

# Move the last category to the front; this changes the slice order (and
# which colour each category gets), it does not rotate the chart by an angle.
rotated_labels = original_labels[-1:] + original_labels[:-1]
rotated_values = original_values[-1:] + original_values[:-1]

# Define custom colors
colors = ['#FF5733', '#3399FF']

fig = go.Figure(data=[
    go.Pie(
        labels=rotated_labels,
        values=rotated_values,
        textinfo='percent+label',
        hole=0.4,  # doughnut hole
        marker=dict(colors=colors),
    )
])

fig.update_layout(
    title='Distribution of ReTrainings with foerderung',
    title_x=0.5,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.0,  # Adjust this value to control the vertical position of the legend
        xanchor="center",
        x=0.5,
    ),
    width=600,
    height=400,
)

fig.show()


<font size= "4">***BAR CHART FOR SHOWING TOP PROVIDERS WITH MOST RETRAININGS***</font>

In [112]:
import sqlite3
import plotly.express as px

# The ten providers ("bildungsanbieter") with the most retraining offers.
db_connection = sqlite3.connect('weiterbildung_analysis.db')
cursor = db_connection.cursor()

query = '''
    SELECT bildungsanbieter, COUNT(*) as count
    FROM umschulung
    WHERE bildungsanbieter IS NOT NULL
    GROUP BY bildungsanbieter
    ORDER BY count DESC
    LIMIT 10
'''
cursor.execute(query)
data = cursor.fetchall()

db_connection.close()

provider_names = [entry[0] for entry in data]
training_counts = [entry[1] for entry in data]

# Define a custom color palette
custom_colors = ['#1f77b4']

fig = px.bar(
    y=provider_names,  # Use y for the long labels
    x=training_counts,  # Use x for the counts
    title='Top Providers with Most ReTrainings',
    labels={'y': 'Providers', 'x': 'Number of ReTrainings'},
    text=training_counts,
    orientation='h',  # Create a horizontal bar chart
    color_discrete_sequence=custom_colors  # Use custom colors
)

fig.update_xaxes(tickfont=dict(size=10))

fig.update_traces(texttemplate='%{text}', textposition='outside')

fig.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5))

# Extra room on the right so the outside text labels are not clipped.
# default=0 keeps the cell working when the query returns no rows
# (max() of an empty list raises ValueError).
xaxis_range = [0, max(training_counts, default=0) + 3000]
fig.update_xaxes(range=xaxis_range)

fig.show()


<font size= "4">***BAR CHART FOR SHOWING RETRAINING COUNT BY REGION***</font>

In [113]:
import sqlite3
import plotly.express as px

# Number of retraining offers per region code (column land_code),
# ordered alphabetically by code.
query = '''
    SELECT land_code, COUNT(*) as count
    FROM umschulung
    WHERE land_code IS NOT NULL
    GROUP BY land_code
    ORDER BY land_code ASC
'''

db_connection = sqlite3.connect('weiterbildung_analysis.db')
cursor = db_connection.cursor()
data = cursor.execute(query).fetchall()
db_connection.close()

# Split the (code, count) rows into the two chart series
region_codes = []
training_counts = []
for code, total in data:
    region_codes.append(code)
    training_counts.append(total)

fig = px.bar(
    x=region_codes,
    y=training_counts,
    title='ReTraining Count by Region (Land Code)',
    labels={'x': 'Region (Land Code)', 'y': 'Number of ReTrainings'},
    text=training_counts,
)

# Slanted tick labels are easier to read
fig.update_xaxes(tickangle=45, tickfont=dict(size=10))

# Counts above the bars, plus a thin black outline around each bar
fig.update_traces(
    texttemplate='%{text}',
    textposition='outside',
    textfont=dict(size=10),
    marker=dict(line=dict(color='black', width=1)),
)

# Legend placement, figure size and margins
fig.update_layout(
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5),
    width=800,
    height=500,
    margin=dict(l=80, r=40, t=60, b=80),
)

fig.show()


<font size= "4">***BAR CHART FOR SHOWING RETRAINING COUNT BY REGION AND INSTRUCTIONAL FORMAT***</font>

In [114]:
import sqlite3

import pandas as pd
import plotly.express as px

# The 20 largest (region, instructional format) combinations by offer count.
db_connection = sqlite3.connect('weiterbildung_analysis.db')
cursor = db_connection.cursor()

query = '''
    SELECT land, unterrichtsform, COUNT(*) as count
    FROM umschulung
    WHERE land IS NOT NULL AND unterrichtsform IS NOT NULL
    GROUP BY land, unterrichtsform
    ORDER BY count DESC
    limit 20
'''
cursor.execute(query)
data = cursor.fetchall()

db_connection.close()

df = pd.DataFrame(data, columns=['land', 'unterrichtsform', 'count'])

fig = px.bar(
    df,
    x='land',
    y='count',
    color='unterrichtsform',
    title='ReTraining Count by Region (Land) and Instructional Format',
    # With a DataFrame, label keys must be column names; the former 'x'/'y'
    # keys matched nothing, so the axis titles were never applied.
    labels={'land': 'Region (Land)', 'count': 'Number of Trainings'},
    text='count',
    barmode='group'
)

# Rotate x-axis labels for better visibility
fig.update_xaxes(tickangle=45, tickfont=dict(size=10))

# Display the number of trainings on top of each bar
fig.update_traces(texttemplate='%{text}', textposition='outside')

# Horizontal legend, anchored just above the plot area
fig.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="center", x=0.5))

# Show the plot
fig.show()


<font size= "4">***PIE CHART FOR SHOWING DISTRIBUTION OF INSTRUCTIONAL FORMATS (Umschulung)***</font>

In [116]:
import sqlite3

# pandas is needed for the DataFrame below; importing it here lets the cell
# run on its own instead of relying on an earlier cell having imported it.
import pandas as pd
import plotly.express as px

# Connect to the SQLite database
db_connection = sqlite3.connect('weiterbildung_analysis.db')
cursor = db_connection.cursor()

# Number of retraining offers per instructional format ("unterrichtsform")
query = '''
    SELECT unterrichtsform, COUNT(*) as count
    FROM umschulung
    WHERE unterrichtsform IS NOT NULL
    GROUP BY unterrichtsform
    ORDER BY count DESC
'''
cursor.execute(query)
data = cursor.fetchall()

# Close the database connection
db_connection.close()

# Create a DataFrame from the fetched data
df = pd.DataFrame(data, columns=['unterrichtsform', 'count'])

# Calculate the total count for percentage calculation
total_count = df['count'].sum()

# Calculate the percentage
df['percentage'] = (df['count'] / total_count) * 100

# Create the pie chart
fig = px.pie(
    df,
    names='unterrichtsform',
    values='percentage',
    title='Distribution of Instructional Formats in Umschulung',
    labels={'unterrichtsform': 'Instructional Format'},
    hole=0.4,  # Doughnut hole in the middle of the pie
)

# Improve the layout
fig.update_layout(
    width=1000,  # Set the width of the chart
    height=650,   # Set the height of the chart
    margin=dict(l=120, r=40, t=60, b=80),  # Adjust margin for better spacing
    font=dict(size=14),  # Adjust general font size
)

fig.update_traces(
    textinfo='percent+label',  # Display percentage and label in pie chart sections
    marker=dict(line=dict(color='white', width=2)),  # White border lines between sections
)

fig.show()


<font size= "4">***BAR CHART FOR SHOWING THE TOP 20 STATES THAT OFFER MOST RETRAININGS***</font>

In [117]:
import sqlite3
import plotly.express as px

# Connect to the SQLite database
db_connection = sqlite3.connect('weiterbildung_analysis.db')
cursor = db_connection.cursor()

# The 20 "land" values with the most retraining offers
query = '''
    SELECT land, COUNT(*) as count
    FROM umschulung
    WHERE land IS NOT NULL
    GROUP BY land
    ORDER BY count DESC
    LIMIT 20
'''
cursor.execute(query)
data = cursor.fetchall()

# Close the database connection
db_connection.close()

# Extract data into lists
state_names = [entry[0] for entry in data]
training_counts = [entry[1] for entry in data]

# Create the bar chart
fig = px.bar(
    x=state_names,
    y=training_counts,
    title='German States with Most ReTraining Offerings',
    labels={'x': 'German States', 'y': 'Number of ReTrainings'},
)

# Improve chart aesthetics
fig.update_layout(
    xaxis_title=None,  # Remove x-axis label
    xaxis_tickangle=-45,  # Rotate x-axis labels for better readability
    # Headroom above the tallest bar for the outside text labels; default=0
    # keeps the cell working when the query returns no rows (max() of an
    # empty list raises ValueError).
    yaxis_range=[0, max(training_counts, default=0) + 1000],
    title_x=0.5,  # Center the title above the chart
    showlegend=False,  # Hide legend
    margin=dict(l=40, r=40, t=60, b=40),  # Adjust margin for better spacing
    width=1000,  # Set the width of the chart
    height=600,   # Set the height of the chart
)

# Display the number of trainings on top of each bar
fig.update_traces(texttemplate='%{y}', textposition='outside', textfont=dict(size=10))

fig.show()


<font size= "4">***MAP FOR SHOWING TOP 20 RETRAINING PROVIDERS WITH THE NUMBER OF RETRAININGS BY CITY***</font>

In [119]:
import sqlite3
from html import escape

import folium
import geopandas as gpd
import pandas as pd

# Read the GeoJSON file containing the geometries of German states
path = "NUTS_RG_20M_2021_3035.json"
gdf = gpd.read_file(path)

# Specify CRS (see the filename)
gdf.crs = "EPSG:3035"

# Filter and transform the data for visualization.
# NOTE(review): gdf_de is not added to the map anywhere in this cell - confirm
# whether a boundary layer was intended.
gdf_de = gdf[gdf.CNTR_CODE == "DE"]
gdf_de = gdf_de.to_crs("EPSG:4326")  # Change CRS for Folium compatibility

# Creating a Folium map centered on Germany with a darker background (CartoDB Dark_Matter)
m = folium.Map(location=[51.1657, 10.4515], zoom_start=6, control_scale=True, tiles='CartoDB Dark_Matter')

db_connection = sqlite3.connect('weiterbildung_analysis.db')
cursor = db_connection.cursor()

# The 20 cities with the most offers. latitude, longitude and bildungsanbieter
# are not aggregated, so each comes from a single row of the city's group.
# NOTE(review): the provider shown is therefore just one provider located in
# that city, not necessarily the one with the most offers - confirm intent.
query = '''
    SELECT stadt, COUNT(*) as training_count, latitude, longitude, bildungsanbieter
    FROM umschulung
    WHERE stadt IS NOT NULL
    GROUP BY stadt
    ORDER BY training_count DESC
    LIMIT 20
'''
cursor.execute(query)
data = cursor.fetchall()

db_connection.close()

city_trainings_df = pd.DataFrame(data, columns=['stadt', 'training_count', 'latitude', 'longitude', 'bildungsanbieter'])

# A marker needs a position; rows without coordinates are left off the map
# instead of aborting the whole cell.
city_trainings_df = city_trainings_df.dropna(subset=['latitude', 'longitude'])

# Marker colours, reused cyclically
colors = ['red', 'blue', 'green', 'purple', 'orange', 'pink', 'gray', 'darkred', 'lightgreen', 'lightblue']

for idx, row in city_trainings_df.iterrows():
    # Pick a colour by row index (wraps around after the last colour)
    color = colors[idx % len(colors)]

    city_name = str(row['stadt'])
    provider_name = row['bildungsanbieter']
    # A missing provider name would break len() below
    provider_name = "n/a" if pd.isna(provider_name) else str(provider_name)
    training_count = row['training_count']

    # Calculate the popup width from the longest piece of content
    popup_width = max(len(city_name), len(provider_name), len(str(training_count))) * 10 + 30

    # Names come from external data, so escape them before embedding in HTML
    popup_html = (
        f"<div style='width: {popup_width}px; font-size: 14px;'>"
        f"<strong>{escape(city_name)}</strong><br>"
        f"({escape(provider_name)}): {training_count} trainings</div>"
    )

    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=folium.Popup(popup_html, max_width=popup_width),
        icon=folium.Icon(color=color)
    ).add_to(m)

# Saving the map in the same folder
map_filename = "Map_Top_20_Providers_umschulung.html"
m.save(map_filename)

print(f"Map for showing top 20 retrainings has been saved as '{map_filename}' in your folder.")


Map for showing top 20 retrainings has been saved as 'Map_Top_20_Providers_umschulung.html' in your folder.


<font size= "4">***MAP FOR SHOWING RETRAINING PROVIDERS WITH THE NUMBER OF TRAININGS BY CITY***</font>

In [121]:
import sqlite3
from contextlib import closing

import geopandas as gpd
import pandas as pd
from keplergl import KeplerGl
from shapely.geometry import Point

# Offer count per city, with the coordinates and provider taken from one row
# of each city's group (these columns are not aggregated).
query = '''
    SELECT stadt, COUNT(*) as training_count, latitude, longitude, bildungsanbieter
    FROM umschulung
    WHERE stadt IS NOT NULL
    GROUP BY stadt
    ORDER BY training_count DESC
'''

# The connection was previously left open; closing() releases it even when
# the query raises.
with closing(sqlite3.connect('weiterbildung_analysis.db')) as db_connection:
    cursor = db_connection.cursor()
    cursor.execute(query)
    data = cursor.fetchall()

city_trainings_df = pd.DataFrame(data, columns=['stadt', 'training_count', 'latitude', 'longitude', 'bildungsanbieter'])

# Create Point geometries (x = longitude, y = latitude)
geometry = [Point(lon, lat) for lon, lat in zip(city_trainings_df['longitude'], city_trainings_df['latitude'])]

# Create a GeoDataFrame
gdf = gpd.GeoDataFrame(city_trainings_df, geometry=geometry, crs='EPSG:4326')

# Keep only the points that lie inside Germany's boundary.
# NOTE: geopandas.datasets is deprecated and removed in GeoPandas 1.0; the
# 'naturalearth_lowres' data must then be loaded from a local copy instead.
germany_boundary = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
germany_boundary = germany_boundary[germany_boundary['name'] == 'Germany']
gdf = gdf[gdf.within(germany_boundary.unary_union)]

# Kepler.gl configuration: map centred on Germany, dark base style
map_config = {
    'version': 'v1',
    'config': {
        'visState': {
            'filters': [],
            'layers': [],
            'interactionConfig': {}
        },
        'mapState': {
            'latitude': 51.1657,
            'longitude': 10.4515,
            'zoom': 5,
            'isSplit': False
        },
        'mapStyle': {
            'styleType': 'muted_night',
            'topLayerGroups': {},
            'visibleLayerGroups': {
                'label': True,
                'road': True,
                'border': True,
                'building': True,
                'water': True,
                'land': True
            }
        }
    }
}

map_1 = KeplerGl(height=800, config=map_config)

# Add data to Kepler.gl instance
map_1.add_data(data=gdf, name='Germany Data')

# Save the Kepler.gl map to an HTML file
map_1.save_to_html(file_name='Map_Providers_Umschulung.html')

print("Map for showing retrainings providers with training count has been saved as Map_Providers_Umschulung.html in your folder.")



The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/.



User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to Map_Providers_Umschulung.html!
Map for showing retrainings providers with training count has been saved as Map_Providers_Umschulung.html in your folder.
