In [7]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

In [8]:
# Load the two semicolon-separated exports.
arbeidsmarkt = pd.read_csv("../data/arbeidsmarkt.csv", sep=';')
uurloon = pd.read_csv("../data/uurloon.csv", sep=';')

# Coerce the measure columns to numbers; entries that cannot be parsed become NaN.
uurloon['Uurloon werknemers na verlaten ho (euro)'] = pd.to_numeric(
    uurloon['Uurloon werknemers na verlaten ho (euro)'],
    errors='coerce',
)
arbeidsmarkt['Uitstromers ho (aantal)'] = pd.to_numeric(
    arbeidsmarkt['Uitstromers ho (aantal)'],
    errors='coerce',
)

# Replace every missing value in the labour-market frame with 0.
arbeidsmarkt = arbeidsmarkt.fillna(0)

# Rows recorded at the first measurement moment.
peilmoment1 = arbeidsmarkt[arbeidsmarkt['Peilmoment'].eq('Direct na verlaten onderwijs')]

In [9]:
# Denominators for the percentages: summed leaver counts over the rows of
# peilmoment1, once per degree level and once per field of study.
total_counts_niveau = (
    peilmoment1
    .groupby('Uitstromers ho met en zonder diploma')['Uitstromers ho (aantal)']
    .sum()
    .reset_index()
    .rename(columns={'Uitstromers ho (aantal)': 'Total degree level'})
)
total_counts = (
    peilmoment1
    .groupby('Studierichting')['Uitstromers ho (aantal)']
    .sum()
    .reset_index()
    .rename(columns={'Uitstromers ho (aantal)': 'Total studierichting'})
)

# Columns this cell adds to arbeidsmarkt. They are dropped first so the cell
# can be re-run: otherwise the merges would hit already-present total columns,
# pandas would suffix them with _x/_y, and the percentage lines below would
# raise KeyError.
derived_columns = [
    'Total degree level',
    'Total studierichting',
    'Percentage studierichting',
    'Percentage degree level',
]
arbeidsmarkt = (
    arbeidsmarkt
    .drop(columns=derived_columns, errors='ignore')
    # Default (inner) joins: rows whose key has no total are dropped.
    .merge(total_counts_niveau, on='Uitstromers ho met en zonder diploma')
    .merge(total_counts, on='Studierichting')
)

# Each row's leaver count as a percentage of its group total.
# NOTE(review): a group total of 0 produces inf/NaN here — confirm no such group exists.
arbeidsmarkt['Percentage studierichting'] = (
    arbeidsmarkt['Uitstromers ho (aantal)'] / arbeidsmarkt['Total studierichting'] * 100
)
arbeidsmarkt['Percentage degree level'] = (
    arbeidsmarkt['Uitstromers ho (aantal)'] / arbeidsmarkt['Total degree level'] * 100
)

In [19]:
# Rows whose labour-market position is one of the two "with work" categories.
employed_mask = arbeidsmarkt['Arbeidsmarktpositie'].isin(
    ['Met werk, zonder uitkering', 'Met werk, met uitkering']
)


def _employed_share_for_level(level):
    """Return 'Percentage degree level' for the employed rows of one degree level."""
    level_mask = arbeidsmarkt['Uitstromers ho met en zonder diploma'] == level
    return arbeidsmarkt.loc[level_mask & employed_mask, 'Percentage degree level']


Studierichting_data = arbeidsmarkt.loc[employed_mask, 'Percentage studierichting']
hbo_bachelor_data = _employed_share_for_level('Hbo-bachelor')
hbo_master_data = _employed_share_for_level('Hbo-master')
wo_bachelor_data = _employed_share_for_level('Wo-bachelor')
wo_master_data = _employed_share_for_level('Wo-master')

# Chronological order of the measurement moments, and each moment's rank in it.
specific_order = [
    'Direct na verlaten onderwijs',
    '1 jaar na verlaten onderwijs',
    '2 jaar na verlaten onderwijs',
    '3 jaar na verlaten onderwijs',
    '4 jaar na verlaten onderwijs',
    '5 jaar na verlaten onderwijs',
]
order_mapping = dict(zip(specific_order, range(len(specific_order))))


def _sum_per_peilmoment(values):
    """Sum a percentage series per measurement moment, ordered by order_mapping."""
    per_moment = values.groupby(arbeidsmarkt['Peilmoment']).sum()
    # The sort key ignores the summed values and ranks by the index labels.
    return per_moment.sort_values(key=lambda series: series.index.map(order_mapping))


hbo_bachelor_summed_data = _sum_per_peilmoment(hbo_bachelor_data)
hbo_master_summed_data = _sum_per_peilmoment(hbo_master_data)
wo_bachelor_summed_data = _sum_per_peilmoment(wo_bachelor_data)
wo_master_summed_data = _sum_per_peilmoment(wo_master_data)

# Dutch category labels -> English display names used in the chart legend.
# (The 'Onderwijs' key used to be listed twice; a dict literal keeps only one
# entry per key, so the duplicate was dead text.)
label_mapping = {
    # Fields of study
    'Recht': 'Law',
    'Gedrag en Maatschappij': 'Behaviour & society',
    'Gezondheidszorg': 'Healthcare',
    'Onderwijs': 'Education',
    'Economie': 'Economics',
    'Natuur': 'Nature',
    'Techniek': 'STEM',
    'Sectoroverstijgend': 'Interdisciplinary',
    'Landbouw en natuurlijke omgeving': 'Agriculture',
    'Taal en cultuur': 'Language & culture',
    # Gender labels
    'Vrouwen': 'Women',
    'Mannen': 'Men'
}

fig = go.Figure()

# One line per degree level; x is the position 0-5 of the measurement moment.
for level_name, level_series in (
    ('Hbo bachelor', hbo_bachelor_summed_data),
    ('Hbo master', hbo_master_summed_data),
    ('Wo bachelor', wo_bachelor_summed_data),
    ('Wo master', wo_master_summed_data),
):
    level_trace = go.Scatter(
        x=[0, 1, 2, 3, 4, 5],
        y=level_series,
        mode='lines',
        name=level_name,
    )
    fig.add_trace(level_trace)

# English display name per row; labels missing from label_mapping become NaN.
arbeidsmarkt['Studierichting_mapped'] = arbeidsmarkt['Studierichting'].map(label_mapping)

Market_data_set = set(arbeidsmarkt['Studierichting_mapped'])

# Iterate in sorted order so the trace order (and with it the legend order and
# colours) is the same on every run; plain set iteration over strings is not
# stable between kernel sessions. NaN (an unmapped label) is skipped because
# it can never match a row and would only add an empty trace named "nan".
for studierichting in sorted(label for label in Market_data_set if pd.notna(label)):
    Study_data = Studierichting_data.loc[arbeidsmarkt['Studierichting_mapped'] == studierichting]
    # Sum the employed percentages per measurement moment, ordered by order_mapping.
    Study_summed_data = Study_data.groupby(arbeidsmarkt['Peilmoment']).sum().sort_values(key=lambda x: x.index.map(order_mapping))

    fig.add_trace(
        go.Scatter(
            x = [0,1,2,3,4,5],
            y = Study_summed_data,
            mode='lines',
            name= str(studierichting)
        )
    )

# The first four traces on the figure are the degree-level lines; every trace
# after them is a field-of-study line. Deriving the field count from the
# figure keeps each visibility mask exactly as long as fig.data, instead of
# relying on a hard-coded total of 14 traces.
n_level_traces = 4
n_field_traces = len(fig.data) - n_level_traces

fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=[
                dict(
                    label="All",
                    method="update",
                    args=[
                        {"visible": [True] * (n_level_traces + n_field_traces)},
                        {"title": "Employment Rates based on level of education or field of study"},
                    ],
                ),
                dict(
                    label="Level of education",
                    method="update",
                    args=[
                        {"visible": [True] * n_level_traces + [False] * n_field_traces},
                        {"title": "Employment rate based on level of education"},
                    ],
                ),
                dict(
                    label="Field of study",
                    method="update",
                    args=[
                        {"visible": [False] * n_level_traces + [True] * n_field_traces},
                        {"title": "Employment rate based on field of study"},
                    ],
                ),
            ],
        )
    ]
)

# Title with subtitle, and the explanatory caption shown below the plot area.
chart_title = 'Employment Rates based on level of education or field of study<br><sub>Hbo students are the majority of students employed each year</sub>'
caption_text = "The line chart illustrates the percentage of students employed per year after graduation.<br>Each line represents a different field of study or level of education. The dropdown menu<br>allows you to filter the data based on field of study and level of education."

# Caption anchored by its bottom-left corner, positioned in paper coordinates
# below the axes (y=-0.3).
caption = go.layout.Annotation(
    text=caption_text,
    xref="paper",
    yref="paper",
    x=0,
    y=-0.3,
    showarrow=False,
    font={'size': 12},
    xanchor='left',
    yanchor='bottom',
    align='left',
)

fig.update_layout(
    title_text=chart_title,
    annotations=[caption],
    # Bottom margin of 160 px, leaving space under the axes for the caption.
    margin={'b': 160},
    xaxis_title='Years after leaving college',
    yaxis_title='Percentage of students employed per year',
    height=700,
)

fig.show()