In [35]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go

In [37]:
# Load the semicolon-separated hourly-wage dataset (path is relative to the notebook).
df_uurloon = pd.read_csv('../data/uurloongemstudie.csv', sep=';')

# One qualitative palette per education level; they are consumed below as
# colors1 -> Hbo-bachelor, colors2 -> Hbo-master, colors3 -> Wo-bachelor,
# colors4 -> Wo-master.
colors1, colors2, colors3, colors4 = (
    px.colors.qualitative.G10,
    px.colors.qualitative.D3,
    px.colors.qualitative.Vivid,
    px.colors.qualitative.Pastel,
)

# Dutch labels found in the dataset -> English labels shown in the chart.
# Each key must appear exactly once: a repeated key in a dict literal is
# silently overwritten by its last occurrence, which hides typos.
label_mapping = {
    # Fields of study
    'Recht': 'Law',
    'Gedrag en Maatschappij': 'Behaviour & Society',
    'Gezondheidszorg': 'Healthcare',
    'Onderwijs': 'Education',
    'Economie': 'Economics',
    'Natuur': 'Nature',
    'Techniek': 'STEM',
    'Sectoroverstijgend': 'Interdisciplinary',
    'Landbouw en natuurlijke omgeving': 'Agriculture',
    'Taal en cultuur': 'Language & Culture',
    # Gender labels
    'Vrouwen': 'Women',
    'Mannen': 'Men'
}

# English names of the fields of study that get plotted.
# Order matters: traces are added to the figure in this order, and the
# dropdown visibility masks are positional.
studierichtingen = [
    'Economics', 'Interdisciplinary', 'Agriculture', 'Law', 'Nature',
    'STEM', 'Language & Culture', 'Education', 'Behaviour & Society', 'Healthcare',
]

# Measurement moments in chronological order ("directly after leaving
# education", then 1..5 years after). Plain string sorting would not
# yield this order, hence the ordered categorical.
peilmoment_order = [
    'Direct na verlaten onderwijs',
    '1 jaar na verlaten onderwijs',
    '2 jaar na verlaten onderwijs',
    '3 jaar na verlaten onderwijs',
    '4 jaar na verlaten onderwijs',
    '5 jaar na verlaten onderwijs',
]
wage_column = 'Uurloon werknemers na verlaten ho (euro)'

df_uurloon['Peilmoment_ordered'] = pd.Categorical(
    df_uurloon['Peilmoment'],
    categories=peilmoment_order,
    ordered=True,
)

# Sort rows chronologically so per-trace selections come out in x-axis order.
df_uurloon = df_uurloon.sort_values('Peilmoment_ordered')

# Values that cannot be parsed as numbers become NaN instead of raising.
df_uurloon[wage_column] = pd.to_numeric(df_uurloon[wage_column], errors='coerce')

# Mean hourly wage per measurement moment over all rows, as a Series in
# chronological order.
average_wages = (
    df_uurloon
    .groupby('Peilmoment_ordered', observed=True)[wage_column]
    .mean()
    .reset_index()
    .sort_values('Peilmoment_ordered')[wage_column]
)



In [107]:
# Dropdown that filters the chart by field of study.
#
# Trace layout this relies on: for every entry of `studierichtingen`, in
# order, four traces (Hbo-bachelor, Hbo-master, Wo-bachelor, Wo-master),
# followed by one trailing 'Mean' trace that stays visible in every view.
# The masks are computed from `studierichtingen` so they cannot drift out of
# sync with the trace-building loop when a field is added or reordered.
traces_per_field = 4
n_traces = traces_per_field * len(studierichtingen) + 1  # +1 for the 'Mean' trace
dropdown_subtitle = "<br><sub>In the majority of fields of study a Wo student would start and maintain a higher hourly wage</sub>"


def _dropdown_button(label, visible, headline):
    """Return one `update` button: restyle trace visibility, relayout the title.

    label    -- text shown in the dropdown.
    visible  -- list with one visibility flag per trace in the figure.
    headline -- first line of the chart title; the shared subtitle is appended.
    """
    return dict(
        label=label,
        method="update",
        args=[
            {"visible": visible},
            # Only the title text is updated so the title position configured
            # in the layout is left alone.
            # NOTE(review): newer plotly.js releases reportedly no longer
            # accept a bare string for `title`; "title.text" works either way.
            {"title.text": headline + dropdown_subtitle},
        ],
    )


def _field_visibility(field_idx):
    """Return the mask showing only the traces of field `field_idx` plus the mean."""
    mask = [False] * n_traces
    first = field_idx * traces_per_field
    mask[first:first + traces_per_field] = [True] * traces_per_field
    mask[-1] = True  # the trailing 'Mean' trace is always shown
    return mask


dropdown_buttons = [
    _dropdown_button(
        "All",
        [True] * n_traces,
        "Hourly wages per Level of education and Field of study over the years",
    )
]
dropdown_buttons.extend(
    _dropdown_button(
        field,
        _field_visibility(field_idx),
        "Hourly wages of " + field + " studies",
    )
    for field_idx, field in enumerate(studierichtingen)
)

# Build the line chart: for every field of study one trace per education
# level, then a single dashed trace with the overall mean wage.
# The resulting trace order (four per field, in `studierichtingen` order,
# mean last) is what the dropdown visibility masks are based on.
fig = go.Figure()

# English field-of-study label for every row; labels missing from
# `label_mapping` become NaN and therefore match no field below.
df_uurloon['Studierichting_mapped'] = df_uurloon['Studierichting'].map(label_mapping)

wage_col = 'Uurloon werknemers na verlaten ho (euro)'
level_col = 'Uitstromers ho met en zonder diploma'
years_after = [0, 1, 2, 3, 4, 5]
hover_body = '%{meta}<br>%{x} years after graduation <br>Hourly wage: € %{y:.2f}'

# One entry per education level, in the order the traces are added:
# (level value in the data, colour palette, fields whose trace is drawn
#  when the figure first opens, hover template).
# Field names must match `studierichtingen` exactly (case-sensitive);
# every other trace starts as 'legendonly'.
# The Hbo-bachelor template carries a space before <extra>; it is kept so
# the hover text stays byte-identical.
level_specs = [
    ('Hbo-bachelor', colors1, {'Language & Culture', 'Agriculture'}, hover_body + ' <extra></extra>'),
    ('Hbo-master', colors2, {'Interdisciplinary'}, hover_body + '<extra></extra>'),
    ('Wo-bachelor', colors3, {'Language & Culture'}, hover_body + '<extra></extra>'),
    ('Wo-master', colors4, {'Law', 'Economics'}, hover_body + '<extra></extra>'),
]

for i, studierichting in enumerate(studierichtingen):
    in_field = df_uurloon['Studierichting_mapped'] == studierichting

    for level, palette, default_fields, hovertemplate in level_specs:
        # Single combined mask instead of chained indexing, so no implicit
        # boolean-index alignment is involved.
        df_selected = df_uurloon[in_field & (df_uurloon[level_col] == level)]
        trace_name = studierichting + ' ' + level

        # NOTE(review): x is fixed to six positions; this assumes each
        # field/level selection holds exactly one row per measurement moment,
        # already in chronological order — TODO confirm against the data.
        fig.add_trace(
            go.Scatter(
                mode='lines+markers',
                x=years_after,
                y=df_selected[wage_col],
                name=trace_name,
                meta=trace_name,
                visible=True if studierichting in default_fields else 'legendonly',
                hovertemplate=hovertemplate,
                # Cycle through the palette when there are more fields than colours.
                line=dict(color=palette[i % len(palette)]),
            )
        )

# Overall mean wage per measurement moment; added last so it is the final trace.
fig.add_trace(
    go.Scatter(
        mode='lines+markers',
        x=years_after,
        y=average_wages,
        name='Mean',
        meta='Mean',
        line=dict(color='black', width=2, dash='dash'),
        hovertemplate=hover_body + '<extra></extra>',
    )
)

# --- Layout pieces, assembled in one update_layout call below ---

# Field-of-study dropdown, anchored by its top-left corner just above the plot.
field_menu = {
    'buttons': dropdown_buttons,
    'direction': 'down',
    'showactive': True,
    'x': 0,
    'y': 1.1,
    'xanchor': 'left',
    'yanchor': 'top',
}

# Initial title, left-aligned.
chart_title = dict(
    text='Hourly wages per Level of education and Field of study over the years<br><sub>On average, Wo students start with and maintain a higher hourly wage</sub>',
    y=0.9,
    x=0,
    xanchor='left',
    yanchor='bottom',
)

# Linear wage axis with a euro prefix on the tick labels.
wage_axis = go.layout.YAxis(
    title='Hourly wage',
    type='linear',
    tickprefix='€',
    tickformat=',.',
)

# Explanatory caption below the plot (paper coordinates).
caption_note = go.layout.Annotation(
    text="The line chart illustrates the difference in hourly wage per year after graduation. <br>Each line represents a different field of study per level of education. The dropdown<br>menu allows you to filter the data by field of study.",
    xref="paper", yref="paper",
    x=0, y=-0.3,
    showarrow=False,
    font=dict(size=12),
    xanchor='left',
    yanchor='bottom',
    align='left',
)

# Rotated label for the mean line. No xref/yref is given here, unlike the
# other two annotations, so x/y are not paper coordinates.
mean_label = go.layout.Annotation(
    x=4.5,
    y=23.8,
    textangle=-24,
    text="mean wage",
    showarrow=False,
    xanchor='right',
    font=dict(color='Black', size=14),
)

# Arrow hint at the top-right, outside the plot area (paper x = 1.1);
# presumably points at the legend — verify visually.
compare_hint = go.layout.Annotation(
    xref="paper", yref="paper",
    x=1.1,
    y=1,
    text="click to compare",
    showarrow=True,
    arrowhead=1,
    xanchor='center',
    font=dict(size=12),
)

fig.update_layout(
    updatemenus=[field_menu],
    title=chart_title,
    height=800,
    xaxis_title='Years after leaving university',
    yaxis=wage_axis,
    annotations=[caption_note, mean_label, compare_hint],
    # Extra top/bottom room for the title + dropdown and for the caption.
    margin=dict(t=180, b=160),
)

fig.show()