In [875]:
import plotly.graph_objs as go
import plotly.express as px
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Instellingen voor dataset:

Geslacht: totaal

Persoonskenmerken: geen internationale student

Perioden: 2015/'16

Arbeidsmarktpositie: alles behalve totaal en onbekend

Uitstromers: wo/hbo bachelor, wo/hbo master

Studierichting: alles behalve totaal

In [None]:
# Load the labour-market and hourly-wage exports (semicolon-separated CSV files).
arbeidsmarkt = pd.read_csv("data/arbeidsmarkt.csv", sep=';')
uurloon = pd.read_csv("data/uurloon.csv", sep=';')

# Coerce the measurement columns to numbers; cells that cannot be parsed become NaN.
uurloon_kolom = 'Uurloon werknemers na verlaten ho (euro)'
aantal_kolom = 'Uitstromers ho (aantal)'
uurloon[uurloon_kolom] = pd.to_numeric(uurloon[uurloon_kolom], errors='coerce')
arbeidsmarkt[aantal_kolom] = pd.to_numeric(arbeidsmarkt[aantal_kolom], errors='coerce')

# Replace every missing value in the labour-market frame with 0.
arbeidsmarkt = arbeidsmarkt.fillna(0)


In [877]:
# Keep only the rows whose Peilmoment is 'Direct na verlaten onderwijs'.
peilmoment1 = arbeidsmarkt[arbeidsmarkt['Peilmoment'].eq('Direct na verlaten onderwijs')]

In [893]:
# Sum the number of leavers per (Geslacht, Studierichting) pair and expose the
# sums as a regular column named 'Total studierichting'.
verdeling = (
    peilmoment1
    .groupby(['Geslacht', 'Studierichting'])['Uitstromers ho (aantal)']
    .sum()
    .reset_index(name='Total studierichting')
)

In [894]:
# Build the parallel labels / parents / values lists for a two-level sunburst:
# the inner ring is the sex, the outer ring is the field of study within that sex.
labels = []
parents = []
values = []

# Dutch category names as they occur in the data -> English display labels.
label_map = {
    'Recht': 'Law',
    'Gedrag en Maatschappij': 'Behaviour and society',
    'Gezondheidszorg': 'Healthcare',
    'Onderwijs': 'Education',
    'Economie': 'Economics',
    'Natuur': 'Nature',
    'Techniek': 'Technics',
    'Sectoroverstijgend': 'Other',
    'Landbouw en natuurlijke omgeving': 'Agriculture',
    'Taal en cultuur': 'Language & culture',
    'Vrouwen': 'Women',
    'Mannen': 'Men'
}

# Suffix appended to every outer-ring label so the same field of study gets a
# distinct label under each sex.
geslacht_symbol = {
    'Mannen': ' ♂',
    'Vrouwen': ' ♀'
}

# sort=False keeps the groups in the order in which they appear in `verdeling`.
for geslacht, groep in verdeling.groupby('Geslacht', sort=False):
    parent_label = label_map[geslacht]
    symbol = geslacht_symbol[geslacht]

    # Inner-ring sector. The figure uses branchvalues='total', so the parent
    # value is derived from the data as the sum of its children instead of
    # being hard-coded; it can then never disagree with the outer ring.
    labels.append(parent_label)
    parents.append("")
    values.append(float(groep['Total studierichting'].sum()))

    # Outer-ring sectors: one per field of study within this sex.
    for studierichting, aantal in zip(groep['Studierichting'], groep['Total studierichting']):
        labels.append(label_map[studierichting] + symbol)
        parents.append(parent_label)
        values.append(aantal)

fig = go.Figure(go.Sunburst(
    labels=labels,
    parents=parents,
    values=values,
    branchvalues='total',
    insidetextfont=dict(size=20),
))

fig.update_layout(
    title='Study subject by sex',
    margin=dict(t=50, l=25, r=25, b=25),
    height=600
)

fig.show()

This bar plot compares the hourly wage per degree. It's interesting to see that even though WO is a "higher" degree, its graduates initially earn less than those with the so-called "lower" degrees.

The sunburst chart shows which degree subjects women and men study most. We can compare how these differ between the sexes. To follow this up we will look at the difference in wage between sexes per study subject.

The boxplot compares the hourly wage within each degree subject per sex. Overall, it seems that women get paid less than men in every degree subject.

The heatmap allows us to look at the job market position per degree type through the years.

The heatmap shows us the employment status per degree (as a percentage within that degree). In this heatmap we can again see how this develops over time.

Interestingly, a high percentage of the Wo-bachelor students ends up going back to university. As we saw before, this is also the degree with the lowest hourly wage for the first 2 years after leaving university.

In [880]:
# Average hourly wage export (semicolon-separated CSV).
uurloongem = pd.read_csv("data/uurloongem.csv", sep=';')

# Split the frame into one subset per degree type, selected on the
# 'Uitstromers ho met en zonder diploma' column.
diploma = uurloongem['Uitstromers ho met en zonder diploma']
hbo_bachelor = uurloongem[diploma == 'Hbo-bachelor']
hbo_master = uurloongem[diploma == 'Hbo-master']
wo_bachelor = uurloongem[diploma == 'Wo-bachelor']
wo_master = uurloongem[diploma == 'Wo-master']

In [881]:
# Grouped bar chart: hourly wage per peilmoment, one bar series per degree type.
wage_column = 'Uurloon werknemers na verlaten ho (euro)'

# (legend name, subset of uurloongem, bar colour) for every degree type.
degree_bars = [
    ('Hbo-bachelor', hbo_bachelor, 'rgb(102,194,165)'),
    ('Hbo-master', hbo_master, 'rgb(252,141,98)'),
    ('Wo-bachelor', wo_bachelor, 'rgb(141,160,203)'),
    ('Wo-master', wo_master, 'rgb(231,138,195)'),
]

# x and y are taken from the SAME frame so every wage is plotted against the
# peilmoment of its own row; taking x from a different frame would pair the
# values purely by position.
trace = [
    go.Bar(
        x=frame['Peilmoment'],
        y=frame[wage_column],
        name=name,
        marker_color=color,
        hoverinfo='y+name'
    )
    for name, frame, color in degree_bars
]

layout = go.Layout(
    title='Hourly wage after leaving university per degree',
    height=400,
    xaxis=go.layout.XAxis(
        title='Benchmark',
        type='category',
        # The English tick texts are matched by position to the peilmoment
        # values in the order in which they first occur in uurloongem.
        tickvals=uurloongem['Peilmoment'].unique(),
        ticktext=['Directly after', '1 year after', '2 years after', '3 years after', '4 years after', '5 years after']
    ),
    yaxis=go.layout.YAxis(
        title='Hourly wage',
        tickprefix='€',
        # NOTE(review): ',.' has no precision or type; confirm the intended
        # d3-format specifier (for example ',.2f').
        tickformat=',.'
    ),
    legend=dict(
        x=1.0,
        y=1.0,
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode='group',
    bargap=0.15,
    bargroupgap=0.1,
)

fig = go.Figure(data=trace, layout=layout)
fig.show()

In [890]:
# Totals measured directly after leaving education (peilmoment1); they are
# attached to every row of `arbeidsmarkt` as 'Total degree level' and
# 'Total studierichting'.
total_counts_niveau = (
    peilmoment1
    .groupby('Uitstromers ho met en zonder diploma')['Uitstromers ho (aantal)']
    .sum()
    .reset_index(name='Total degree level')
)

total_counts = (
    peilmoment1
    .groupby('Studierichting')['Uitstromers ho (aantal)']
    .sum()
    .reset_index(name='Total studierichting')
)

# This cell reassigns `arbeidsmarkt`. Total columns left behind by an earlier
# run are dropped first, so running the cell again does not make merge() emit
# '_x' / '_y' suffixed duplicates of the total columns.
arbeidsmarkt = (
    arbeidsmarkt
    .drop(columns=['Total degree level', 'Total studierichting'], errors='ignore')
    .merge(total_counts_niveau, on='Uitstromers ho met en zonder diploma')
    .merge(total_counts, on='Studierichting')
)


In [891]:
# Express each row's leaver count as a percentage of the total of its field of
# study and of the total of its degree level.
uitstromers = arbeidsmarkt['Uitstromers ho (aantal)']
arbeidsmarkt['Percentage studierichting'] = uitstromers.div(arbeidsmarkt['Total studierichting']).mul(100)
arbeidsmarkt['Percentage degree level'] = uitstromers.div(arbeidsmarkt['Total degree level']).mul(100)

In [892]:
# Peilmoment values in chronological order; `data` holds one subset of
# `arbeidsmarkt` per value, in this same order.
peilmomenten = (
    'Direct na verlaten onderwijs',
    '1 jaar na verlaten onderwijs',
    '2 jaar na verlaten onderwijs',
    '3 jaar na verlaten onderwijs',
    '4 jaar na verlaten onderwijs',
    '5 jaar na verlaten onderwijs',
)

data = [arbeidsmarkt.loc[arbeidsmarkt['Peilmoment'] == peilmoment] for peilmoment in peilmomenten]

# Individual names for the six subsets.
(arbeidsmarktp0, arbeidsmarktp1, arbeidsmarktp2,
 arbeidsmarktp3, arbeidsmarktp4, arbeidsmarktp5) = data


In [886]:
# Heatmap of labour-market position (rows) per field of study (columns), with
# one trace per entry of `data` and a slider that shows one trace at a time.
fig = go.Figure()

for peilmoment_frame in data:
    heatmap_data = peilmoment_frame.pivot_table(
        values='Percentage studierichting',
        index='Arbeidsmarktpositie',
        columns='Studierichting',
        aggfunc='sum'
    )
    # Hover text: the cell value rounded to a whole percentage.
    heatmap_annotations = heatmap_data.map('{:.0f}%'.format)
    fig.add_trace(go.Heatmap(
        z=heatmap_data.values,
        x=heatmap_data.columns.tolist(),
        y=heatmap_data.index.tolist(),
        colorscale='Blues',
        zmin=0,
        zmax=100,
        visible=False,
        colorbar=dict(
            title='% Percentage',
            # z already holds percentages (0-100), so the ticks only need a
            # '%' suffix; '%d%' is not a valid d3-format specifier.
            ticksuffix='%',
        ),
        hoverongaps=False,
        hoverinfo='text+x',
        text=heatmap_annotations.values.tolist()
    ))

# Only the first trace is visible until the slider is moved.
fig.data[0].visible = True

# Slider step i shows trace i and hides all other traces.
trace_count = len(fig.data)
steps = [
    dict(
        method="restyle",
        args=["visible", [j == i for j in range(trace_count)]],
        label=str(i),
    )
    for i in range(trace_count)
]

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Years after leaving university: "},
    pad=dict(r=10, t=150),
    steps=steps
)]

fig.update_layout(
    title='Arbeidsmarktpositie percentueel per studie',
    xaxis=dict(title='Studierichting'),
    yaxis=dict(title='Arbeidsmarktpositie'),
    sliders=sliders,
    margin=dict(l=20, r=20, t=80, b=10),
    # Four components require the rgba() notation.
    paper_bgcolor="rgba(200, 210, 220, 1)"
)

fig.show()

In [887]:
# Heatmap of labour-market position (rows) per degree type (columns), with one
# trace per entry of `data` and a slider that shows one trace at a time.
fig = go.Figure()

for peilmoment_frame in data:
    heatmap_data = peilmoment_frame.pivot_table(
        values='Percentage degree level',
        index='Arbeidsmarktpositie',
        columns='Uitstromers ho met en zonder diploma',
        aggfunc='sum'
    )
    # Hover text: the cell value rounded to a whole percentage.
    heatmap_annotations = heatmap_data.map('{:.0f}%'.format)
    fig.add_trace(go.Heatmap(
        z=heatmap_data.values,
        x=heatmap_data.columns.tolist(),
        y=heatmap_data.index.tolist(),
        colorscale='Blues',
        zmin=0,
        zmax=100,
        visible=False,
        colorbar=dict(
            title='% Percentage',
            # z already holds percentages (0-100), so the ticks only need a
            # '%' suffix; '%d%' is not a valid d3-format specifier.
            ticksuffix='%',
        ),
        hoverongaps=False,
        hoverinfo='text+x',
        text=heatmap_annotations.values.tolist()
    ))

# Only the first trace is visible until the slider is moved.
fig.data[0].visible = True

# Slider step i shows trace i and hides all other traces.
trace_count = len(fig.data)
steps = [
    dict(
        method="restyle",
        args=["visible", [j == i for j in range(trace_count)]],
        label=str(i),
    )
    for i in range(trace_count)
]

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Years after leaving university: "},
    pad=dict(r=10, t=150),
    steps=steps
)]

fig.update_layout(
    # The columns of this heatmap are the degree types
    # ('Uitstromers ho met en zonder diploma'), not the fields of study, so the
    # title and x-axis title name the degree level.
    title='Arbeidsmarktpositie percentueel per opleidingsniveau',
    xaxis=dict(title='Opleidingsniveau'),
    yaxis=dict(title='Arbeidsmarktpositie'),
    sliders=sliders,
    margin=dict(l=20, r=20, t=80, b=10),
    # Four components require the rgba() notation.
    paper_bgcolor="rgba(200, 210, 220, 1)"
)

fig.show()

0: 6.159677801468846
ln: 2.9613835583984836
n: 3.565505804311774
te: 15.683487325278369
gz: 10.246387112058754
e: 27.24472873726605
r: 4.6079128168680406
GM: 20.350627813314382
tc: 8.327410566216537
so: 0.8647239990523572
