# Digital Twin of Society - Unemployment rate 📉

Visualisierung der Arbeitslosenquote in Europa von 2003 bis 2021.

<q>Die Arbeitslosenquote bezeichnet die Anzahl der Arbeitslosen als Prozentsatz der Erwerbsbevölkerung basierend auf der Definition der Internationalen Arbeitsorganisation (ILO). Die Erwerbsbevölkerung setzt sich aus Beschäftigten und Arbeitslosen zusammen. Zu den Arbeitslosen zählen Personen im Alter zwischen 15 und 74 Jahren, die in der Berichtswoche ohne Arbeit waren, für eine Arbeit sofort verfügbar waren und in den vergangenen vier Wochen aktiv auf Arbeitssuche waren oder eine Arbeit gefunden hatten, die sie innerhalb der nächsten drei Monate aufnehmen würden.</q> (Quelle: https://ec.europa.eu/eurostat/de/web/products-datasets/-/TIPSUN20)

Älteste Daten: 2003 \
Neueste Daten: 2021 \
Anzahl der Werte: 1071

<br>
<br>
<table align="left">
<tr>
<td><img src="https://nuernberg.digital/fileadmin/system/NDF-Logo-Jahresneutral-RGB-black-keinRand.svg" width="100" /></td>
<td><img src="https://www.capgemini.com/de-de/wp-content/themes/capgemini-komposite/assets/images/logo.svg" /></td>
</tr>
</table>

## 1. Install Requirements

In [71]:
!pip install -q pandas pycountry plotly

## 2. Imports

In [72]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pycountry

## 3. Data preprocessing

#### Get the data
European Commission, Eurostat, ‘Arbeitslosenquoten - jährliche Daten’ (tipsun20), 2022, accessed 2022-06-16, http://data.europa.eu/88u/dataset/DJWzl5McFh9fcCW8bzSxw

#### Dataset information
https://ec.europa.eu/eurostat/cache/metadata/EN/une_rt_m_esms.htm

In [73]:
# Load the tab-separated unemployment table from the project repository
DATA_URL = "https://raw.githubusercontent.com/Sultanow/dt_society/main/data/unemployment.tsv"
df = pd.read_csv(DATA_URL, delimiter="\t")

In [74]:
# Preview the first five rows of the raw table
df.head(n=5)

Unnamed: 0,"sex,age,unit,geo\time",2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,"T,Y15-24,PC_ACT,AT",:,:,:,:,:,:,11.3,10.1,9.5,10.0,10.3,11.0,11.3,12.0,10.5,10.0,9.1,11.7,11.0
1,"T,Y15-24,PC_ACT,BE",:,:,:,:,:,:,22.0,22.4,18.9,20.0,23.9,23.4,22.5,20.3,19.4 b,16.0,14.5,15.9,18.2
2,"T,Y15-24,PC_ACT,BG",:,:,:,:,:,:,19.4,25.1 b,28.2 b,31.2,31.5,26.9,24.7,20.4,16.1,15.9,12.1,17.4,15.8
3,"T,Y15-24,PC_ACT,CY",:,:,:,:,:,:,13.8,16.6,22.4,27.7,38.9,36.0,32.8,29.1,24.7,20.2,16.6,18.2,17.1
4,"T,Y15-24,PC_ACT,CZ",:,:,:,:,:,:,16.6,18.3,18.1 b,19.5,19.0,15.9,12.6,10.5,7.9,6.7,5.6,8.0,8.2


In [75]:
# Strip trailing whitespace from every column name so the columns can be
# addressed by their plain labels
df = df.set_axis(df.columns.str.rstrip(), axis=1)

In [76]:
# The years 2003-2008 are left out because they hold insufficient data
ignored_years = [str(year) for year in range(2003, 2009)]
df = df.drop(columns=ignored_years)

In [77]:
# Display the DataFrame after the 2003-2008 columns were dropped
df

Unnamed: 0,"sex,age,unit,geo\time",2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,"T,Y15-24,PC_ACT,AT",11.3,10.1,9.5,10.0,10.3,11.0,11.3,12.0,10.5,10.0,9.1,11.7,11.0
1,"T,Y15-24,PC_ACT,BE",22.0,22.4,18.9,20.0,23.9,23.4,22.5,20.3,19.4 b,16.0,14.5,15.9,18.2
2,"T,Y15-24,PC_ACT,BG",19.4,25.1 b,28.2 b,31.2,31.5,26.9,24.7,20.4,16.1,15.9,12.1,17.4,15.8
3,"T,Y15-24,PC_ACT,CY",13.8,16.6,22.4,27.7,38.9,36.0,32.8,29.1,24.7,20.2,16.6,18.2,17.1
4,"T,Y15-24,PC_ACT,CZ",16.6,18.3,18.1 b,19.5,19.0,15.9,12.6,10.5,7.9,6.7,5.6,8.0,8.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,"T,Y25-74,PC_ACT,PT",9.7,11.1,11.9,14.7,15.4,12.9,11.5,10.2,8.0,6.2,5.8,5.9,5.5
77,"T,Y25-74,PC_ACT,RO",6.8,7.3,7.3,7.0,7.3,6.9,6.9,5.9,4.9,4.2,3.8,5.0,4.5
78,"T,Y25-74,PC_ACT,SE",6.2,6.5,5.8,5.9,6.0,6.0,5.8,5.6,5.5,5.2 b,5.3,6.7,6.8
79,"T,Y25-74,PC_ACT,SI",5.0,6.5,7.5,7.9,9.2,8.9,8.4,7.5,6.2,4.8,4.2,4.4,4.2


In [78]:
# Give the first column the short name "meta" so it is easy to work with
df = df.rename(columns={df.columns[0]: "meta"})

In [79]:
# Split up the comma-separated 'meta' column to get sex, age, unit and geo data.
# `n` is passed by keyword: pandas >= 2.0 only accepts `pat` positionally, so
# the former `str.split(',', 3)` raises a TypeError there.
df_meta = df["meta"].str.split(",", n=3, expand=True)
df_meta.columns = ["sex", "age", "unit", "geo"]

In [80]:
# The 'meta' column has been split into df_meta, so drop it from df
df = df.drop(columns="meta")

In [81]:
# Put the split meta columns in front of the yearly value columns
result = pd.concat(objs=[df_meta, df], axis="columns")

In [82]:
# Display the merged DataFrame (split meta columns followed by the yearly values)
result

Unnamed: 0,sex,age,unit,geo,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,T,Y15-24,PC_ACT,AT,11.3,10.1,9.5,10.0,10.3,11.0,11.3,12.0,10.5,10.0,9.1,11.7,11.0
1,T,Y15-24,PC_ACT,BE,22.0,22.4,18.9,20.0,23.9,23.4,22.5,20.3,19.4 b,16.0,14.5,15.9,18.2
2,T,Y15-24,PC_ACT,BG,19.4,25.1 b,28.2 b,31.2,31.5,26.9,24.7,20.4,16.1,15.9,12.1,17.4,15.8
3,T,Y15-24,PC_ACT,CY,13.8,16.6,22.4,27.7,38.9,36.0,32.8,29.1,24.7,20.2,16.6,18.2,17.1
4,T,Y15-24,PC_ACT,CZ,16.6,18.3,18.1 b,19.5,19.0,15.9,12.6,10.5,7.9,6.7,5.6,8.0,8.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,T,Y25-74,PC_ACT,PT,9.7,11.1,11.9,14.7,15.4,12.9,11.5,10.2,8.0,6.2,5.8,5.9,5.5
77,T,Y25-74,PC_ACT,RO,6.8,7.3,7.3,7.0,7.3,6.9,6.9,5.9,4.9,4.2,3.8,5.0,4.5
78,T,Y25-74,PC_ACT,SE,6.2,6.5,5.8,5.9,6.0,6.0,5.8,5.6,5.5,5.2 b,5.3,6.7,6.8
79,T,Y25-74,PC_ACT,SI,5.0,6.5,7.5,7.9,9.2,8.9,8.4,7.5,6.2,4.8,4.2,4.4,4.2


In [83]:
# 'sex' and 'unit' hold the same value in every row, so they carry no information
result = result.drop(columns=["sex", "unit"])

In [84]:
# Clean up the data: remove the ' b' and ' d' markers that follow some values
# (e.g. '19.4 b'), first ' b' and then ' d'
result = result.replace(" b", "", regex=True).replace(" d", "", regex=True)

In [85]:
def iso2_to_iso3(iso2):
    """Convert a two-letter country code into the three-letter (alpha-3) code.

    Codes that deviate from ISO 3166-1 alpha-2 are translated through a small
    override table before pycountry is consulted; pycountry does not know them
    and would return ``None``.

    Args:
        iso2: Two-letter country code as found in the 'geo' column.

    Returns:
        The matching three-letter country code.

    Raises:
        ValueError: If the code is neither an override nor known to pycountry.
    """
    # 'EL' is the old code for Greece (ISO: GR / GRC). 'UK' is the code
    # Eurostat uses for the United Kingdom (ISO: GB / GBR).
    non_iso_overrides = {'EL': 'GRC', 'UK': 'GBR'}
    if iso2 in non_iso_overrides:
        return non_iso_overrides[iso2]

    country = pycountry.countries.get(alpha_2=iso2)
    if country is None:
        # Fail with a readable message instead of an AttributeError on None
        raise ValueError(f"Unknown two-letter country code: {iso2!r}")
    return country.alpha_3

In [86]:
  # Replace the two-letter codes in 'geo' with their three-letter equivalents
  result['geo'] = result['geo'].apply(iso2_to_iso3)

In [87]:
# Display the DataFrame after 'geo' was converted to three-letter country codes
result

Unnamed: 0,age,geo,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Y15-24,AUT,11.3,10.1,9.5,10.0,10.3,11.0,11.3,12.0,10.5,10.0,9.1,11.7,11.0
1,Y15-24,BEL,22.0,22.4,18.9,20.0,23.9,23.4,22.5,20.3,19.4,16.0,14.5,15.9,18.2
2,Y15-24,BGR,19.4,25.1,28.2,31.2,31.5,26.9,24.7,20.4,16.1,15.9,12.1,17.4,15.8
3,Y15-24,CYP,13.8,16.6,22.4,27.7,38.9,36.0,32.8,29.1,24.7,20.2,16.6,18.2,17.1
4,Y15-24,CZE,16.6,18.3,18.1,19.5,19.0,15.9,12.6,10.5,7.9,6.7,5.6,8.0,8.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,Y25-74,PRT,9.7,11.1,11.9,14.7,15.4,12.9,11.5,10.2,8.0,6.2,5.8,5.9,5.5
77,Y25-74,ROU,6.8,7.3,7.3,7.0,7.3,6.9,6.9,5.9,4.9,4.2,3.8,5.0,4.5
78,Y25-74,SWE,6.2,6.5,5.8,5.9,6.0,6.0,5.8,5.6,5.5,5.2,5.3,6.7,6.8
79,Y25-74,SVN,5.0,6.5,7.5,7.9,9.2,8.9,8.4,7.5,6.2,4.8,4.2,4.4,4.2


## 4. Data visualization

In [88]:
# Year columns: everything after the two leading columns (age and geo)
years = list(result.columns[2:])
# Distinct values of the 'age' column, sorted
agegroups = sorted(result['age'].unique().tolist())
# Distinct values of the 'geo' column, sorted
countries = sorted(result['geo'].unique().tolist())

In [89]:
# Print the three lists, one per line
print(years, agegroups, countries, sep="\n")

['2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021']
['Y15-24', 'Y15-74', 'Y25-74']
['AUT', 'BEL', 'BGR', 'CYP', 'CZE', 'DEU', 'DNK', 'ESP', 'EST', 'FIN', 'FRA', 'GRC', 'HRV', 'HUN', 'IRL', 'ITA', 'LTU', 'LUX', 'LVA', 'MLT', 'NLD', 'POL', 'PRT', 'ROU', 'SVK', 'SVN', 'SWE']


### Bar / Line chart

In [90]:
# Look up the yearly values for one country / age group combination
def get_row_from_dataframe_by_geo_and_age(df, geo, age):
    """Return the values of the first row matching ``geo`` and ``age`` as floats.

    Args:
        df: DataFrame with 'geo' and 'age' columns; all other columns must be
            convertible to float.
        geo: Value to match in the 'geo' column.
        age: Value to match in the 'age' column.

    Returns:
        A list of floats, one per remaining column, in column order.

    Raises:
        IndexError: If no row matches.
    """
    is_match = (df['geo'] == geo) & (df['age'] == age)
    value_columns = df.loc[is_match].drop(columns=['geo', 'age'])

    # Convert all matching rows first, then take the first one
    return value_columns.astype(float).iloc[0].tolist()

In [91]:
# Render a bar chart for the given country and age group
def render_barchart(country, agegroup):
    """Show a bar chart of the yearly values for one country and age group.

    Reads the values from the module-level ``result`` frame and uses the
    module-level ``years`` list as the x axis.

    Args:
        country: Country code to look up in the 'geo' column.
        agegroup: Age group to look up in the 'age' column.
    """
    rates = get_row_from_dataframe_by_geo_and_age(result, country, agegroup)
    heading = " | ".join(["Unemployment", country, agegroup])

    bars = go.Bar(x=years, y=rates)
    figure = go.Figure(
        data=[bars],
        layout=go.Layout(title=go.layout.Title(text=heading)),
    )

    figure.show()

In [92]:
# Example: render the bar chart for Germany and the 'Y15-74' age group
agegroup, country = "Y15-74", "DEU"
render_barchart(country=country, agegroup=agegroup)

In [93]:
def multi_plot_countries(df):
    """Show a bar chart of the 'Y15-74' values with a country dropdown.

    One bar trace per country is added to the figure; only the first one is
    visible initially. A dropdown switches the visible country and two buttons
    switch the trace type between "bar" and "line".

    The countries and year columns are derived from ``df`` itself, so the
    function no longer silently reads the notebook globals ``result``,
    ``countries`` and ``years``.

    Args:
        df: DataFrame with the columns 'geo' and 'age' plus one column per
            year. Every country needs a row for the 'Y15-74' age group.
    """
    # Same columns that get_row_from_dataframe_by_geo_and_age returns values for
    year_columns = [column for column in df.columns if column not in ('geo', 'age')]
    country_codes = sorted(df['geo'].unique())

    fig = go.Figure()

    # One (initially hidden) bar trace per country, fixed to the 'Y15-74' age group
    for code in country_codes:
        fig.add_trace(
            go.Bar(
                x=year_columns,
                y=get_row_from_dataframe_by_geo_and_age(df, code, 'Y15-74'),
                name=code,
                visible='legendonly',
            )
        )

    # Only show first trace
    fig.data[0].update(visible=True, showlegend=True)

    # Create the dropdown entry for one country
    def create_country_dropdown(selected):
        visibility = ['legendonly'] * len(country_codes)
        visibility[country_codes.index(selected)] = True

        # NOTE(review): 'title' is sent in the trace-update dict, as before;
        # confirm whether it was meant to go into a layout update instead.
        return dict(label=selected,
                    method='update',
                    args=[{'visible': visibility,
                           'title': selected,
                           'showlegend': True}])

    # Both menus sit at the same height above the plot
    button_layer_1_height = 1.20

    country_menu = dict(
        buttons=[create_country_dropdown(code) for code in country_codes],
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        active=0,
        x=0.20,
        xanchor="left",
        y=button_layer_1_height,
        yanchor="top",
    )
    chart_type_menu = dict(
        type="buttons",
        direction="left",
        buttons=[
            dict(args=["type", "bar"], label="Bar", method="restyle"),
            dict(args=["type", "line"], label="Line", method="restyle"),
        ],
        pad={"r": 10, "t": 10},
        showactive=True,
        x=0.0,
        xanchor="left",
        y=button_layer_1_height,
        yanchor="top",
    )
    fig.update_layout(updatemenus=[country_menu, chart_type_menu])

    # Add label for dropdown
    fig.update_layout(
        annotations=[
            dict(text="Country", x=0.15, xref="paper", y=1.15,
                 yref="paper", showarrow=False),
        ])

    fig.show()

In [94]:
# Show the country chart for the cleaned data
multi_plot_countries(df=result)

In [95]:
def multi_plot_countries_agegroup(df):
    """Show a bar chart with dropdowns for age group and country.

    Bar traces are built for every (age group, country) pair. The 'Y15-74'
    traces are added to the figure, with only the first one visible. The age
    group dropdown swaps the y values of the traces, the country dropdown
    switches the visible trace, and two buttons switch the trace type.

    Age groups, countries and year columns are derived from ``df`` itself, so
    the function no longer silently reads the notebook globals ``result``,
    ``agegroups``, ``countries`` and ``years``.

    Args:
        df: DataFrame with the columns 'geo' and 'age' plus one column per
            year. Every country needs a row for every age group, and the age
            group 'Y15-74' must be present.
    """
    # Same columns that get_row_from_dataframe_by_geo_and_age returns values for
    year_columns = [column for column in df.columns if column not in ('geo', 'age')]
    agegroup_names = sorted(df['age'].unique())
    country_codes = sorted(df['geo'].unique())

    fig = go.Figure()

    # Pre-build the traces of every age group; their y values feed the dropdown
    charts_per_agegroup = {
        name: [
            go.Bar(
                x=year_columns,
                y=get_row_from_dataframe_by_geo_and_age(df, code, name),
                name=code,
                visible='legendonly',
            )
            for code in country_codes
        ]
        for name in agegroup_names
    }

    # Start with the 'Y15-74' traces and show only the first country
    fig.add_traces(charts_per_agegroup['Y15-74'])
    fig.data[0].update(visible=True, showlegend=True)

    # Dropdown entry that makes exactly one country visible
    def create_country_dropdown(selected):
        visibility = ['legendonly'] * len(country_codes)
        visibility[country_codes.index(selected)] = True

        # NOTE(review): 'title' is sent in the trace-update dict, as before;
        # confirm whether it was meant to go into a layout update instead.
        return dict(label=selected,
                    method='update',
                    args=[{'visible': visibility,
                           'title': selected,
                           'showlegend': True}])

    # Dropdown entry that swaps in the y values of one age group
    def create_agegroup_dropdown(name):
        return dict(label='All' if name == 'Y15-74' else name,
                    method='update',
                    args=[{'y': [chart.y for chart in charts_per_agegroup[name]],
                           'title': name,
                           'showlegend': True}])

    # All menus sit at the same height above the plot
    button_layer_1_height = 1.20

    agegroup_menu = dict(
        buttons=sorted(
            [create_agegroup_dropdown(name) for name in agegroup_names],
            key=lambda button: button['label'],
        ),
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        x=0.15,
        xanchor="left",
        y=button_layer_1_height,
        yanchor="top",
    )
    country_menu = dict(
        buttons=[create_country_dropdown(code) for code in country_codes],
        direction="down",
        pad={"r": 10, "t": 10},
        showactive=True,
        x=0.30,
        xanchor="left",
        y=button_layer_1_height,
        yanchor="top",
    )
    chart_type_menu = dict(
        type="buttons",
        direction="left",
        buttons=[
            dict(args=["type", "bar"], label="Bar", method="restyle"),
            dict(args=["type", "line"], label="Line", method="restyle"),
        ],
        pad={"r": 10, "t": 10},
        showactive=True,
        x=0.0,
        xanchor="left",
        y=button_layer_1_height,
        yanchor="top",
    )
    fig.update_layout(updatemenus=[agegroup_menu, country_menu, chart_type_menu])

    # Labels next to the two dropdowns
    fig.update_layout(
        annotations=[
            dict(text="Agegroup", x=0.1, xref="paper", y=1.15, yref="paper",
                 align="left", showarrow=False),
            dict(text="Country", x=0.25, xref="paper", y=1.15,
                 yref="paper", showarrow=False),
        ])

    fig.show()

In [96]:
# Show the chart with age group and country dropdowns for the cleaned data
multi_plot_countries_agegroup(df=result)

### Map

In [97]:
def preprocess_dataframe(df, agegroup):
    """Return the rows of one age group in long format.

    The result has one row per (country, year) with the columns 'geo',
    'years' (int) and 'value' (float64).

    Args:
        df: DataFrame with the columns 'age' and 'geo' plus one column per
            year; the year labels must be convertible to int and the values
            to float.
        agegroup: Value of the 'age' column to keep.

    Returns:
        A new long-format DataFrame; ``df`` itself is not modified.
    """
    rows_for_agegroup = df.loc[df['age'] == agegroup].drop(columns='age')

    # 'geo' identifies the row; every other column is melted. The id column
    # is no longer repeated in value_vars, so the result does not depend on
    # how melt de-duplicates overlapping id/value columns.
    long_df = rows_for_agegroup.melt(id_vars='geo', var_name='years')
    long_df['value'] = long_df['value'].astype('float64')
    long_df['years'] = long_df['years'].astype('int')
    return long_df

In [98]:
def create_map_with_slider(df, agegroup):
    """Build an animated choropleth map of Europe for one age group.

    The frame is converted to long format with ``preprocess_dataframe``; the
    'years' column drives the animation and the colour range spans the
    minimum and maximum of all values of the age group.

    Args:
        df: DataFrame accepted by ``preprocess_dataframe``.
        agegroup: Age group to display.

    Returns:
        The plotly figure (it is not shown here).
    """
    long_df = preprocess_dataframe(df, agegroup)
    lowest, highest = long_df['value'].min(), long_df['value'].max()

    figure = px.choropleth(
        long_df,
        locations='geo',
        color='value',
        color_continuous_scale="Viridis",
        range_color=(lowest, highest),
        scope="europe",
        height=600,
        animation_frame="years",
        basemap_visible=True,
        labels={'value': 'Unemployment rate in %'},
    )
    figure.update_geos(fitbounds="locations", resolution=50, projection_type="orthographic")
    figure.update_layout(margin=dict(r=50, t=50, l=50, b=50))
    return figure

In [99]:
# Build and show the animated map for the first age group in the sorted list
fig = create_map_with_slider(df=result, agegroup=agegroups[0])
fig.show()

In [101]:
# Build one choropleth trace per age group and year; every trace uses the
# shared colour axis 'coloraxis' and the geo subplot 'geo'
fig = go.Figure()
charts_per_agegroup = {}
df_agegroups = {}

for agegroup in agegroups:
    df_agegroup = preprocess_dataframe(result, agegroup)
    df_agegroups[agegroup] = df_agegroup

    charts = []
    for year in df_agegroup['years'].unique():
        df_year = df_agegroup.loc[df_agegroup['years'] == year]
        charts.append(
            go.Choropleth(
                locations=df_year['geo'],
                z=df_year['value'],
                coloraxis='coloraxis',
                geo='geo',
                hovertemplate='years=' + str(year) + '<br>geo=%{location}<br>value=%{z}<extra></extra>',
                visible=False,
            )
        )

    charts_per_agegroup[agegroup] = charts

# Only the 'Y15-74' traces are added to the figure; the first one starts visible
fig.add_traces(charts_per_agegroup['Y15-74'])
fig.data[0].update(visible=True)

def create_agegroup_button(agegroup):
    """Build the dropdown button that switches the map to one age group.

    The button replaces the z values of the traces with those prepared in
    ``charts_per_agegroup`` and sets the colour axis range to the minimum and
    maximum of that age group's values in ``df_agegroups``.

    Args:
        agegroup: Key into ``charts_per_agegroup`` and ``df_agegroups``;
            'Y15-74' is labelled 'All'.

    Returns:
        A button dict using the 'update' method.
    """
    label = agegroup if agegroup != 'Y15-74' else 'All'
    values = df_agegroups[agegroup]['value']

    trace_update = {
        'z': [chart.z for chart in charts_per_agegroup[agegroup]],
        'title': agegroup,
    }
    layout_update = {'coloraxis': {'cmin': values.min(), 'cmax': values.max()}}

    return dict(label=label, method='update', args=[trace_update, layout_update])

def create_sliders(start_year=2009, num_years=13):
    """Build the slider that makes exactly one trace visible per step.

    Step ``i`` restyles 'visible' so that only trace ``i`` is shown and is
    labelled ``start_year + i``. The defaults reproduce the previously
    hard-coded 13 steps labelled 2009 to 2021.

    Args:
        start_year: Label of the first step.
        num_years: Number of steps, which is also the length of each
            visibility list.

    Returns:
        A one-element list containing the slider dict.
    """
    steps = []
    for position in range(num_years):
        # Fresh list per step so the steps do not share one visibility list
        visibility = [False] * num_years
        visibility[position] = True
        steps.append(dict(method='restyle',
                          args=['visible', visibility],
                          label=str(start_year + position)))

    return [dict(active=0, pad={'b': 10, 't': 10}, steps=steps)]

# Assemble the layout: age group dropdown, year slider, shared colour axis
# (initially scaled to the 'Y15-74' values) and the Europe geo subplot
default_values = df_agegroups['Y15-74']['value']

fig.update_layout(
    updatemenus=[
        dict(
            buttons=[create_agegroup_button(name) for name in ('Y15-74', 'Y15-24', 'Y25-74')],
            direction="down",
            pad=dict(r=10, t=-8),
            showactive=True,
            x=0.04,
            xanchor="left",
            y=0.9,
            yanchor="top",
        )
    ],
    sliders=create_sliders(),
    coloraxis=dict(
        cmin=default_values.min(),
        cmax=default_values.max(),
        colorbar=dict(title=dict(text='value')),
    ),
    geo=dict(
        center={},
        domain=dict(x=[0.0, 1.0], y=[0.0, 1.0]),
        resolution=50,
        scope='europe',
        visible=True,
    ),
    height=600,
    legend=dict(tracegroupgap=0),
    margin=dict(b=50, l=50, r=50, t=50),
    annotations=[dict(text="Agegroup", x=0, xref="paper", y=0.9, yref="paper", showarrow=False)],
)

fig.show()