In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly as po
import plotly.express as px
import plotly.graph_objs as go

from counlist import countrylist

# Load the raw unemployment table. Forward slashes keep this relative path
# usable on Windows as well as Linux/macOS (the backslash form was Windows-only).
unemployment = pd.read_csv("../documentation/unemployment_all_ratio.csv")

# Age bands retained for the analysis, spelled the way they read once
# ' to ' has been replaced by '-'.
age_bands = ['15-24', '25-34', '35-44', '45-54', '55-64', '65-69', '70-74']

# Every row mask is built from `unemployment` and applied to `unemployment`,
# so no filter relies on index alignment between two different frames.
is_listed_country = unemployment["Country"].isin(countrylist)
is_unemployment_rate = unemployment["Series"] == "Unemployment rate"
# Rows whose country name contains "Euro" or "OECD" are excluded
# (presumably regional aggregates rather than single countries - confirm).
is_excluded_region = unemployment["Country"].str.contains("Euro|OECD", na=False)

unemp = (
    unemployment
    .loc[is_listed_country & is_unemployment_rate & ~is_excluded_region]
    .rename(columns={'SEX': 'Gender', 'Value': 'Unemploy_Rate', "Time": "Year"})
    .loc[:, ['Country', 'Gender', 'Age', 'Year', 'Unemploy_Rate']]
    .rename(columns=str.lower)
    # Keep observations up to and including 2016 and discard the 'MW' gender
    # code (presumably the combined men+women figure - confirm).
    .loc[lambda d: (d['year'] <= 2016) & (d['gender'] != 'MW')]
    .assign(
        gender=lambda d: d['gender'].replace({'MEN': 'male', 'WOMEN': 'female'}),
        # Literal (non-regex) replacement: '15 to 24' -> '15-24'.
        age=lambda d: d['age'].str.replace(' to ', '-', regex=False),
    )
    .loc[lambda d: d['age'].isin(age_bands)]
    # Independent copy so later in-place edits of `unemp` never touch (or warn
    # about touching) a slice of `unemployment`.
    .copy()
)

In [3]:
# Fold the finer age bands into two wider groups; the '15-24' and '25-34'
# labels are left as they are.
in_mid_band = unemp["age"].isin(['35-44', '45-54'])
unemp.loc[in_mid_band, 'age'] = '35-54'

in_senior_band = unemp["age"].isin(['55-64', '65-69', '70-74'])
unemp.loc[in_senior_band, 'age'] = "55-74"

In [4]:
# Column -> aggregation mapping shared by the groupby summaries:
# the unemployment rate is averaged.
aggregation_functions = dict(unemploy_rate='mean')

In [5]:
# Mean unemployment rate per age group, highest first, rounded to 2 decimals.
unemp_ages_mean = (
    unemp
    .groupby('age')
    .agg(aggregation_functions)
    .sort_values(by="unemploy_rate", ascending=False)
    .round(2)
)
unemp_ages_mean

Unnamed: 0_level_0,unemploy_rate
age,Unnamed: 1_level_1
15-24,18.68
25-34,9.07
35-54,6.37
55-74,3.06


In [6]:
# Mean unemployment rate per country, highest first, rounded to 2 decimals.
unemp_countries_mean = (
    unemp
    .groupby('country')
    .agg(aggregation_functions)
    .sort_values(by="unemploy_rate", ascending=False)
    .round(2)
)
unemp_countries_mean

Unnamed: 0_level_0,unemploy_rate
country,Unnamed: 1_level_1
South Africa,21.29
Spain,13.78
Greece,13.37
Croatia,11.4
Poland,10.82
Bulgaria,10.46
Latvia,10.28
Lithuania,9.75
Colombia,9.64
Italy,9.07


In [7]:
# Rank the per-country means once (descending, 2 decimals), then take both ends:
# the first five rows are the highest rates, the last five the lowest.
countries_ranked = unemp_countries_mean.sort_values("unemploy_rate", ascending=False).round(2)
most_unemp = countries_ranked.head(5)
least_unemp = countries_ranked.tail(5)

In [8]:
# Plain-text dump of the five highest and then the five lowest country means.
for ranking in (most_unemp, least_unemp):
    print(ranking)

              unemploy_rate
country                    
South Africa          21.29
Spain                 13.78
Greece                13.37
Croatia               11.40
Poland                10.82
             unemploy_rate
country                   
Iceland               3.93
Austria               3.86
Switzerland           3.40
Norway                3.09
Mexico                3.07


In [9]:
# Average unemployment rate for every (country, year, gender) combination;
# pivot_table aggregates with the mean by default.
unemp_pivot_mean_gndr = pd.pivot_table(
    unemp,
    values=['unemploy_rate'],
    index=['country', 'year', 'gender'],
).round(2)
unemp_pivot_mean_gndr

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,unemploy_rate
country,year,gender,Unnamed: 3_level_1
Australia,2000,female,4.99
Australia,2000,male,5.87
Australia,2001,female,5.35
Australia,2001,male,6.40
Australia,2002,female,5.16
...,...,...,...
United Kingdom,2014,male,6.75
United Kingdom,2015,female,4.97
United Kingdom,2015,male,5.58
United Kingdom,2016,female,4.71


In [10]:
# Average unemployment rate for every (country, year) pair, genders pooled;
# pivot_table aggregates with the mean by default.
unemp_pivot_mean = pd.pivot_table(
    unemp,
    values=['unemploy_rate'],
    index=['country', 'year'],
).round(2)
unemp_pivot_mean

Unnamed: 0_level_0,Unnamed: 1_level_0,unemploy_rate
country,year,Unnamed: 2_level_1
Australia,2000,5.43
Australia,2001,5.88
Australia,2002,5.51
Australia,2003,5.15
Australia,2004,4.78
...,...,...
United Kingdom,2012,7.72
United Kingdom,2013,7.41
United Kingdom,2014,6.29
United Kingdom,2015,5.27


In [11]:
# Unemployment evolution in Spain per gender (rows indexed by year and gender).
# Author's observation: the female and male rates tend to converge over time.
sp_un = unemp_pivot_mean_gndr.xs("Spain")
sp_un

Unnamed: 0_level_0,Unnamed: 1_level_0,unemploy_rate
year,gender,Unnamed: 2_level_1
2000,female,15.31
2000,male,8.64
2001,female,11.21
2001,male,6.74
2002,female,11.88
2002,male,6.83
2003,female,12.13
2003,male,7.04
2004,female,11.34
2004,male,6.84


In [12]:
# Yearly mean unemployment rate for Norway (rows indexed by year).
nw_un = unemp_pivot_mean.xs("Norway")
nw_un

Unnamed: 0_level_0,unemploy_rate
year,Unnamed: 1_level_1
2000,3.57
2001,3.53
2002,3.15
2003,3.58
2004,3.46
2005,3.67
2006,2.94
2007,2.21
2008,2.12
2009,2.91


In [13]:
# Yearly mean unemployment rate for South Africa (rows indexed by year).
sa_un = unemp_pivot_mean.xs("South Africa")
sa_un

Unnamed: 0_level_0,unemploy_rate
year,Unnamed: 1_level_1
2001,22.99
2002,24.65
2003,24.74
2004,22.58
2005,21.89
2006,20.64
2007,20.59
2008,18.43
2009,19.37
2010,20.39


In [14]:
# Per-gender averages of the numeric columns (year, unemploy_rate), 2 decimals.
# NOTE: despite its name, this frame is grouped by gender, not by age; the name
# is kept so that any later cell referring to it keeps working.
# numeric_only=True is required because `unemp` still holds string columns
# (country, age): pandas >= 2.0 raises TypeError when asked to average them,
# whereas older versions silently dropped them.
unemp_age_group = (
    unemp
    .groupby(['gender'])
    .mean(numeric_only=True)
    .round(2)
)
unemp_age_group

Unnamed: 0_level_0,year,unemploy_rate
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
female,2008.12,7.44
male,2008.1,7.02
