In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [2]:
# Load the unemployment table, reading only the first 266 data rows.
# NOTE(review): the 266-row cap presumably skips trailing non-data rows in the CSV — confirm.
unempl = pd.read_csv(
    '../data/unemployment_rate.csv',
    nrows=266,
)

In [3]:
# Drop the series/country identifier columns; only the country name and the year columns are kept.
unempl = unempl.drop(
    ['Series Name', 'Series Code', 'Country Code'],
    axis='columns',
)

In [4]:
# Shorten the country label column to 'Country'.
unempl = unempl.rename({'Country Name': 'Country'}, axis='columns')

In [5]:
# Show the current column labels of `unempl`.
unempl.columns

Index(['Country', '2000 [YR2000]', '2001 [YR2001]', '2002 [YR2002]',
       '2003 [YR2003]', '2004 [YR2004]', '2005 [YR2005]', '2006 [YR2006]',
       '2007 [YR2007]', '2008 [YR2008]', '2009 [YR2009]', '2010 [YR2010]',
       '2011 [YR2011]', '2012 [YR2012]', '2013 [YR2013]', '2014 [YR2014]',
       '2015 [YR2015]', '2016 [YR2016]', '2017 [YR2017]', '2018 [YR2018]',
       '2019 [YR2019]', '2020 [YR2020]', '2021 [YR2021]', '2022 [YR2022]',
       '2023 [YR2023]'],
      dtype='object')

In [6]:
# Remove the bracketed '[YRnnnn]' suffix from the year labels.
# The space that preceded the bracket is not matched, so it stays in the label.
unempl.columns = unempl.columns.str.replace(
    pat=r'\[YR\d+\]',
    repl='',
    regex=True,
)

In [7]:
# List the distinct values found in the 'Country' column.
unempl['Country'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'American Samoa', 'Andorra',
       'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Australia', 'Austria', 'Azerbaijan', 'Bahamas, The', 'Bahrain',
       'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin',
       'Bermuda', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands',
       'Brunei Darussalam', 'Bulgaria', 'Burkina Faso', 'Burundi',
       'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Channel Islands', 'Chile',
       'China', 'Colombia', 'Comoros', 'Congo, Dem. Rep.', 'Congo, Rep.',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Curacao',
       'Cyprus', 'Czechia', 'Denmark', 'Djibouti', 'Dominica',
       'Dominican Republic', 'Ecuador', 'Egypt, Arab Rep.', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia',
       'Faroe Islands',

In [8]:
# Labels in the 'Country' column that should be excluded from the analysis.
# NOTE(review): 'World' is not in this list — confirm whether the global
# aggregate is meant to remain in the data.
delete = [
    'Africa Eastern and Southern',
    'Africa Western and Central',
    'Arab World',
    'Caribbean small states',
    'Central Europe and the Baltics',
    'Early-demographic dividend',
    'East Asia & Pacific',
    'East Asia & Pacific (excluding high income)',
    'East Asia & Pacific (IDA & IBRD countries)',
    'Euro area',
    'Europe & Central Asia',
    'Europe & Central Asia (excluding high income)',
    'Europe & Central Asia (IDA & IBRD countries)',
    'European Union',
    'Fragile and conflict affected situations',
    'Heavily indebted poor countries (HIPC)',
    'High income',
    'IBRD only',
    'IDA & IBRD total',
    'IDA blend',
    'IDA only',
    'IDA total',
    'Late-demographic dividend',
    'Latin America & Caribbean',
    'Latin America & Caribbean (excluding high income)',
    'Latin America & the Caribbean (IDA & IBRD countries)',
    'Least developed countries: UN classification',
    'Low & middle income',
    'Low income',
    'Lower middle income',
    'Middle East & North Africa',
    'Middle East & North Africa (excluding high income)',
    'Middle East & North Africa (IDA & IBRD countries)',
    'Middle income',
    'North America',
    'Not classified',
    'OECD members',
    'Other small states',
    'Pacific island small states',
    'Post-demographic dividend',
    'Pre-demographic dividend',
    'Small states',
    'South Asia',
    'South Asia (IDA & IBRD)',
    'Sub-Saharan Africa',
    'Sub-Saharan Africa (excluding high income)',
    'Sub-Saharan Africa (IDA & IBRD countries)',
    'Upper middle income',
]

In [9]:
# Keep only the rows whose 'Country' value is not one of the labels in `delete`.
cleaned = unempl.loc[~unempl['Country'].isin(delete), :]

In [10]:
# Show the column labels of the filtered frame.
cleaned.columns

Index(['Country', '2000 ', '2001 ', '2002 ', '2003 ', '2004 ', '2005 ',
       '2006 ', '2007 ', '2008 ', '2009 ', '2010 ', '2011 ', '2012 ', '2013 ',
       '2014 ', '2015 ', '2016 ', '2017 ', '2018 ', '2019 ', '2020 ', '2021 ',
       '2022 ', '2023 '],
      dtype='object')

In [11]:
# Trim leading/trailing whitespace from every column label.
cleaned.columns = cleaned.columns.map(str.strip)

In [12]:
# Year column labels, '2000' through '2023', as strings.
years = [str(year) for year in range(2000, 2024)]

In [13]:
# Reshape from one column per year to one row per (Country, Year) pair.
unpivot = pd.melt(
    cleaned,
    id_vars=['Country'],
    value_vars=years,
    var_name='Year',
    value_name='Unemployment_rate',
)

In [14]:
# Display the long-format unemployment frame.
unpivot

Unnamed: 0,Country,Year,Unemployment_rate
0,Afghanistan,2000,7.955
1,Albania,2000,19.023
2,Algeria,2000,29.77
3,American Samoa,2000,..
4,Andorra,2000,..
...,...,...,...
5227,West Bank and Gaza,2023,..
5228,"Yemen, Rep.",2023,17.215
5229,Zambia,2023,5.913
5230,Zimbabwe,2023,8.759


In [15]:
##unpivot.to_csv('unempl_world.csv', index = False)

In [16]:
# Load the HDI table (Country / Year / HDI).
hdi = pd.read_csv(filepath_or_buffer='../data/for_hdi_slide.csv')

In [17]:
# Check the column dtypes of the HDI table.
hdi.dtypes

Country     object
Year         int64
HDI        float64
dtype: object

In [18]:
# Check the column dtypes of the long-format unemployment table.
unpivot.dtypes

Country              object
Year                 object
Unemployment_rate    object
dtype: object

In [19]:
# Convert the string year labels to integers, using the smallest integer dtype that fits.
unpivot = unpivot.assign(
    Year=pd.to_numeric(
        unpivot['Year'],
        errors='coerce',
        downcast='integer',
    )
)

In [20]:
# Parse the rate column as floats; non-numeric placeholders such as '..' become NaN.
# No integer downcast is requested: the rate is fractional, so the column must stay a
# float dtype regardless of the values present.
unpivot['Unemployment_rate'] = pd.to_numeric(unpivot['Unemployment_rate'], errors='coerce')

In [21]:
# Re-check the dtypes after the numeric conversions.
unpivot.dtypes

Country               object
Year                   int16
Unemployment_rate    float64
dtype: object

In [22]:
# Inner join: only (Country, Year) pairs present in both tables are kept.
merged_for_corr = unpivot.merge(
    hdi,
    on=['Country', 'Year'],
    how='inner',
)

In [23]:
def corr(groupped, x='Unemployment_rate', y='HDI'):
    """Return the correlation between two columns of ``groupped``.

    Parameters
    ----------
    groupped : pandas.DataFrame
        Frame containing both columns; typically one group handed over by
        ``groupby(...).apply``.
    x, y : str, optional
        Labels of the two columns to correlate. The defaults reproduce the
        original hard-coded pair, so ``apply(corr)`` keeps working unchanged;
        other pairs can be requested with ``apply(corr, x=..., y=...)``.

    Returns
    -------
    float
        The off-diagonal entry of the 2x2 matrix produced by
        ``DataFrame.corr()`` (pandas' default method), which is NaN when the
        correlation cannot be computed.
    """
    return groupped[[x, y]].corr().iloc[0, 1]

In [24]:
# Unemployment/HDI correlation computed separately for each year.
# The two needed columns are selected before `apply`, so the grouping column is
# not passed to `corr` (applying over grouping columns is deprecated in recent pandas).
by_year_corr = merged_for_corr.groupby('Year')[['Unemployment_rate', 'HDI']].apply(corr)

In [25]:
# Turn the per-year Series into a two-column frame (Year, 0).
by_year = (
    by_year_corr
    .to_frame()
    .reset_index()
)

In [26]:
# Give the unnamed value column (labelled 0) a readable name, then display the result.
by_year = by_year.rename({0: 'Correlation'}, axis='columns')
by_year

Unnamed: 0,Year,Correlation
0,2000,0.062416
1,2001,0.047205
2,2002,0.062571
3,2003,0.07397
4,2004,0.063619
5,2005,0.048178
6,2006,0.033113
7,2007,-0.014008
8,2008,-0.017855
9,2009,0.11265


In [27]:
##by_year.to_csv('unempl_corr_by_year.csv', index = False)

In [28]:
# Unemployment/HDI correlation computed separately for each country (across years).
# The two needed columns are selected before `apply`, so the grouping column is
# not passed to `corr` (applying over grouping columns is deprecated in recent pandas).
by_country_corr = merged_for_corr.groupby('Country')[['Unemployment_rate', 'HDI']].apply(corr)

In [29]:
# Turn the per-country Series into a two-column frame (Country, 0).
by_country = (
    by_country_corr
    .to_frame()
    .reset_index()
)

In [30]:
# Give the unnamed value column (labelled 0) a readable name, then display the result.
by_country = by_country.rename({0: 'Correlation'}, axis='columns')
by_country

Unnamed: 0,Country,Correlation
0,Afghanistan,0.534500
1,Albania,-0.648869
2,Algeria,-0.835015
3,Andorra,
4,Angola,-0.063183
...,...,...
183,Vanuatu,-0.628554
184,"Venezuela, RB",-0.627863
185,"Yemen, Rep.",-0.511892
186,Zambia,-0.863577


In [31]:
# Load the Human Freedom Index table.
hfi = pd.read_csv(filepath_or_buffer='../data/2023-Human-Freedom-Index-Data.csv')

In [32]:
# Drop the 'iso' and 'region' columns.
hfi = hfi.drop(['iso', 'region'], axis='columns')

In [33]:
# Mean hf_score per year across all rows of the HFI table.
world = hfi.groupby('year', as_index=False)['hf_score'].mean()

In [34]:
# Capitalise the year column so it matches the 'Year' key used by the HDI table.
world = world.rename(columns={'year': 'Year'})

In [35]:
##world.to_csv('hfi_world.csv', index = False)

In [36]:
# Mean HDI per year across all rows of the HDI table.
world2 = hdi.groupby('Year', as_index=False)['HDI'].mean()

In [37]:
# Join the two yearly averages on 'Year'; only years present in both are kept.
merged = world.merge(world2, on='Year', how='inner')

In [38]:
# Label the averaged freedom score as 'HFI'.
merged = merged.rename(columns={'hf_score': 'HFI'})

In [39]:
# Display the yearly HFI and HDI averages side by side.
merged 

Unnamed: 0,Year,HFI,HDI
0,2000,7.143203,0.633663
1,2001,7.180894,0.639709
2,2002,7.201301,0.6436
3,2003,7.208583,0.648239
4,2004,7.234692,0.655085
5,2005,7.190709,0.660495
6,2006,7.204823,0.6675
7,2007,7.21922,0.675297
8,2008,7.188794,0.680973
9,2009,7.167943,0.685135


In [40]:
# Check the column dtypes of the yearly averages table.
merged.dtypes

Year      int64
HFI     float64
HDI     float64
dtype: object

In [41]:
def corr(groupped):
    """Return the correlation between the 'HFI' and 'HDI' columns of ``groupped``.

    ``groupped`` is a DataFrame containing both columns. The result is the
    off-diagonal entry of the 2x2 matrix returned by ``DataFrame.corr()``.
    """
    pair = groupped.loc[:, ['HFI', 'HDI']]
    matrix = pair.corr()
    return matrix.iloc[0, 1]

In [42]:
corr = merged[['HFI', 'HDI']].corr().iloc[0, 1]

In [43]:
corr

-0.7849758533191735

In [44]:
# Align the HFI column names with the HDI table ('Year', 'Country') and label the score 'HFI'.
hfi = hfi.rename(
    columns={
        'year': 'Year',
        'countries': 'Country',
        'hf_score': 'HFI',
    }
)

In [45]:
# Inner join of HDI and HFI rows on (Year, Country); unmatched rows on either side are dropped.
merge = hdi.merge(
    hfi,
    on=['Year', 'Country'],
    how='inner',
)

In [46]:
# Display the joined HDI/HFI table.
merge

Unnamed: 0,Country,Year,HDI,HFI,hf_rank,hf_quartile,pf_rol_procedural,pf_rol_civil,pf_rol_criminal,pf_rol_vdem,...,ef_regulation_business_impartial,ef_regulation_business_compliance,ef_regulation_business,ef_regulation_enter_openness,ef_regulation_enter_permits,ef_regulation_enter_distortion,ef_regulation_enter,ef_regulation,ef_score,ef_rank
0,Albania,2000,0.68,7.43,53.0,2.0,,,,6.53,...,4.93,3.65,4.29,5.76,,,5.76,5.69,6.48,71.0
1,Algeria,2000,0.65,4.79,125.0,4.0,,,,5.24,...,3.62,2.95,3.28,3.94,,,3.94,3.64,4.69,119.0
2,Angola,2000,0.38,,,,,,,3.63,...,2.90,1.81,2.36,0.56,,,0.56,4.13,,
3,Argentina,2000,0.78,8.01,41.0,2.0,,,,6.62,...,5.69,2.42,5.60,5.65,,,5.65,5.98,7.13,40.0
4,Armenia,2000,0.66,,,,,,,5.88,...,3.60,0.00,1.80,5.17,,,5.17,4.50,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3515,"Venezuela, RB",2021,0.69,4.22,160.0,4.0,,,,4.29,...,1.92,0.00,0.80,1.39,4.98,0.49,2.29,2.17,3.01,165.0
3516,Vietnam,2021,0.70,5.51,130.0,4.0,,,,4.81,...,3.88,4.79,4.55,4.55,8.21,3.96,5.57,6.10,6.26,106.0
3517,"Yemen, Rep.",2021,0.46,3.43,164.0,4.0,,,,2.39,...,0.62,6.11,2.42,2.07,,5.00,3.54,3.08,4.18,161.0
3518,Zambia,2021,0.56,6.21,113.0,3.0,,,,5.65,...,4.39,4.30,4.06,2.44,7.85,0.19,3.49,4.89,5.84,129.0


In [47]:
def corr(groupped, x='HDI', y='HFI'):
    """Return the correlation between two columns of ``groupped``.

    Parameters
    ----------
    groupped : pandas.DataFrame
        Frame containing both columns; typically one group handed over by
        ``groupby(...).apply``.
    x, y : str, optional
        Labels of the two columns to correlate. The defaults reproduce the
        original hard-coded pair, so ``apply(corr)`` keeps working unchanged;
        other pairs can be requested with ``apply(corr, x=..., y=...)``.

    Returns
    -------
    float
        The off-diagonal entry of the 2x2 matrix produced by
        ``DataFrame.corr()`` (pandas' default method), which is NaN when the
        correlation cannot be computed.
    """
    return groupped[[x, y]].corr().iloc[0, 1]

In [48]:
# HDI/HFI correlation computed separately for each year.
# Only the two needed columns are handed to `corr`: this keeps the grouping column
# out of the applied frame (deprecated in recent pandas) and avoids passing the
# many unused HFI columns to every call.
by_year_corr = merge.groupby('Year')[['HDI', 'HFI']].apply(corr)

In [49]:
# Turn the per-year Series into a two-column frame (Year, 0).
by_year = (
    by_year_corr
    .to_frame()
    .reset_index()
)

In [50]:
# Give the unnamed value column (labelled 0) a readable name, then display the result.
by_year = by_year.rename({0: 'Correlation'}, axis='columns')
by_year

Unnamed: 0,Year,Correlation
0,2000,0.73853
1,2001,0.747798
2,2002,0.743742
3,2003,0.729451
4,2004,0.732868
5,2005,0.733759
6,2006,0.718856
7,2007,0.707296
8,2008,0.706415
9,2009,0.700434


In [51]:
##by_year.to_csv('corr_by_year_hdi_hfi.csv', index = False)