In [1]:
import pandas as pd
import difflib
# Raise pandas' display limits to 300 rows and 100 columns for the tables shown below.
pd.options.display.max_rows = 300
pd.options.display.max_columns = 100

In [2]:
# Load the raw mortality-rate table. header=1 takes the column names from the
# file's second line rather than its first.
df = pd.read_csv('../data/mortality_rate.csv', header=1)

In [3]:
# Preview the first rows to see how the header line and columns were parsed.
df.head()

Unnamed: 0,Time,Unnamed: 1,2013,2014,2015,2016,2017,2018,2019
0,Country,,,,,,,,
1,Afghanistan,,57.3,55.1,53.1,51.2,49.5,47.9,..
2,Albania,,9.5,8.9,8.5,8.2,8,7.8,..
3,Algeria,,22.1,21.8,21.4,21,20.6,20.1,..
4,American Samoa,,..,..,..,..,..,..,..


In [4]:
# Tidy the raw table in one pass:
#   * the country names arrive under the 'Time' header, so rename it to 'Country';
#   * the row labelled 0 only repeats the word "Country" and carries no values;
#   * 'Unnamed: 1' and '2019' are removed (both show no usable values in the preview above).
df = (
    df
    .rename(columns={'Time': 'Country'})
    .drop(index=0)
    .drop(columns=['Unnamed: 1', '2019'])
)
df.head()

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018
1,Afghanistan,57.3,55.1,53.1,51.2,49.5,47.9
2,Albania,9.5,8.9,8.5,8.2,8,7.8
3,Algeria,22.1,21.8,21.4,21,20.6,20.1
4,American Samoa,..,..,..,..,..,..
5,Andorra,3.4,3.2,3.1,3,2.8,2.7


In [5]:
# Reference table of country codes; the cells below read its 'official_name_en'
# and 'ISO3166-1-Alpha-3' columns.
countries = pd.read_csv('../data/country-codes_csv.csv')
def get_country_iso(name, table=None, cutoff=0.5):
    """Return the ISO 3166-1 alpha-3 code of the country whose name best matches ``name``.

    The match is fuzzy: ``name`` is compared against the ``official_name_en``
    column with ``difflib.get_close_matches`` and only the single best candidate
    is considered. Fuzzy matches can be wrong (e.g. a short name may land on a
    similarly spelled but unrelated country), so results should be reviewed.

    Parameters
    ----------
    name : str
        Country name to resolve. Anything that is not a string (for example a
        NaN from a blank cell) is treated as "no match" instead of raising
        ``TypeError`` inside difflib.
    table : pandas.DataFrame, optional
        Lookup table with ``official_name_en`` and ``ISO3166-1-Alpha-3``
        columns. Defaults to the notebook-level ``countries`` frame.
    cutoff : float, optional
        Minimum similarity (0..1) passed to ``difflib.get_close_matches``.

    Returns
    -------
    str
        The alpha-3 code, or ``""`` when nothing matches or the matched row has
        no code.
    """
    if table is None:
        table = countries
    if not isinstance(name, str):
        return ""

    official_names = table['official_name_en'].dropna().unique().tolist()
    matches = difflib.get_close_matches(name, official_names, n=1, cutoff=cutoff)
    if not matches:
        return ""

    # Several rows may share a name; take the first one that actually has a code
    # so a missing code never leaks out as NaN.
    codes = table.loc[table['official_name_en'] == matches[0], 'ISO3166-1-Alpha-3'].dropna()
    if codes.empty:
        return ""
    return codes.iloc[0]

In [6]:
# Resolve every country name to an ISO alpha-3 code, then list the rows for
# which no match was found (those carry an empty string).
df['country_iso'] = df['Country'].map(get_country_iso)
df.loc[df['country_iso'].eq('')]

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,country_iso


In [7]:
# Attach the official English name that belongs to each assigned code so it can be
# compared with the source name. A code -> name mapping replaces the per-row scan of
# `countries`, and a code with no entry (including the '' placeholder used for
# unmatched names) becomes NaN instead of raising IndexError.
iso_to_official_name = (
    countries
    .dropna(subset=['ISO3166-1-Alpha-3'])
    .drop_duplicates(subset='ISO3166-1-Alpha-3')  # first row per code, as the former `[0]` lookup did
    .set_index('ISO3166-1-Alpha-3')['official_name_en']
)
df['country_val'] = df['country_iso'].map(iso_to_official_name)

In [8]:
# Show source name, assigned code and matched official name side by side to
# eyeball the fuzzy matches.
df[['Country', 'country_iso', 'country_val']]

Unnamed: 0,Country,country_iso,country_val
1,Afghanistan,AFG,Afghanistan
2,Albania,ALB,Albania
3,Algeria,DZA,Algeria
4,American Samoa,ASM,American Samoa
5,Andorra,AND,Andorra
6,Angola,AGO,Angola
7,Anguilla,AIA,Anguilla
8,Antigua and Barbuda,ATG,Antigua and Barbuda
9,Argentina,ARG,Argentina
10,Armenia,ARM,Armenia


In [9]:
# Rows whose source name is not identical to the matched official name; these
# are the candidates for a wrong fuzzy match.
name_differs = df['Country'].ne(df['country_val'])
df[name_differs]

Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,country_iso,country_val
41,Channel Islands,..,..,..,..,..,..,CYM,Cayman Islands
69,Eswatini,54,52.8,47.9,46.3,45.5,43,EST,Estonia
71,Faeroe Islands,..,..,..,..,..,..,FRO,Faroe Islands
153,North Macedonia,9.9,10.5,10.8,10.5,9.7,8.7,NCL,New Caledonia
159,Palestine,19,18.8,18.5,18.1,17.7,17.3,PSE,State of Palestine
180,Saint-Barthélemy,..,..,..,..,..,..,BLM,Saint Barthélemy
181,Saint-Martin (French part),..,..,..,..,..,..,MAF,Saint Martin (French Part)


In [10]:
# Manual corrections for the wrong fuzzy matches listed above. Rows are selected
# by country name instead of hard-coded index labels, so the cell does not depend
# on row order and can be re-run without a KeyError.
iso_overrides = {
    'North Macedonia': 'MKD',  # was matched to New Caledonia (NCL)
    'Eswatini': 'SWZ',         # was matched to Estonia (EST); it has real values, so keep it with its own code
}
for country_name, iso_code in iso_overrides.items():
    df.loc[df['Country'] == country_name, 'country_iso'] = iso_code

# 'Channel Islands' was matched to Cayman Islands and all of its values are '..',
# so the row is removed rather than re-coded.
df.drop(index=df.index[df['Country'] == 'Channel Islands'], inplace=True)

In [28]:
# From here on the ISO code is the only country key; the two name columns were
# only needed to check the matching.
df = df.drop(columns=['Country', 'country_val'])

In [29]:
# Check which columns remain after the name columns were removed.
# NOTE(review): the recorded output below has no '2018' column although the cleanup
# cell only removes '2019' - the output looks stale; re-run from a fresh kernel.
df.columns

Index(['2013', '2014', '2015', '2016', '2017', 'country_iso'], dtype='object')

In [30]:
# Inspect the wide table (one row per country, one column per year) before reshaping.
# NOTE(review): the recorded values below (e.g. AFG 2013 = 62.525) do not match the
# table loaded at the top (Afghanistan 2013 = 57.3); the output appears to predate
# the current input file - re-run to confirm.
df

Unnamed: 0,2013,2014,2015,2016,2017,country_iso
1,62.525,62.966,63.377,63.763,64.13,AFG
2,77.554,77.813,78.025,78.194,78.333,ALB
3,75.661,75.878,76.09,76.298,76.499,DZA
4,..,..,..,..,..,ASM
5,..,..,..,..,..,AND
6,58.054,58.776,59.398,59.925,60.379,AGO
7,..,..,..,..,..,AIA
8,76.218,76.349,76.483,76.617,76.752,ATG
9,75.756,75.913,76.068,76.221,76.372,ARG
10,74.056,74.273,74.467,74.64,74.797,ARM


In [31]:
# Reshape from wide (one column per year) to long (one row per country and year).
# NOTE(review): the value column is called "life_expectancy" although `df` was read
# from mortality_rate.csv - confirm the intended column name.
df_new = pd.melt(
    df,
    id_vars=["country_iso"],
    var_name="year",
    value_name="life_expectancy",
)

In [32]:
# Lift the row display limit so the whole long table is rendered, then order the
# rows by country code and year.
pd.options.display.max_rows = None
df_new = df_new.sort_values(by=['country_iso', 'year'])
df_new

Unnamed: 0,country_iso,year,life_expectancy
10,ABW,2013,75.441
241,ABW,2014,75.583
472,ABW,2015,75.725
703,ABW,2016,75.868
934,ABW,2017,76.01
0,AFG,2013,62.525
231,AFG,2014,62.966
462,AFG,2015,63.377
693,AFG,2016,63.763
924,AFG,2017,64.13


In [33]:
# '..' appears to be the source file's marker for a missing value; keep only the
# rows that carry an actual value.
df_new = df_new[df_new['life_expectancy'] != '..']

In [34]:
# Display the long table again after the '..' rows were removed.
df_new

Unnamed: 0,country_iso,year,life_expectancy
10,ABW,2013,75.441
241,ABW,2014,75.583
472,ABW,2015,75.725
703,ABW,2016,75.868
934,ABW,2017,76.01
0,AFG,2013,62.525
231,AFG,2014,62.966
462,AFG,2015,63.377
693,AFG,2016,63.763
924,AFG,2017,64.13


In [35]:
# Persist the long-format table; index=False leaves the row index out of the file.
# NOTE(review): the target is life_expectancy.csv although the input read at the top
# is mortality_rate.csv - running this as-is would write mortality figures into the
# life-expectancy file. Confirm the intended output path before re-running.
df_new.to_csv('../data/converted/life_expectancy.csv', index=False)

In [36]:
# Read the exported file back and display it as a check on what was written.
df_saved = pd.read_csv('../data/converted/life_expectancy.csv')
df_saved

Unnamed: 0,country_iso,year,life_expectancy
0,ABW,2013,75.441
1,ABW,2014,75.583
2,ABW,2015,75.725
3,ABW,2016,75.868
4,ABW,2017,76.01
5,AFG,2013,62.525
6,AFG,2014,62.966
7,AFG,2015,63.377
8,AFG,2016,63.763
9,AFG,2017,64.13
