In [48]:
import pandas as pd


## Happiness dataset (2008 - 2020)
### File name: 'HappinessDataPanelWHR2021C2.csv'
link: https://worldhappiness.report/ed/2021/#appendices-and-data

In [49]:
# Read the World Happiness Report 2021 panel file into a DataFrame.
df_happiness = pd.read_csv(filepath_or_buffer="HappinessDataPanelWHR2021C2.csv")
# Bare last expression so the notebook renders the frame as a table.
df_happiness

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.724,7.370,0.451,50.80,0.718,0.168,0.882,0.518,0.258
1,Afghanistan,2009,4.402,7.540,0.552,51.20,0.679,0.190,0.850,0.584,0.237
2,Afghanistan,2010,4.758,7.647,0.539,51.60,0.600,0.121,0.707,0.618,0.275
3,Afghanistan,2011,3.832,7.620,0.521,51.92,0.496,0.162,0.731,0.611,0.267
4,Afghanistan,2012,3.783,7.705,0.521,52.24,0.531,0.236,0.776,0.710,0.268
...,...,...,...,...,...,...,...,...,...,...,...
1944,Zimbabwe,2016,3.735,7.984,0.768,54.40,0.733,-0.095,0.724,0.738,0.209
1945,Zimbabwe,2017,3.638,8.016,0.754,55.00,0.753,-0.098,0.751,0.806,0.224
1946,Zimbabwe,2018,3.616,8.049,0.775,55.60,0.763,-0.068,0.844,0.710,0.212
1947,Zimbabwe,2019,2.694,7.950,0.759,56.20,0.632,-0.064,0.831,0.716,0.235


## Suicide Rates dataset (2000 - 2019)
### File names: 'Suicide rates WHO.csv', 'Crude Suuicide Rates WHO.csv'
link: https://www.who.int/data/gho/data/themes/mental-health/suicide-rates

In [42]:
# Load the WHO suicide-rate export; the full frame stays in df_suicide.
df_suicide = pd.read_csv(filepath_or_buffer='Suicide rates WHO.csv')
# Display only the location, period, Dim1 and numeric-value columns.
df_suicide.loc[:, ["Location", "Period", "Dim1", "FactValueNumeric"]]


Unnamed: 0,Location,Period,Dim1,FactValueNumeric
0,Antigua and Barbuda,2019,Male,0.00
1,Barbados,2019,Female,0.16
2,Barbados,2019,Both sexes,0.31
3,Antigua and Barbuda,2019,Both sexes,0.32
4,Barbados,2019,Male,0.49
...,...,...,...,...
10975,Guinea,2000,Both sexes,9.73
10976,Ghana,2000,Both sexes,9.75
10977,Malta,2000,Male,9.75
10978,Seychelles,2000,Both sexes,9.76


In [43]:
# Load the WHO crude suicide-rate export; the full frame stays in df_suicide_sex_age.
df_suicide_sex_age = pd.read_csv(filepath_or_buffer='Crude Suuicide Rates WHO.csv')
# Display only the location, period, Dim1, Dim2 and numeric-value columns.
df_suicide_sex_age.loc[:, ["Location", "Period", "Dim1", "Dim2", "FactValueNumeric"]]

Unnamed: 0,Location,Period,Dim1,Dim2,FactValueNumeric
0,Antigua and Barbuda,2019,Both sexes,15-24 years,0.00
1,Antigua and Barbuda,2019,Both sexes,25-34 years,0.00
2,Antigua and Barbuda,2019,Both sexes,35-44 years,0.00
3,Antigua and Barbuda,2019,Both sexes,45-54 years,0.00
4,Antigua and Barbuda,2019,Both sexes,65-74 years,0.00
...,...,...,...,...,...
4387,Burundi,2019,Male,75-84 years,97.84
4388,The former Yugoslav Republic of Macedonia,2019,Male,85+ years,98.13
4389,Lithuania,2019,Male,85+ years,98.72
4390,Ethiopia,2019,Male,75-84 years,99.05


## Mortality data
### File name: 'MortalityDataWHR2021C2CSV.csv'
link: https://worldhappiness.report/ed/2021/#appendices-and-data

- Population data (2019-2020)
- All-cause death counts (2017-2020)

In [44]:
# Raw WHR 2021 mortality table; the following cells slice their columns out of this frame.
df = pd.read_csv(filepath_or_buffer="MortalityDataWHR2021C2CSV.csv")

In [45]:
# Keep the country key plus the two population columns.
df_population = df.loc[:, ['Country name', 'Population 2020', 'Population 2019']]

# Reshape wide -> long: one row per (country, population column).
# The final row of the table is left out via iloc[:-1].
# Note: 'Year' carries the source column labels (e.g. "Population 2020"),
# not a bare year number.
df_population_t = df_population.iloc[:-1].melt(
    id_vars="Country name",
    value_vars=['Population 2020', 'Population 2019'],
    var_name='Year',
    value_name='Population',
)
df_population_t

Unnamed: 0,Country name,Year,Population
0,United States,Population 2020,331002647.0
1,Egypt,Population 2020,102334403.0
2,Morocco,Population 2020,36910558.0
3,Lebanon,Population 2020,6825442.0
4,Saudi Arabia,Population 2020,34813867.0
...,...,...,...
325,Uzbekistan,Population 2019,33580650.0
326,Yemen,Population 2019,29161922.0
327,Kosovo,Population 2019,1794248.0
328,Somaliland region,Population 2019,


In [46]:
# Keep the country key plus the four yearly all-cause death-count columns.
df_all_cause_death = df.loc[:, [
    "Country name",
    "All-cause death count, 2017",
    "All-cause death count, 2018",
    "All-cause death count, 2019",
    "All-cause death count, 2020",
]]

# Reshape wide -> long: one row per (country, death-count column).
# The final row of the table is left out via iloc[:-1].
# Note: 'Year' carries the source column labels
# (e.g. "All-cause death count, 2017"), not a bare year number.
df_all_cause_death_t = df_all_cause_death.iloc[:-1].melt(
    id_vars="Country name",
    value_vars=[
        "All-cause death count, 2017",
        "All-cause death count, 2018",
        "All-cause death count, 2019",
        "All-cause death count, 2020",
    ],
    var_name='Year',
    value_name='count',
)
df_all_cause_death_t

Unnamed: 0,Country name,Year,count
0,United States,"All-cause death count, 2017",2810927.0
1,Egypt,"All-cause death count, 2017",
2,Morocco,"All-cause death count, 2017",
3,Lebanon,"All-cause death count, 2017",
4,Saudi Arabia,"All-cause death count, 2017",
...,...,...,...
655,Uzbekistan,"All-cause death count, 2020",175637.0
656,Yemen,"All-cause death count, 2020",
657,Kosovo,"All-cause death count, 2020",11108.0
658,Somaliland region,"All-cause death count, 2020",


In [47]:
# Select the remaining country-level feature columns from the mortality table.
# Note: "Country name" is not part of this selection, so rows are identified
# only by their position in df.
df_other_features = df.loc[:, [
    "Median age",
    "Island",
    "Female head of government",
    "Index of institutional trust",
    "Gini coefficient of income",
    # The double space after "COVID-19" is part of this label as written in the source.
    "Index of exposure to COVID-19  infections in other countries as of March 31",
    "COVID-19 deaths per 100,000 population in 2020",
]]
df_other_features

Unnamed: 0,Median age,Island,Female head of government,Index of institutional trust,Gini coefficient of income,Index of exposure to COVID-19 infections in other countries as of March 31,"COVID-19 deaths per 100,000 population in 2020"
0,38.3,0,0,0.250,47.51,1.688,104.451
1,25.3,0,0,0.446,31.56,1.627,7.457
2,29.6,0,0,0.397,39.55,2.336,20.016
3,31.1,0,0,0.107,31.83,1.891,21.508
4,31.9,0,0,0.651,45.90,1.250,17.875
...,...,...,...,...,...,...,...
161,20.3,0,0,0.267,36.71,1.005,2.045
162,35.0,0,1,0.169,29.01,3.134,68.916
163,,0,0,,,,
164,,0,0,0.305,,,
