### Prepare whr_data file

In [1]:
import csv
import json
import requests
import pandas as pd

In [2]:
# Read both input CSVs into DataFrames; the paths are relative, so they are
# resolved against the current working directory.
whr, countries = (
    pd.read_csv(csv_path) for csv_path in ('whr_raw.csv', 'country_info.csv')
)

In [3]:
# Show the raw column names, non-null counts and dtypes before any renaming.
whr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 16 columns):
Country name                                         1704 non-null object
Year                                                 1704 non-null int64
Life Ladder                                          1704 non-null float64
Log GDP per capita                                   1676 non-null float64
Social support                                       1691 non-null float64
Healthy life expectancy at birth                     1676 non-null float64
Freedom to make life choices                         1675 non-null float64
Generosity                                           1622 non-null float64
Perceptions of corruption                            1608 non-null float64
Positive affect                                      1685 non-null float64
Negative affect                                      1691 non-null float64
Confidence in national government                    1530 non-null floa

In [4]:
# Mapping from the raw column headers to short snake_case names.
whr_column_names = {
    'Country name': 'country',
    'Year': 'year',
    'Life Ladder': 'ladder',
    'Log GDP per capita': 'log_gdp_per_capita',
    'Social support': 'social_support',
    'Healthy life expectancy at birth': 'healthy_life_expectancy',
    'Freedom to make life choices': 'freedom',
    'Generosity': 'generosity',
    'Perceptions of corruption': 'corruption',
    'Positive affect': 'positive_affect',
    'Negative affect': 'negative_affect',
    'Confidence in national government': 'trust_government',
    'Democratic Quality': 'democratic_quality',
    'Delivery Quality': 'delivery_quality',
    'Standard deviation of ladder by country-year': 'well_being_inequality_1',
    'Standard deviation/Mean of ladder by country-year': 'well_being_inequality_2',
}

# The rename mutates `whr` itself, so every later cell sees the new names.
whr.rename(columns=whr_column_names, inplace=True)

# Re-inspect the frame to confirm the new column names.
whr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 16 columns):
country                    1704 non-null object
year                       1704 non-null int64
ladder                     1704 non-null float64
log_gdp_per_capita         1676 non-null float64
social_support             1691 non-null float64
healthy_life_expectancy    1676 non-null float64
freedom                    1675 non-null float64
generosity                 1622 non-null float64
corruption                 1608 non-null float64
positive_affect            1685 non-null float64
negative_affect            1691 non-null float64
trust_government           1530 non-null float64
democratic_quality         1558 non-null float64
delivery_quality           1559 non-null float64
well_being_inequality_1    1704 non-null float64
well_being_inequality_2    1704 non-null float64
dtypes: float64(14), int64(1), object(1)
memory usage: 213.1+ KB


In [5]:
# The 14 measure columns, i.e. every renamed column except the `country` and
# `year` identifiers.
measure_columns = [
    'ladder',
    'log_gdp_per_capita',
    'social_support',
    'healthy_life_expectancy',
    'freedom',
    'generosity',
    'corruption',
    'positive_affect',
    'negative_affect',
    'trust_government',
    'democratic_quality',
    'delivery_quality',
    'well_being_inequality_1',
    'well_being_inequality_2',
]
whr1 = whr[measure_columns]

In [6]:
# Round every measure column to two decimal places, then preview the first rows.
whr1 = whr1.round(decimals=2)
whr1.head()

Unnamed: 0,ladder,log_gdp_per_capita,social_support,healthy_life_expectancy,freedom,generosity,corruption,positive_affect,negative_affect,trust_government,democratic_quality,delivery_quality,well_being_inequality_1,well_being_inequality_2
0,3.72,7.17,0.45,50.8,0.72,0.18,0.88,0.52,0.26,0.61,-1.93,-1.66,1.77,0.48
1,4.4,7.33,0.55,51.2,0.68,0.2,0.85,0.58,0.24,0.61,-2.04,-1.64,1.72,0.39
2,4.76,7.39,0.54,51.6,0.6,0.13,0.71,0.62,0.28,0.3,-1.99,-1.62,1.88,0.39
3,3.83,7.42,0.52,51.92,0.5,0.17,0.73,0.61,0.27,0.31,-1.92,-1.62,1.79,0.47
4,3.78,7.52,0.52,52.24,0.53,0.24,0.78,0.71,0.27,0.44,-1.84,-1.4,1.8,0.48


In [7]:
# Put the identifier columns back in front of the rounded measures. Both
# frames were sliced from `whr`, so they share its row index and the
# column-wise concat aligns them row by row.
whr2 = whr.loc[:, ['country', 'year']]
whr_final = pd.concat([whr2, whr1], axis='columns')
whr_final.head()

Unnamed: 0,country,year,ladder,log_gdp_per_capita,social_support,healthy_life_expectancy,freedom,generosity,corruption,positive_affect,negative_affect,trust_government,democratic_quality,delivery_quality,well_being_inequality_1,well_being_inequality_2
0,Afghanistan,2008,3.72,7.17,0.45,50.8,0.72,0.18,0.88,0.52,0.26,0.61,-1.93,-1.66,1.77,0.48
1,Afghanistan,2009,4.4,7.33,0.55,51.2,0.68,0.2,0.85,0.58,0.24,0.61,-2.04,-1.64,1.72,0.39
2,Afghanistan,2010,4.76,7.39,0.54,51.6,0.6,0.13,0.71,0.62,0.28,0.3,-1.99,-1.62,1.88,0.39
3,Afghanistan,2011,3.83,7.42,0.52,51.92,0.5,0.17,0.73,0.61,0.27,0.31,-1.92,-1.62,1.79,0.47
4,Afghanistan,2012,3.78,7.52,0.52,52.24,0.53,0.24,0.78,0.71,0.27,0.44,-1.84,-1.4,1.8,0.48


In [8]:
# Show the country lookup table's column names, non-null counts and dtypes
# before renaming.
countries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185 entries, 0 to 184
Data columns (total 18 columns):
country      185 non-null object
ISO code     185 non-null object
OECD         185 non-null int64
CIS          185 non-null int64
NATO         185 non-null int64
EU           185 non-null int64
Schengen     185 non-null int64
Eurozone     185 non-null int64
ACP Group    185 non-null int64
CAN          185 non-null int64
APEC         185 non-null int64
BSEC         185 non-null int64
Caricom      185 non-null int64
EFTA         185 non-null int64
NAFTA        185 non-null int64
WTO          185 non-null int64
continent    185 non-null object
region       184 non-null object
dtypes: int64(14), object(4)
memory usage: 26.1+ KB


In [9]:
# Headers of the lookup table that need renaming. Each one follows the same
# rule: lower-case it and replace spaces with underscores
# (e.g. 'ISO code' -> 'iso_code', 'ACP Group' -> 'acp_group').
countries_raw_headers = [
    'ISO code',
    'OECD',
    'CIS',
    'NATO',
    'EU',
    'Schengen',
    'Eurozone',
    'ACP Group',
    'CAN',
    'APEC',
    'BSEC',
    'Caricom',
    'EFTA',
    'NAFTA',
    'WTO',
]
countries_column_names = {
    header: header.lower().replace(' ', '_') for header in countries_raw_headers
}

# The rename mutates `countries` itself, so later cells see the new names.
countries.rename(columns=countries_column_names, inplace=True)

# Re-inspect the frame to confirm the new column names.
countries.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 185 entries, 0 to 184
Data columns (total 18 columns):
country      185 non-null object
iso_code     185 non-null object
oecd         185 non-null int64
cis          185 non-null int64
nato         185 non-null int64
eu           185 non-null int64
schengen     185 non-null int64
eurozone     185 non-null int64
acp_group    185 non-null int64
can          185 non-null int64
apec         185 non-null int64
bsec         185 non-null int64
caricom      185 non-null int64
efta         185 non-null int64
nafta        185 non-null int64
wto          185 non-null int64
continent    185 non-null object
region       184 non-null object
dtypes: int64(14), object(4)
memory usage: 26.1+ KB


In [10]:
# Left join on the shared `country` key: every row of `whr_final` is kept and
# the lookup columns from `countries` are attached where the name matches.
df = whr_final.merge(countries, how='left', on='country')
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1704 entries, 0 to 1703
Data columns (total 33 columns):
country                    1704 non-null object
year                       1704 non-null int64
ladder                     1704 non-null float64
log_gdp_per_capita         1676 non-null float64
social_support             1691 non-null float64
healthy_life_expectancy    1676 non-null float64
freedom                    1675 non-null float64
generosity                 1622 non-null float64
corruption                 1608 non-null float64
positive_affect            1685 non-null float64
negative_affect            1691 non-null float64
trust_government           1530 non-null float64
democratic_quality         1558 non-null float64
delivery_quality           1559 non-null float64
well_being_inequality_1    1704 non-null float64
well_being_inequality_2    1704 non-null float64
iso_code                   1704 non-null object
oecd                       1704 non-null int64
cis              

In [11]:
# Preview the first rows of the merged frame.
df.head()

Unnamed: 0,country,year,ladder,log_gdp_per_capita,social_support,healthy_life_expectancy,freedom,generosity,corruption,positive_affect,...,acp_group,can,apec,bsec,caricom,efta,nafta,wto,continent,region
0,Afghanistan,2008,3.72,7.17,0.45,50.8,0.72,0.18,0.88,0.52,...,0,0,0,0,0,0,0,1,Asia,South Asia
1,Afghanistan,2009,4.4,7.33,0.55,51.2,0.68,0.2,0.85,0.58,...,0,0,0,0,0,0,0,1,Asia,South Asia
2,Afghanistan,2010,4.76,7.39,0.54,51.6,0.6,0.13,0.71,0.62,...,0,0,0,0,0,0,0,1,Asia,South Asia
3,Afghanistan,2011,3.83,7.42,0.52,51.92,0.5,0.17,0.73,0.61,...,0,0,0,0,0,0,0,1,Asia,South Asia
4,Afghanistan,2012,3.78,7.52,0.52,52.24,0.53,0.24,0.78,0.71,...,0,0,0,0,0,0,0,1,Asia,South Asia


In [12]:
# Write the merged frame to 'whr_data.csv' (relative to the working directory).
# NOTE(review): `to_csv` defaults to index=True, so the row index is written as
# an extra unnamed first column. If no consumer of this file relies on that
# column, pass index=False - confirm with downstream readers before changing.
df.to_csv('whr_data.csv')