In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
import functools




In [2]:
# Data file paths
alcohol_path = "data (1).csv"
happiness_path = "2018.csv"
hdi_path = "2018 Human Development Index (HDI).csv"
gdp_path = "GDP per capita (2017 PPP $).csv"
life_path = "Life expectancy at birth (years).csv"

# Output file
output_data_file = "output_data/alcohol_vs_happiness.csv"

# Read the data files
alcohol_data = pd.read_csv(alcohol_path)
happiness_data = pd.read_csv(happiness_path)
hdi_data = pd.read_csv(hdi_path, skiprows=5, encoding="cp1252")
gdp_data = pd.read_csv(gdp_path, encoding="cp1252", skiprows=5)
life_data = pd.read_csv(life_path, encoding="cp1252", skiprows=6)


In [25]:
# Grab 2018 data, clean and merge HDI, GDP and Life Expectancy 
clean_gdp_data = gdp_data[["Country", "2018"]]
gdp_hdi = clean_gdp_data.merge(hdi_data, how='inner', on=["Country"])
final_gdp_hdi = gdp_hdi.rename(columns={"2018_x": "GDP (per capita PPP)", "2018_y": "HDI"})
clean_life_data = life_data[["Country", "2018"]]
clean_life_data = clean_life_data.rename(columns={"2018": "Life Expectancy"})
final_hdi_gdp_life = final_gdp_hdi.merge(clean_life_data, how='inner', on=["Country"])
#final_hdi_gdp_life = final_hdi_gdp_life.dropna()
final_hdi_gdp_life = final_hdi_gdp_life.iloc[:-25]
final_hdi_gdp_life.dtypes

Country                 object
GDP (per capita PPP)    object
HDI Rank                object
HDI                     object
Life Expectancy         object
dtype: object

In [26]:
final_hdi_gdp_life[["GDP (per capita PPP)", "HDI Rank", "HDI", "Life Expectancy"]] = final_hdi_gdp_life[["GDP (per capita PPP)", "HDI Rank", "HDI", "Life Expectancy"]].astype(float)
final_hdi_gdp_life.dtypes


Country                  object
GDP (per capita PPP)    float64
HDI Rank                float64
HDI                     float64
Life Expectancy         float64
dtype: object

In [27]:
final_hdi_gdp_life = final_hdi_gdp_life.sort_values(by='GDP (per capita PPP)', ascending=False)
final_hdi_gdp_life.to_csv(output_data_file, encoding="utf8")
final_hdi_gdp_life

Unnamed: 0,Country,GDP (per capita PPP),HDI Rank,HDI,Life Expectancy
97,Luxembourg,114110.0,23.0,0.913,82.1
149,Singapore,97745.0,11.0,0.936,83.5
135,Qatar,94503.0,45.0,0.845,80.1
78,Ireland,83471.0,2.0,0.951,82.1
159,Switzerland,68479.0,2.0,0.955,83.6
...,...,...,...,...,...
120,Niger,1196.0,189.0,0.391,62.0
38,Congo (Democratic Republic of the),1086.0,175.0,0.478,60.4
99,Malawi,1043.0,174.0,0.478,63.8
31,Central African Republic,933.0,188.0,0.395,52.8


In [6]:
# Clean World Happiness Report
happiness_final = happiness_data.rename(columns={"Country or region": "Country"})
happiness_final

Unnamed: 0,Overall rank,Country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.632,1.305,1.592,0.874,0.681,0.202,0.393
1,2,Norway,7.594,1.456,1.582,0.861,0.686,0.286,0.340
2,3,Denmark,7.555,1.351,1.590,0.868,0.683,0.284,0.408
3,4,Iceland,7.495,1.343,1.644,0.914,0.677,0.353,0.138
4,5,Switzerland,7.487,1.420,1.549,0.927,0.660,0.256,0.357
...,...,...,...,...,...,...,...,...,...
151,152,Yemen,3.355,0.442,1.073,0.343,0.244,0.083,0.064
152,153,Tanzania,3.303,0.455,0.991,0.381,0.481,0.270,0.097
153,154,South Sudan,3.254,0.337,0.608,0.177,0.112,0.224,0.106
154,155,Central African Republic,3.083,0.024,0.000,0.010,0.305,0.218,0.038


In [18]:
# Clean, pivot and reorganize alcohol data
alcohol_df = alcohol_data.drop([0])
alcohol_results = alcohol_df.pivot_table('Alcohol, recorded per capita (15+) consumption (in litres of pure alcohol)', 
                                           ["Unnamed: 0", "Unnamed: 1"], 'Unnamed: 2')                              
alcohol = pd.DataFrame(alcohol_results.to_records())
alcohol_set = alcohol[["Unnamed: 0", "All types", "Beer", "Spirits", "Wine", "Other alcoholic beverages"]]
alcohol_final = alcohol_set.rename(columns={"Unnamed: 0": "Country", "Other alcoholic beverages": "Other"})
alcohol_final

Unnamed: 0,Country,All types,Beer,Spirits,Wine,Other
0,Afghanistan,0.01,0.00,0.01,0.00,0.00
1,Albania,4.70,1.60,1.72,1.30,0.08
2,Algeria,0.58,0.30,0.10,0.18,0.00
3,Andorra,9.75,3.66,2.31,3.69,0.00
4,Angola,5.35,3.64,0.99,0.64,0.08
...,...,...,...,...,...,...
183,Venezuela (Bolivarian Republic of),2.84,1.86,0.84,0.10,0.04
184,Viet Nam,3.22,2.96,0.23,0.03,0.00
185,Yemen,0.03,0.03,0.00,0.00,0.00
186,Zambia,3.95,1.15,0.62,0.08,2.10


In [19]:
# Combine World Happiness Report and WHO alcohol data
happiness_alcohol = happiness_final.merge(alcohol_final, how='inner', on=["Country"])

# Display the data table for preview
happiness_alcohol


Unnamed: 0,Overall rank,Country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,All types,Beer,Spirits,Wine,Other
0,1,Finland,7.632,1.305,1.592,0.874,0.681,0.202,0.393,8.37,3.97,1.10,1.72,1.58
1,2,Norway,7.594,1.456,1.582,0.861,0.686,0.286,0.340,6.02,2.66,0.97,2.20,0.19
2,3,Denmark,7.555,1.351,1.590,0.868,0.683,0.284,0.408,9.31,3.45,1.65,4.22,0.00
3,4,Iceland,7.495,1.343,1.644,0.914,0.677,0.353,0.138,7.72,4.39,1.22,2.11,0.00
4,5,Switzerland,7.487,1.420,1.549,0.927,0.660,0.256,0.357,9.51,3.17,1.65,4.23,0.12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,149,Liberia,3.495,0.076,0.858,0.267,0.419,0.206,0.030,3.55,0.37,3.11,0.05,0.02
128,151,Rwanda,3.408,0.332,0.896,0.400,0.636,0.200,0.444,6.99,1.06,0.04,0.01,5.88
129,152,Yemen,3.355,0.442,1.073,0.343,0.244,0.083,0.064,0.03,0.03,0.00,0.00,0.00
130,155,Central African Republic,3.083,0.024,0.000,0.010,0.305,0.218,0.038,0.88,0.51,0.03,0.02,0.32


In [20]:
final_results = final_hdi_gdp_life.merge(happiness_alcohol, how="right", on=["Country"])
final_results

Unnamed: 0,Country,GDP (per capita),HDI Rank,HDI,Life Expectancy,Overall rank,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,All types,Beer,Spirits,Wine,Other
0,Finland,,,,,1,7.632,1.305,1.592,0.874,0.681,0.202,0.393,8.37,3.97,1.10,1.72,1.58
1,Norway,,,,,2,7.594,1.456,1.582,0.861,0.686,0.286,0.340,6.02,2.66,0.97,2.20,0.19
2,Denmark,,,,,3,7.555,1.351,1.590,0.868,0.683,0.284,0.408,9.31,3.45,1.65,4.22,0.00
3,Iceland,,,,,4,7.495,1.343,1.644,0.914,0.677,0.353,0.138,7.72,4.39,1.22,2.11,0.00
4,Switzerland,,,,,5,7.487,1.420,1.549,0.927,0.660,0.256,0.357,9.51,3.17,1.65,4.23,0.12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,Liberia,,,,,149,3.495,0.076,0.858,0.267,0.419,0.206,0.030,3.55,0.37,3.11,0.05,0.02
128,Rwanda,,,,,151,3.408,0.332,0.896,0.400,0.636,0.200,0.444,6.99,1.06,0.04,0.01,5.88
129,Yemen,,,,,152,3.355,0.442,1.073,0.343,0.244,0.083,0.064,0.03,0.03,0.00,0.00,0.00
130,Central African Republic,,,,,155,3.083,0.024,0.000,0.010,0.305,0.218,0.038,0.88,0.51,0.03,0.02,0.32


In [28]:
top_alcohol = happiness_alcohol.sort_values(by='All types', ascending=False)
top_alcohol

Unnamed: 0,Overall rank,Country,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,All types,Beer,Spirits,Wine,Other
47,53,Latvia,5.933,1.148,1.454,0.671,0.363,0.092,0.066,12.60,5.30,5.10,1.40,0.80
113,135,Uganda,4.161,0.322,1.090,0.237,0.450,0.259,0.061,12.20,0.84,0.61,0.01,10.73
10,12,Austria,7.139,1.341,1.504,0.891,0.617,0.242,0.224,11.80,6.40,1.90,3.60,0.00
44,50,Lithuania,5.952,1.197,1.527,0.716,0.350,0.026,0.006,11.49,4.47,4.75,0.80,1.46
85,100,Bulgaria,4.933,1.054,1.515,0.712,0.359,0.064,0.009,11.42,4.32,5.20,1.73,0.16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58,70,Libya,5.566,0.985,1.350,0.553,0.496,0.116,0.148,0.01,0.00,0.00,0.01,0.00
123,145,Afghanistan,3.632,0.332,0.537,0.255,0.085,0.191,0.036,0.01,0.00,0.01,0.00,0.00
94,115,Bangladesh,4.500,0.532,0.850,0.579,0.580,0.153,0.144,0.00,0.00,0.00,0.00,0.00
83,98,Somalia,4.982,0.000,0.712,0.115,0.674,0.238,0.282,0.00,0.00,0.00,0.00,0.00
