# World Happiness Index

## preparing dataset

In [None]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Load the raw table of each report year.
df2015 = pd.read_csv('2015.csv')
df2016 = pd.read_csv('2016.csv')
df2017 = pd.read_csv('2017.csv')
df2018 = pd.read_csv('2018.csv')
df2019 = pd.read_csv('2019.csv')

# Column renames shared by the 2015 and 2016 files.
names_2015_2016 = {
    'Economy (GDP per Capita)': 'GDP per capita',
    'Family': 'Social Support',
    'Health (Life Expectancy)': 'Healthy life expectancy',
    'Trust (Government Corruption)': 'Perceptions of corruption',
}
# Column renames for the 2017 file (dotted column names).
names_2017 = {
    'Happiness.Rank': 'Happiness Rank',
    'Happiness.Score': 'Happiness Score',
    'Economy..GDP.per.Capita.': 'GDP per capita',
    'Family': 'Social Support',
    'Health..Life.Expectancy.': 'Healthy life expectancy',
    'Trust..Government.Corruption.': 'Perceptions of corruption',
}
# Column renames shared by the 2018 and 2019 files.
names_2018_2019 = {
    'Overall rank': 'Happiness Rank',
    'Country or region': 'Country',
    'Social support': 'Social Support',
    'Freedom to make life choices': 'Freedom',
    'Score': 'Happiness Score',
}

# 2015: drop the columns that are not analysed and harmonise the names.
df2015 = (
    df2015
    .drop(columns=['Standard Error', 'Dystopia Residual'])
    .rename(columns=names_2015_2016)
)
# The 2015 table serves as the reference for the Country -> Region lookup
# and for the column order applied to the later years.
country_region = df2015[['Country', 'Region']].copy()
cols = list(df2015.columns)

# 2016: same treatment as 2015 (this file has its own Region column).
df2016 = (
    df2016
    .drop(columns=['Lower Confidence Interval', 'Upper Confidence Interval', 'Dystopia Residual'])
    .rename(columns=names_2015_2016)
)

# 2017: the Region is added from the 2015 lookup, then the columns are put
# into the 2015 order.
# NOTE(review): merge() defaults to an inner join, so countries whose name is
# not present in the 2015 table are dropped here — confirm this is intended.
df2017 = (
    df2017
    .drop(columns=['Whisker.high', 'Whisker.low', 'Dystopia.Residual'])
    .rename(columns=names_2017)
    .merge(country_region, on='Country')
)
df2017 = df2017[cols]

# 2018: rename, add the Region from the 2015 lookup, reorder the columns.
df2018 = df2018.rename(columns=names_2018_2019).merge(country_region, on='Country')
df2018 = df2018[cols]

# 2019: rename, add the Region from the 2015 lookup, reorder the columns.
df2019 = df2019.rename(columns=names_2018_2019).merge(country_region, on='Country')
df2019 = df2019[cols]

Now combine all years into one DataFrame.

In [None]:
# Tag every yearly table with its report year (stored as a string, as the
# original cell did) so that the rows stay distinguishable after stacking.
frames_by_year = {
    2015: df2015,
    2016: df2016,
    2017: df2017,
    2018: df2018,
    2019: df2019,
}
for report_year, yearly_frame in frames_by_year.items():
    yearly_frame["year"] = str(report_year)

# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat stacks the frames in the same order and, like append did, keeps
# each frame's own row labels.
df_all = pd.concat([df2015, df2016, df2017, df2018, df2019])

# First, let's look at the 2015 dataset

The dataset considers the following factors as contributing to happiness; their values are expressed relative to Dystopia, the unhappiest place on earth:
- GDP per Capita
- Family
- Life Expectancy
- Freedom
- Generosity
- Trust Government Corruption


### Correlation: influence of separate factors regarding Happiness Rank
We are using a heatmap to show the correlation. We can see that happiness is highly dependent on GDP as well as on social support through family and on healthy life expectancy.

In [None]:
# Pairwise correlation of the numeric columns of the 2015 table.
# The frame also holds string columns (Country, Region, year). Since pandas 2.0
# DataFrame.corr() no longer drops them silently and raises instead, so the
# numeric columns are selected explicitly. This works on older pandas too.
corr = df2015.select_dtypes(include='number').corr()
corr

In [None]:
# Annotated heatmap of the full correlation matrix.
heatmap_options = dict(annot=True, linewidths=.5, square=True, cmap='Blues_r')
sns.heatmap(corr, **heatmap_options);

In [None]:
# Boolean mask that is True on the upper triangle (diagonal included), so only
# the lower triangle of the symmetric matrix is drawn.
mask = np.triu(np.ones_like(corr, dtype=bool))

with sns.axes_style("white"):
    f, ax = plt.subplots(figsize=(7, 5))
    ax = sns.heatmap(
        corr,
        mask=mask,
        annot=True,
        linewidths=.5,
        square=True,
        cmap='Blues_r',
    )

In [None]:
# One scatter panel per factor against the happiness score (2 x 3 grid,
# filled row by row).
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))

scatter_factors = [
    'GDP per capita',
    'Social Support',
    'Healthy life expectancy',
    'Freedom',
    'Perceptions of corruption',
    'Generosity',
]
for panel, factor in zip(axes.flat, scatter_factors):
    df2015.plot.scatter(ax=panel, x=factor, y='Happiness Score')


This also shows how GDP and social support contribute the most to the happiness score, while generosity and perception of corruption show the least correlation with perceived happiness.
Let's use a scatterplot series to look at the correlation in the year 2015.

In [None]:
# Freedom against happiness score for 2015, points coloured by rank.
sns.relplot(
    data=df2015,
    x="Freedom",
    y="Happiness Score",
    hue="Happiness Rank",
);

Let's show how each attribute developed from 2015 to 2019.

In [None]:
# One figure per factor; each figure has one column of panels per year.
yearly_factors = [
    "GDP per capita",
    "Social Support",
    "Healthy life expectancy",
    "Freedom",
    "Perceptions of corruption",
    "Generosity",
]
for factor in yearly_factors:
    sns.relplot(
        data=df_all,
        x=factor,
        y="Happiness Score",
        hue="Happiness Rank",
        col="year",
    )

## Mean values of top10 countries, bottom10 countries and regions

In [None]:
# Mean of every numeric column per region for 2015.
# numeric_only=True is required: the frame also holds string columns (Country,
# year), and since pandas 2.0 GroupBy.mean() raises on them instead of
# dropping them silently.
location_mean_byregion = df2015.groupby("Region").mean(numeric_only=True)
# Bare last expression gives the rich table display instead of plain text.
location_mean_byregion

In [None]:
# Wide figure so that all region names fit on the x axis.
fig = plt.figure(figsize=(30, 8))

# Distribution of the 2015 happiness rank within each region.
sns.violinplot(data=df2015, x="Region", y="Happiness Rank")

plt.show()

Australia and New Zealand obviously consists of only two countries, but we can see that Western Europe is mainly happy, that in the Middle East the band is pretty wide, and that in Sub-Saharan Africa as well as Southern Asia happiness is mainly in the bottom part of the ranking.

### Top 10 Countries from 2015 next to each other regarding different attributes using stacked bar charts

In [None]:
# First ten rows of the 2015 table without the rank column.
# NOTE(review): this is the "top 10" only if df2015 is ordered by rank, which
# the source file presumably is — verify.
# drop() returns a new frame; the original in-place drop on the head() slice
# triggered pandas' SettingWithCopyWarning.
df2015_top10 = df2015.head(10).drop(columns=['Happiness Rank'])

In [None]:
# Display the ten-row 2015 table (rank column already removed).
df2015_top10

In [None]:
# Stacked bar chart of three factors for the ten countries in df2015_top10.
# The original drew three separate bar plots on the same axes, so the bars
# shared positions and covered each other rather than stacking.
# stacked=True puts the segments on top of each other; each factor keeps the
# colour it had before.
stacked_factors = ["Social Support", "GDP per capita", "Healthy life expectancy"]
ax = df2015_top10.plot(
    x="Country",
    y=stacked_factors,
    kind="bar",
    stacked=True,
    color=["C3", "C1", "C2"],
)

plt.show()

### Top 10 next to bottom 10 countries for each separate attribute in 2015

In [None]:
def _top_and_bottom(column, n=10):
    """Return (n highest rows, n lowest rows) of df2015 ordered by `column`."""
    highest = df2015.sort_values(column, ascending=False).head(n)
    lowest = df2015.sort_values(column, ascending=True).head(n)
    return highest, lowest


# Ten highest and ten lowest 2015 rows for each factor.
gdp_top10, gdp_bottom10 = _top_and_bottom('GDP per capita')
social_top10, social_bottom10 = _top_and_bottom('Social Support')
life_top10, life_bottom10 = _top_and_bottom('Healthy life expectancy')
freedom_top10, freedom_bottom10 = _top_and_bottom('Freedom')
corruption_top10, corruption_bottom10 = _top_and_bottom('Perceptions of corruption')
generosity_top10, generosity_bottom10 = _top_and_bottom('Generosity')

In [None]:
# 3 x 4 grid: each factor takes two neighbouring panels (top 10, bottom 10),
# filled row by row; the colour alternates between factors.
fig, axes = plt.subplots(nrows=3, ncols=4, constrained_layout=True, figsize=(12,8))

barplot_panels = [
    ('GDP per capita', gdp_top10, gdp_bottom10, 'b'),
    ('Social Support', social_top10, social_bottom10, 'g'),
    ('Healthy life expectancy', life_top10, life_bottom10, 'b'),
    ('Freedom', freedom_top10, freedom_bottom10, 'g'),
    ('Perceptions of corruption', corruption_top10, corruption_bottom10, 'b'),
    ('Generosity', generosity_top10, generosity_bottom10, 'g'),
]
flat_axes = axes.ravel()
for slot, (factor, top_rows, bottom_rows, colour) in enumerate(barplot_panels):
    sns.barplot(x=factor, y='Country', data=top_rows, ax=flat_axes[2 * slot], color=colour)
    sns.barplot(x=factor, y='Country', data=bottom_rows, ax=flat_axes[2 * slot + 1], color=colour)

In [None]:
x = range(10)  # bar positions 0..9, one per country in each 10-row table

fig, ax = plt.subplots(nrows=3, ncols=4, figsize=(15,10))
fig.tight_layout(pad=3)

# (column to plot, label used in the title, top-10 rows, bottom-10 rows).
# Each factor occupies two neighbouring panels, filled row by row.
barh_panels = [
    ('GDP per capita', 'GDP per Capita', gdp_top10, gdp_bottom10),
    ('Social Support', 'Social Support', social_top10, social_bottom10),
    ('Healthy life expectancy', 'Healthy life expectancy', life_top10, life_bottom10),
    ('Freedom', 'Freedom', freedom_top10, freedom_bottom10),
    ('Generosity', 'Generosity', generosity_top10, generosity_bottom10),
    ('Perceptions of corruption', 'Corruption', corruption_top10, corruption_bottom10),
]

for slot, (column, label, top_rows, bottom_rows) in enumerate(barh_panels):
    row, first_col = divmod(2 * slot, 4)
    for offset, (prefix, rows) in enumerate((('Top', top_rows), ('Bottom', bottom_rows))):
        panel = ax[row][first_col + offset]
        # Plot the factor named in the title; the original GDP panels plotted
        # the Generosity column by mistake.
        panel.barh(x, rows[column])
        panel.set_title(f'{prefix} 10 countries {label}')
        # Fix the tick positions before labelling them. Without set_yticks the
        # labels are attached to matplotlib's automatic ticks and do not line
        # up with the bars.
        panel.set_yticks(list(x))
        panel.set_yticklabels(rows['Country'])

plt.show();

### Development of top10 countries from 2015 over time from 2015 - 2019 regarding rank and factors

In [None]:
# TODO: this cell is a placeholder and contains no code yet.
# Planned content: line chart time series small multiples

## Maps
### global

In [None]:
import plotly.express as px

# Animated world map: one frame per year, countries coloured by happiness rank.
# Countries are matched to the map by the names in the "Country" column.
world_map_options = dict(
    locationmode='country names',
    locations="Country",
    color="Happiness Rank",
    hover_name="Country",
    animation_frame="year",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig = px.choropleth(df_all, **world_map_options)
fig.show()

# Regional exploration

In [None]:
# Distinct region names present in the combined table.
set(df_all["Region"])

In [None]:
def _rows_in_region(region_name):
    """Return the rows of df_all whose Region equals `region_name`."""
    return df_all[df_all['Region'] == region_name]


# One subset of the combined table per region.
df_all_oceania = _rows_in_region('Australia and New Zealand')
df_all_EuropeCentralEast = _rows_in_region('Central and Eastern Europe')
# Fixed: this subset was filtered on 'Australia and New Zealand' (copy-paste
# slip), so it duplicated df_all_oceania instead of holding Western Europe.
df_all_EuropeWestern = _rows_in_region('Western Europe')
df_all_AmericaSouth = _rows_in_region('Latin America and Caribbean')
df_all_AmericaNorth = _rows_in_region('North America')
df_all_AfricaMiddleEastNorth = _rows_in_region('Middle East and Northern Africa')
df_all_AfricaSubSahara = _rows_in_region('Sub-Saharan Africa')
df_all_AsiaEast = _rows_in_region('Eastern Asia')
df_all_AsiaSouthEast = _rows_in_region('Southeastern Asia')
df_all_AsiaSouth = _rows_in_region('Southern Asia')

In [None]:
# Average Freedom value over all rows (all years) of the oceania subset.
df_all_oceania.Freedom.mean()

Happiness ratio per region in year 2015

In [None]:
# Mean 2015 happiness score per region. sort=False keeps the regions in order
# of first appearance, matching the order Series.unique() gave the original
# loop.
region_means = df2015.groupby('Region', sort=False)['Happiness Score'].mean()
region_lists = list(region_means.index)
region_happiness_ratio = list(region_means)

# The column is named 'region_happiness_ratio' (underscores). The original
# created it as 'region happiness ratio' but read it back with underscores,
# here and in the plotting cell, which raised a KeyError.
data = pd.DataFrame({'region': region_lists, 'region_happiness_ratio': region_happiness_ratio})

# Highest mean score first; the original row labels are kept.
sorted_data = data.sort_values('region_happiness_ratio', ascending=False)
new_index = sorted_data.index.values

sorted_data

In [None]:
# Bar chart of the mean happiness score per region, in the order of sorted_data.
plt.figure(figsize=(8,5))

# One cubehelix colour per region.
region_palette = sns.cubehelix_palette(len(sorted_data['region']))
sns.barplot(
    x=sorted_data['region'],
    y=sorted_data['region_happiness_ratio'],
    palette=region_palette,
)

plt.xticks(rotation= 90)  # region names are long; rotate them to avoid overlap
plt.xlabel('Region')
plt.ylabel('Region Happiness Ratio')
plt.title('Happiness rate for regions')
plt.show()

### Map visualization development of happiness rank from 2015-2019 per country

In [None]:
# Animated happiness-rank map, one frame per year, restricted to North America.
north_america_options = dict(
    locationmode='country names',
    locations="Country",
    color="Happiness Rank",
    hover_name="Country",
    animation_frame="year",
    scope="north america",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig = px.choropleth(df_all, **north_america_options)
fig.show()

In [None]:
# Animated happiness-rank map, one frame per year, restricted to Europe.
europe_options = dict(
    locationmode='country names',
    locations="Country",
    color="Happiness Rank",
    hover_name="Country",
    animation_frame="year",
    scope="europe",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig = px.choropleth(df_all, **europe_options)
fig.show()

In [None]:
# Animated happiness-rank map, one frame per year, restricted to Asia.
asia_options = dict(
    locationmode='country names',
    locations="Country",
    color="Happiness Rank",
    hover_name="Country",
    animation_frame="year",
    scope="asia",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig = px.choropleth(df_all, **asia_options)
fig.show()

In [None]:
# Animated happiness-rank map, one frame per year, restricted to Africa.
africa_options = dict(
    locationmode='country names',
    locations="Country",
    color="Happiness Rank",
    hover_name="Country",
    animation_frame="year",
    scope="africa",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig = px.choropleth(df_all, **africa_options)
fig.show()

In [None]:
# Animated happiness-rank map, one frame per year, restricted to South America.
south_america_options = dict(
    locationmode='country names',
    locations="Country",
    color="Happiness Rank",
    hover_name="Country",
    animation_frame="year",
    scope="south america",
    color_continuous_scale=px.colors.sequential.Plasma,
)
fig = px.choropleth(df_all, **south_america_options)
fig.show()

# Development of Switzerland from 2015-2019

In [None]:
# All rows for Switzerland in the combined table.
is_switzerland = df_all['Country'] == 'Switzerland'
column = df_all.loc[is_switzerland]
column