In [2]:
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
import seaborn as sns
import numpy as np

In [4]:
happiness=pd.read_csv('../data/world-happiness-report.csv')

In [5]:
happiness

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.724,7.370,0.451,50.80,0.718,0.168,0.882,0.518,0.258
1,Afghanistan,2009,4.402,7.540,0.552,51.20,0.679,0.190,0.850,0.584,0.237
2,Afghanistan,2010,4.758,7.647,0.539,51.60,0.600,0.121,0.707,0.618,0.275
3,Afghanistan,2011,3.832,7.620,0.521,51.92,0.496,0.162,0.731,0.611,0.267
4,Afghanistan,2012,3.783,7.705,0.521,52.24,0.531,0.236,0.776,0.710,0.268
...,...,...,...,...,...,...,...,...,...,...,...
1944,Zimbabwe,2016,3.735,7.984,0.768,54.40,0.733,-0.095,0.724,0.738,0.209
1945,Zimbabwe,2017,3.638,8.016,0.754,55.00,0.753,-0.098,0.751,0.806,0.224
1946,Zimbabwe,2018,3.616,8.049,0.775,55.60,0.763,-0.068,0.844,0.710,0.212
1947,Zimbabwe,2019,2.694,7.950,0.759,56.20,0.632,-0.064,0.831,0.716,0.235


In [6]:
happiness.columns

Index(['Country name', 'year', 'Life Ladder', 'Log GDP per capita',
       'Social support', 'Healthy life expectancy at birth',
       'Freedom to make life choices', 'Generosity',
       'Perceptions of corruption', 'Positive affect', 'Negative affect'],
      dtype='object')

### Happiness is the outcome variable. It is measured by "Life Ladder"
### The other variables (GDP per capita, Social suppport, Life expectancy, Freedom, etc.) are factors that have an impact on happiness.

## Challenge 1: Find the top 10 happiest countries in the world

### Alternative 1: sort the dataframe by "Life Ladder" in reverse order and extract the first 10 rows.
### Alternative 2: groupby "Country name" and compute the mean of "Life Ladder", then sort in reverse order and extract the first 10 rows

In [5]:
happiness.sort_values(by='Life Ladder', ascending = True)
happiness.head(10)

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.724,7.37,0.451,50.8,0.718,0.168,0.882,0.518,0.258
1,Afghanistan,2009,4.402,7.54,0.552,51.2,0.679,0.19,0.85,0.584,0.237
2,Afghanistan,2010,4.758,7.647,0.539,51.6,0.6,0.121,0.707,0.618,0.275
3,Afghanistan,2011,3.832,7.62,0.521,51.92,0.496,0.162,0.731,0.611,0.267
4,Afghanistan,2012,3.783,7.705,0.521,52.24,0.531,0.236,0.776,0.71,0.268
5,Afghanistan,2013,3.572,7.725,0.484,52.56,0.578,0.061,0.823,0.621,0.273
6,Afghanistan,2014,3.131,7.718,0.526,52.88,0.509,0.104,0.871,0.532,0.375
7,Afghanistan,2015,3.983,7.702,0.529,53.2,0.389,0.08,0.881,0.554,0.339
8,Afghanistan,2016,4.22,7.697,0.559,53.0,0.523,0.042,0.793,0.565,0.348
9,Afghanistan,2017,2.662,7.697,0.491,52.8,0.427,-0.121,0.954,0.496,0.371


In [7]:
groupavg = happiness.groupby('Country name')['Life Ladder'].mean()
sortavg = groupavg.sort_values(ascending = False)
sortavg.head(10)

Country name
Denmark        7.680400
Finland        7.597154
Switzerland    7.548300
Norway         7.512400
Netherlands    7.466286
Iceland        7.446500
Canada         7.376333
Sweden         7.369467
New Zealand    7.310286
Australia      7.282071
Name: Life Ladder, dtype: float64

## Challenge 2: Compare the 5 happiest countries in the world with the 5 least happy ones


### Tip: find the top 5 and the bottom 5 countries, then filter the original dataframe to extract observations specific to those countries, finally visualize the results
### Tip: you can create a composite filter contatenating each country with an OR operator

In [8]:
happiest = sortavg.head(5)
happiest

Country name
Denmark        7.680400
Finland        7.597154
Switzerland    7.548300
Norway         7.512400
Netherlands    7.466286
Name: Life Ladder, dtype: float64

In [9]:
unhappiest = sortavg.tail(5)
unhappiest

Country name
Afghanistan                 3.594667
Togo                        3.555000
Burundi                     3.548200
Central African Republic    3.515000
South Sudan                 3.402000
Name: Life Ladder, dtype: float64

In [14]:
hcfilter = (happiness['Country name']=='Denmark') | (happiness['Country name']=='Finland') | (happiness['Country name']=='Switzerland') | (happiness['Country name']=='Norway') | (happiness['Country name']=='Netherlands')
top5 = happiness[hcfilter]
top5

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
449,Denmark,2005,8.019,10.851,0.972,69.60,0.971,,0.237,0.860,0.154
450,Denmark,2007,7.834,10.891,0.954,69.92,0.932,0.240,0.206,0.828,0.194
451,Denmark,2008,7.971,10.880,0.954,70.08,0.970,0.272,0.248,0.757,0.163
452,Denmark,2009,7.683,10.824,0.939,70.24,0.949,0.264,0.206,0.749,0.234
453,Denmark,2010,7.771,10.839,0.975,70.40,0.944,0.242,0.175,0.785,0.155
...,...,...,...,...,...,...,...,...,...,...,...
1657,Switzerland,2016,7.459,11.106,0.928,73.50,0.934,0.088,0.302,0.779,0.206
1658,Switzerland,2017,7.474,11.115,0.950,73.80,0.925,0.180,0.316,0.774,0.196
1659,Switzerland,2018,7.509,11.134,0.930,74.10,0.926,0.101,0.301,0.792,0.192
1660,Switzerland,2019,7.694,11.136,0.949,74.40,0.913,0.036,0.294,0.798,0.171


In [16]:
unhcfilter = (happiness['Country name']=='Afghanistan') | (happiness['Country name']=='Togo') | (happiness['Country name']=='Burundi') | (happiness['Country name']=='Central African Republic') | (happiness['Country name']=='South Sudan')
bottom5 = happiness[unhcfilter]
bottom5

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.724,7.37,0.451,50.8,0.718,0.168,0.882,0.518,0.258
1,Afghanistan,2009,4.402,7.54,0.552,51.2,0.679,0.19,0.85,0.584,0.237
2,Afghanistan,2010,4.758,7.647,0.539,51.6,0.6,0.121,0.707,0.618,0.275
3,Afghanistan,2011,3.832,7.62,0.521,51.92,0.496,0.162,0.731,0.611,0.267
4,Afghanistan,2012,3.783,7.705,0.521,52.24,0.531,0.236,0.776,0.71,0.268
5,Afghanistan,2013,3.572,7.725,0.484,52.56,0.578,0.061,0.823,0.621,0.273
6,Afghanistan,2014,3.131,7.718,0.526,52.88,0.509,0.104,0.871,0.532,0.375
7,Afghanistan,2015,3.983,7.702,0.529,53.2,0.389,0.08,0.881,0.554,0.339
8,Afghanistan,2016,4.22,7.697,0.559,53.0,0.523,0.042,0.793,0.565,0.348
9,Afghanistan,2017,2.662,7.697,0.491,52.8,0.427,-0.121,0.954,0.496,0.371


### Challenge 3: Which factors have an impact on happiness ?


#### Tip: consider computing the correlation between variables
#### Tip: consider https://seaborn.pydata.org/tutorial/relational.html#relating-variables-with-scatter-plots