In [1]:
# Simple choropleth map from Happiness index
# Data imported from https://happiness-report.s3.amazonaws.com/2022/Appendix_1_StatiscalAppendix_Ch2.pdf
# Data looks at an average factors score that determines the rank of the countries 
import numpy as numpy
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import chart_studio.plotly as py
import plotly.graph_objs as go 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True) 
import cufflinks as cp

In [2]:
# Load the file 
happiness_report = pd.read_csv('The Happiness Project2.csv')

In [3]:
#
happiness_report.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147 entries, 0 to 146
Data columns (total 12 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   RANK                                        147 non-null    int64  
 1   Country                                     147 non-null    object 
 2   Happiness score                             146 non-null    float64
 3   Whisker-high                                146 non-null    float64
 4   Whisker-low                                 146 non-null    float64
 5   Dystopia (1.83) + residual                  146 non-null    float64
 6   Explained by: GDP per capita                146 non-null    float64
 7   Explained by: Social support                146 non-null    float64
 8   Explained by: Healthy life expectancy       146 non-null    float64
 9   Explained by: Freedom to make life choices  146 non-null    float64
 10  Explained by: 

In [4]:
# Data columns 
happiness_report.columns

Index(['RANK', 'Country', 'Happiness score', 'Whisker-high', 'Whisker-low',
       'Dystopia (1.83) + residual', 'Explained by: GDP per capita',
       'Explained by: Social support', 'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption'],
      dtype='object')

In [5]:
#Select only relevant columns
cols = ['RANK', 'Country', 'Happiness score', 'Explained by: GDP per capita',
       'Explained by: Social support', 'Explained by: Healthy life expectancy',
       'Explained by: Freedom to make life choices',
       'Explained by: Generosity', 'Explained by: Perceptions of corruption']

In [6]:
happiness_report = happiness_report[cols]

In [7]:
# Rename the columns names for relevancy
happiness_report.rename({'Explained by: GDP per capita':'GDP per capita','Explained by: Social support':'Social Support', 'Explained by: Healthy life expectancy':'Healthy life expectancy',
                       'Explained by: Freedom to make life choices':'Life Choice Freedom','Explained by: Generosity':'Generosity', 'Explained by: Perceptions of corruption':'Corruption Perception'}, axis=1,inplace=True)

In [8]:
# Check for any missing values in the columns
happiness_report.isnull().sum()

RANK                       0
Country                    0
Happiness score            1
GDP per capita             1
Social Support             1
Healthy life expectancy    1
Life Choice Freedom        1
Generosity                 1
Corruption Perception      1
dtype: int64

In [9]:
# Dropping missing data due to lack of relevancy
happiness_report.dropna(axis=0,inplace=True)


In [134]:
# Exploratory Analysis
# Top 5 ranked countries 
happiness_report.head(5)

Unnamed: 0,RANK,Country,Happiness score,GDP per capita,Social Support,Healthy life expectancy,Life Choice Freedom,Generosity,Corruption Perception,text
0,1,Finland,7.821,1.892,1.258,0.775,0.736,0.109,0.534,Finland<br>Happiness score7.821GDP per capita1...
1,2,Denmark,7.636,1.953,1.243,0.777,0.719,0.188,0.532,Denmark<br>Happiness score7.636GDP per capita1...
2,3,Iceland,7.557,1.936,1.32,0.803,0.718,0.27,0.191,Iceland<br>Happiness score7.557GDP per capita1...
3,4,Switzerland,7.512,2.026,1.226,0.822,0.677,0.147,0.461,Switzerland<br>Happiness score7.512GDP per cap...
4,5,Netherlands,7.415,1.945,1.206,0.787,0.651,0.271,0.419,Netherlands<br>Happiness score7.415GDP per cap...


In [133]:
# Bottom 5 ranked countries
happiness_report.tail(5)

Unnamed: 0,RANK,Country,Happiness score,GDP per capita,Social Support,Healthy life expectancy,Life Choice Freedom,Generosity,Corruption Perception,text
141,142,Botswana*,3.471,1.503,0.815,0.28,0.571,0.012,0.102,Botswana*<br>Happiness score3.471GDP per capit...
142,143,Rwanda*,3.268,0.785,0.133,0.462,0.621,0.187,0.544,Rwanda*<br>Happiness score3.268GDP per capita0...
143,144,Zimbabwe,2.995,0.947,0.69,0.27,0.329,0.106,0.105,Zimbabwe<br>Happiness score2.995GDP per capita...
144,145,Lebanon,2.955,1.392,0.498,0.631,0.103,0.082,0.034,Lebanon<br>Happiness score2.955GDP per capita1...
145,146,Afghanistan,2.404,0.758,0.0,0.289,0.0,0.089,0.005,Afghanistan<br>Happiness score2.404GDP per cap...


In [88]:
#Statistical Summary
happiness_report.describe().T

Unnamed: 0,count,unique,top,freq
RANK,146,146,1,1
Country,146,146,Finland,1
Happiness score,146,141,4.516,2
GDP per capita,146,141,1.815,3
Social Support,146,133,0.865,3
Healthy life expectancy,146,134,0.803,2
Life Choice Freedom,146,128,0.448,3
Generosity,146,116,0.089,4
Corruption Perception,146,116,0.077,4
text,146,146,Finland<br>Happiness score7.821GDP per capita1...,1


In [89]:
# Country ranking and important influencing components that determine it

for cols in happiness_report.columns:
    happiness_report[cols] = happiness_report[cols].astype(str)
happiness_report['text'] = happiness_report['Country'] +'<br>' + \
    'Happiness score' + happiness_report['Happiness score'] + 'GDP per capita' + happiness_report['GDP per capita'] + '<br>' + \
    'Social Support' + happiness_report['Social Support'] +'Healthy life expectancy' + happiness_report['Healthy life expectancy'] + '<br>' + \
    'Life Choice Freedom' + happiness_report['Life Choice Freedom'] + 'Generosity' + happiness_report['Generosity'] + 'Corruption Perception' +happiness_report['Corruption Perception']


In [90]:
factors = dict(type='choropleth',
            colorscale = 'Blues',
            locations = happiness_report['Country'],
            z = happiness_report['RANK'].astype(int),
            locationmode = 'country names',
            text = happiness_report['text'],
            colorbar_title = 'Countries Happiness Ranking'
            ) 

In [91]:
factor_layout = dict(title = '2005-2019 Happiness Ranking of Each Country<br>(Hover for more average factor breakdown)',
              geo = dict(scope='world'))

In [92]:
choromap = go.Figure(data = [factors],layout = factor_layout)

In [83]:
# Country ranking and important influencing components

for cols in happiness_report.columns:
    happiness_report[cols] = happiness_report[cols].astype(str)
happiness_report['text'] = happiness_report['Country'] +'<br>' + \
    'Happiness score' + happiness_report['Happiness score'] + 'GDP per capita' + happiness_report['GDP per capita'] + '<br>' + \
    'Social Support' + happiness_report['Social Support'] +'Healthy life expectancy' + happiness_report['Healthy life expectancy'] + '<br>' + \
    'Life Choice Freedom' + happiness_report['Life Choice Freedom'] + 'Generosity' + happiness_report['Generosity'] + 'Corruption Perception' +happiness_report['Corruption Perception']

In [84]:
factors = dict(type='choropleth',
            colorscale = 'Blues',
            locations = happiness_report['Country'],
            z = happiness_report['RANK'].astype(int),
            locationmode = 'country names',
            text = happiness_report['text'],
            colorbar_title = 'Countries Happiness Ranking'
            ) 

In [99]:
factor_layout = dict(title = 'Happiness Ranking of Each Country<br>(Hover for more average factor breakdown)',
              geo = dict(scope='world'))
             

In [100]:
choromap = go.Figure(data = [factors],layout = factor_layout)

In [142]:
iplot(choromap)