In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# Load the 2019 happiness table; the path is relative to the notebook's working directory.
happiness = pd.read_csv(filepath_or_buffer="../Datasets/Happiness_2019.csv")

## Data Exploration

In [10]:
# Size of the loaded frame as (rows, columns).
happiness.shape

(156, 9)

In [14]:
# Column names, dtypes and non-null counts of the raw frame.
happiness.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Overall rank                  156 non-null    int64  
 1   Country or region             156 non-null    object 
 2   Score                         156 non-null    float64
 3   GDP per capita                156 non-null    float64
 4   Social support                156 non-null    float64
 5   Healthy life expectancy       156 non-null    float64
 6   Freedom to make life choices  156 non-null    float64
 7   Generosity                    156 non-null    float64
 8   Perceptions of corruption     156 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 11.1+ KB


In [12]:
# Preview the first five rows of the raw data.
happiness.head(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [16]:
# Preview the last five rows of the raw data.
happiness.tail(n=5)

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
151,152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411
152,153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147
153,154,Afghanistan,3.203,0.35,0.517,0.361,0.0,0.158,0.025
154,155,Central African Republic,3.083,0.026,0.0,0.105,0.225,0.235,0.035
155,156,South Sudan,2.853,0.306,0.575,0.295,0.01,0.202,0.091


In [15]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
happiness.describe()

Unnamed: 0,Overall rank,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
count,156.0,156.0,156.0,156.0,156.0,156.0,156.0,156.0
mean,78.5,5.407096,0.905147,1.208814,0.725244,0.392571,0.184846,0.110603
std,45.177428,1.11312,0.398389,0.299191,0.242124,0.143289,0.095254,0.094538
min,1.0,2.853,0.0,0.0,0.0,0.0,0.0,0.0
25%,39.75,4.5445,0.60275,1.05575,0.54775,0.308,0.10875,0.047
50%,78.5,5.3795,0.96,1.2715,0.789,0.417,0.1775,0.0855
75%,117.25,6.1845,1.2325,1.4525,0.88175,0.50725,0.24825,0.14125
max,156.0,7.769,1.684,1.624,1.141,0.631,0.566,0.453


## Data Cleaning 

In [23]:
# Replace the verbose source headers with short display names.
# "Score" and "Generosity" are absent from the mapping, so they keep their names.
happiness = happiness.rename(
    columns={
        "Overall rank": "Rank",
        "Country or region": "Country/Region",
        "GDP per capita": "GDP/Capita",
        "Social support": "Social Support",
        "Healthy life expectancy": "Life Expectancy",
        "Freedom to make life choices": "Freedom",
        "Perceptions of corruption": "Corruption Perceptions Index",
    }
)

In [22]:
# Spot-check the renamed headers on a single row.
happiness.head(n=1)

Unnamed: 0,Rank,Country/Region,Score,GDP/Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption Perceptions Index
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393


*Column Descriptions*
* **Rank** - Happiness rank of the country among all countries in the dataset
* **Country/Region** - Country names
* **Score** - Happiness score for each country
* **GDP** - Gross Domestic Product: the total market value of all goods and services produced by a country's economy
  during a specific period of time
* **per capita** - per head, i.e. for each person; the **GDP/Capita** column is therefore GDP per person
* **Social Support** - How well supported the citizens of a country are
* **Life Expectancy** - Healthy life expectancy of a country's citizens
* **Freedom** - Freedom of people to make their own life choices
* **Generosity** - How generous people are towards each other
* **Corruption Perceptions Index** - How corruption is perceived in a country (renamed from the original "Perceptions of corruption" column)

In [24]:
# Preview the first five rows under the renamed columns.
happiness.head(n=5)

Unnamed: 0,Rank,Country/Region,Score,GDP/Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption Perceptions Index
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [26]:
# Missing-value audit: number of NaN/None entries in each column.
happiness.isna().sum()

Rank                            0
Country/Region                  0
Score                           0
GDP/Capita                      0
Social Support                  0
Life Expectancy                 0
Freedom                         0
Generosity                      0
Corruption Perceptions Index    0
dtype: int64

In [27]:
# Duplication check: count rows that exactly repeat an earlier row across all columns.
happiness.duplicated().sum()

0

In [28]:
# Re-inspect the frame after renaming: column names change, dtypes and non-null counts should not.
happiness.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Rank                          156 non-null    int64  
 1   Country/Region                156 non-null    object 
 2   Score                         156 non-null    float64
 3   GDP/Capita                    156 non-null    float64
 4   Social Support                156 non-null    float64
 5   Life Expectancy               156 non-null    float64
 6   Freedom                       156 non-null    float64
 7   Generosity                    156 non-null    float64
 8   Corruption Perceptions Index  156 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 11.1+ KB
