In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
happiness = pd.read_csv("Happiness_2019.csv")

## Data Exploration

In [3]:
happiness.shape

(156, 9)

In [4]:
happiness.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Overall rank                  156 non-null    int64  
 1   Country or region             156 non-null    object 
 2   Score                         156 non-null    float64
 3   GDP per capita                156 non-null    float64
 4   Social support                156 non-null    float64
 5   Healthy life expectancy       156 non-null    float64
 6   Freedom to make life choices  156 non-null    float64
 7   Generosity                    156 non-null    float64
 8   Perceptions of corruption     156 non-null    float64
dtypes: float64(7), int64(1), object(1)
memory usage: 11.1+ KB


In [5]:
happiness.head()

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


In [6]:
happiness.tail()

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
151,152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411
152,153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147
153,154,Afghanistan,3.203,0.35,0.517,0.361,0.0,0.158,0.025
154,155,Central African Republic,3.083,0.026,0.0,0.105,0.225,0.235,0.035
155,156,South Sudan,2.853,0.306,0.575,0.295,0.01,0.202,0.091


In [7]:
happiness.describe()

Unnamed: 0,Overall rank,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
count,156.0,156.0,156.0,156.0,156.0,156.0,156.0,156.0
mean,78.5,5.407096,0.905147,1.208814,0.725244,0.392571,0.184846,0.110603
std,45.177428,1.11312,0.398389,0.299191,0.242124,0.143289,0.095254,0.094538
min,1.0,2.853,0.0,0.0,0.0,0.0,0.0,0.0
25%,39.75,4.5445,0.60275,1.05575,0.54775,0.308,0.10875,0.047
50%,78.5,5.3795,0.96,1.2715,0.789,0.417,0.1775,0.0855
75%,117.25,6.1845,1.2325,1.4525,0.88175,0.50725,0.24825,0.14125
max,156.0,7.769,1.684,1.624,1.141,0.631,0.566,0.453


In [8]:
# Converting datatype of Country_Region from object to string
happiness["Country_Region"] = happiness["Country_Region"].astype("string")
happiness.info()

KeyError: 'Country_Region'

## Data Cleaning 

In [18]:
# column renaming
happiness = happiness.rename(columns={"Overall rank":"Rank","Country or region":"Country_Region","GDP per capita":"GDP_Capita","Social support":"Social Support",
                                    "Healthy life expectancy":"Life Expectancy","Freedom to make life choices":"Freedom",
                                    "Perceptions of corruption":"Corruption_Perceptions_Index"})

In [None]:
happiness.head(1)

*Column Indications*
* **Rank** - The Happiness rank out of all the countries
* **Country_Region** - Country names
* **Score** - Happiness score for each country
* **GDP** - Gross Domestic Product is the total market value of all goods and service produced by a country's economy
  during a specific period of time
* **per capita** - by head or for each person
* **Social Support** - how well assisted are the citizens from that particular country
* **Life Expectancy** - Life expectancy of citizens from a country
* **Freedom** - Freedom provided to the people on various basis
* **Generosity** - How Generous are the people towards each other
* **Corruption Perceptions Index** - Measure of corruption in a country

In [None]:
happiness.head()

In [None]:
# Null Check
happiness.isnull().sum()

In [None]:
# Duplication check
happiness.duplicated().sum()

In [None]:
happiness.info()

In [None]:
happiness.head()


## Effect of GDP on Happiness ?

In [None]:
plt.title("Effect of GDP on Happiness")
sns.scatterplot(x = happiness.GDP_Capita,y = happiness.Rank,alpha=1);

**It is clearly evident that GDP_Capita has a significant effect on the happiness level.**

**As a Country's GDP is increased,happiness level of citizens increases thus higher the rank of that country**

In [9]:
happiness.head()

Unnamed: 0,Overall rank,Country or region,Score,GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption
0,1,Finland,7.769,1.34,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.6,1.383,1.573,0.996,0.592,0.252,0.41
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.38,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298


## Countries with the Highest Corruption Perceptions Index ?

In [None]:
happiness.head()

In [None]:
corruptions = happiness.nlargest(10,["Corruption_Perceptions_Index"])
corruptions

In [None]:
plt.title("Corruption Perceptions Index w.r.t Rank")
sns.scatterplot(x=happiness.Corruption_Perceptions_Index,y=happiness.Rank)

**Corruption has no effect on the happiness level of citizens in a country as the top 25 ranked countries has the highest**
**levels of corruption**

In [None]:
happiness.shape

## Sort the last 10 countries in the decreasing order of Corruption Perceptions Index

In [None]:
corruption = happiness.loc[140:156].sort_values(by=["Corruption_Perceptions_Index"],ascending=False)

In [None]:
corruption

In [None]:
happiness.head()

## Check whether the rank repeats for two countries

In [None]:
happiness["Rank"].value_counts()

## Which country has the highest rating in terms of freedom?
(Display the complete country information)


In [None]:
#idxmax()
happiness.loc[happiness["Freedom"].idxmax()]

## Which country has the highest life expectancy? 
(Display the complete country information)

In [None]:
happiness.loc[happiness["Life Expectancy"].idxmax()]

In [None]:
happiness.head()

## Which country has the highest GDP & Freedom Rating?

In [None]:
happiness.loc[happiness[["GDP_Capita","Freedom"]].idxmax()]

## Which country has the least Social Support ?
(Display the complete country Information)

In [20]:
# idxmin()
happiness.loc[happiness["Social Support"].idxmin()]

Rank                                                 155
Country_Region                  Central African Republic
Score                                              3.083
GDP_Capita                                         0.026
Social Support                                       0.0
Life Expectancy                                    0.105
Freedom                                            0.225
Generosity                                         0.235
Corruption_Perceptions_Index                       0.035
Name: 154, dtype: object

## Create a new column named "Freedom" by merging values of Social Support and Freedom Column

In [None]:
happiness["Freedom_Support"] = happiness["Social Support"] + happiness["Freedom"]

In [None]:
happiness

## Which country has the highest Freedom and Support ?

In [None]:
happiness.loc[happiness["Freedom_Support"].idxmax()]

## Create a copy of the current dataframe

In [None]:
new_happiness = happiness.copy()

## Save the copy as a new csv file

In [None]:
happiness.to_csv("new_happiness.csv")

In [None]:
happiness

In [None]:
## Delete the column Freedom & Generosity from happiness Dataframe

In [None]:
happiness.drop(["Freedom","Social Support"],axis=1,inplace=True)

In [19]:
happiness

Unnamed: 0,Rank,Country_Region,Score,GDP_Capita,Social Support,Life Expectancy,Freedom,Generosity,Corruption_Perceptions_Index
0,1,Finland,7.769,1.340,1.587,0.986,0.596,0.153,0.393
1,2,Denmark,7.600,1.383,1.573,0.996,0.592,0.252,0.410
2,3,Norway,7.554,1.488,1.582,1.028,0.603,0.271,0.341
3,4,Iceland,7.494,1.380,1.624,1.026,0.591,0.354,0.118
4,5,Netherlands,7.488,1.396,1.522,0.999,0.557,0.322,0.298
...,...,...,...,...,...,...,...,...,...
151,152,Rwanda,3.334,0.359,0.711,0.614,0.555,0.217,0.411
152,153,Tanzania,3.231,0.476,0.885,0.499,0.417,0.276,0.147
153,154,Afghanistan,3.203,0.350,0.517,0.361,0.000,0.158,0.025
154,155,Central African Republic,3.083,0.026,0.000,0.105,0.225,0.235,0.035


## Display the country information which has the highest generosity level

In [14]:
happiness.iloc[happiness["Generosity"].idxmax()]

Overall rank                        131
Country or region               Myanmar
Score                              4.36
GDP per capita                     0.71
Social support                    1.181
Healthy life expectancy           0.555
Freedom to make life choices      0.525
Generosity                        0.566
Perceptions of corruption         0.172
Name: 130, dtype: object