In [31]:
#for manipulations
import numpy as np
import pandas as pd

#for data visualizations
import matplotlib.pyplot as plt
import seaborn as sns

#for interactivity
from ipywidgets import interact


In [32]:
#lets read the dataset
data = pd.read_csv('world-happiness-report.csv')

In [33]:
#lets check the shape of the dataset
print("shape of the Dataset: ", data.shape)

shape of the Dataset:  (1949, 11)


In [34]:
#lets check the head of the dataset
data.head()

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.724,7.37,0.451,50.8,0.718,0.168,0.882,0.518,0.258
1,Afghanistan,2009,4.402,7.54,0.552,51.2,0.679,0.19,0.85,0.584,0.237
2,Afghanistan,2010,4.758,7.647,0.539,51.6,0.6,0.121,0.707,0.618,0.275
3,Afghanistan,2011,3.832,7.62,0.521,51.92,0.496,0.162,0.731,0.611,0.267
4,Afghanistan,2012,3.783,7.705,0.521,52.24,0.531,0.236,0.776,0.71,0.268


In [35]:
#lets check if there is any missing value present in the dataset
data.isnull().sum()

Country name                          0
year                                  0
Life Ladder                           0
Log GDP per capita                   36
Social support                       13
Healthy life expectancy at birth     55
Freedom to make life choices         32
Generosity                           89
Perceptions of corruption           110
Positive affect                      22
Negative affect                      16
dtype: int64

In [40]:
#lets handl the missing values
new_data = data.dropna()

In [41]:
new_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1708 entries, 0 to 1948
Data columns (total 11 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Country name                      1708 non-null   object 
 1   year                              1708 non-null   int64  
 2   Life Ladder                       1708 non-null   float64
 3   Log GDP per capita                1708 non-null   float64
 4   Social support                    1708 non-null   float64
 5   Healthy life expectancy at birth  1708 non-null   float64
 6   Freedom to make life choices      1708 non-null   float64
 7   Generosity                        1708 non-null   float64
 8   Perceptions of corruption         1708 non-null   float64
 9   Positive affect                   1708 non-null   float64
 10  Negative affect                   1708 non-null   float64
dtypes: float64(9), int64(1), object(1)
memory usage: 160.1+ KB


In [37]:
#lets check the shape of data after drop the missing vlaues
new_data.shape

(1708, 11)

In [38]:
new_data.isnull().sum()

Country name                        0
year                                0
Life Ladder                         0
Log GDP per capita                  0
Social support                      0
Healthy life expectancy at birth    0
Freedom to make life choices        0
Generosity                          0
Perceptions of corruption           0
Positive affect                     0
Negative affect                     0
dtype: int64

In [39]:
new_data['year'].value_counts()

2011    132
2017    132
2014    127
2015    127
2018    126
2019    126
2013    124
2016    124
2012    121
2010    112
2009    106
2008    101
2007     94
2020     81
2006     74
2005      1
Name: year, dtype: int64

In [13]:
#lets check the summary statistics for each year

@interact
def summary(year_list = list(new_data['year'].value_counts().index)):
    x = new_data[new_data['year'] == year_list]
    print("........................................................")
    print("Statistics for Life Ladder")
    print("Minimum of Life Ladder : ", x['Life Ladder'].min())
    print("Average of Life Ladder : ", x['Life Ladder'].mean())
    print("Maximum of Life Ladder : ", x['Life Ladder'].max())
    print("........................................................")
    print("Statistics for Log GDP per capita")
    print("Minimum of Log GDP per capita : ", x['Log GDP per capita'].min())
    print("Average of Log GDP per capita : ", x['Log GDP per capita'].mean())
    print("Maximum of Log GDP per capita : ", x['Log GDP per capita'].max())
    print("........................................................")
    print("Statistics for Social support")
    print("Minimum of Social support : ", x['Social support'].min())
    print("Average of Social support : ", x['Social support'].mean())
    print("Maximum of Social support : ", x['Social support'].max())
    print("........................................................")
    print("Statistics for Healthy life expectancy at birth")
    print("Minimum of Healthy life expectancy at birth : ", x['Healthy life expectancy at birth'].min())
    print("Average of Healthy life expectancy at birth : ", x['Healthy life expectancy at birth'].mean())
    print("Maximum of Healthy life expectancy at birth : ", x['Healthy life expectancy at birth'].max())
    print("........................................................")
    print("Statistics for Freedom to make life choices")
    print("Minimum of Freedom to make life choices : ", x['Freedom to make life choices'].min())
    print("Average of Freedom to make life choices : ", x['Freedom to make life choices'].mean())
    print("Maximum of Freedom to make life choices : ", x['Freedom to make life choices'].max())
    print("........................................................")
    print("Statistics for Generosity")
    print("Minimum of Generosity : ", x['Generosity'].min())
    print("Average of Generosity : ", x['Generosity'].mean())
    print("Maximum of Generosity : ", x['Generosity'].max())
    print("........................................................")
    print("Statistics for Perceptions of corruption")
    print("Minimum of Perceptions of corruption : ", x['Perceptions of corruption'].min())
    print("Average of Perceptions of corruption : ", x['Perceptions of corruption'].mean())
    print("Maximum of Perceptions of corruption : ", x['Perceptions of corruption'].max())
    print("........................................................")
    print("Statistics for Positive affect")
    print("Minimum of Positive affect : ", x['Positive affect'].min())
    print("Average of Positive affect : ", x['Positive affect'].mean())
    print("Maximum of Positive affect : ", x['Positive affect'].max())
    print("........................................................")
    print("Statistics for Negative affect")
    print("Minimum of Negative affect : ", x['Negative affect'].min())
    print("Average of Negative affect : ", x['Negative affect'].mean())
    print("Maximum of Negative affect : ", x['Negative affect'].max())
    print("........................................................")

interactive(children=(Dropdown(description='year_list', options=(2011, 2017, 2014, 2015, 2018, 2019, 2013, 201…

In [14]:
#lets check the summary statistics for each country

@interact
def summary(country_list = list(new_data['Country name'].value_counts().index)):
    x = new_data[new_data['Country name'] == country_list]
    print("........................................................")
    print("Statistics for Life Ladder")
    print("Minimum of Life Ladder : ", x['Life Ladder'].min())
    print("Average of Life Ladder : ", x['Life Ladder'].mean())
    print("Maximum of Life Ladder : ", x['Life Ladder'].max())
    print("........................................................")
    print("Statistics for Log GDP per capita")
    print("Minimum of Log GDP per capita : ", x['Log GDP per capita'].min())
    print("Average of Log GDP per capita : ", x['Log GDP per capita'].mean())
    print("Maximum of Log GDP per capita : ", x['Log GDP per capita'].max())
    print("........................................................")
    print("Statistics for Social support")
    print("Minimum of Social support : ", x['Social support'].min())
    print("Average of Social support : ", x['Social support'].mean())
    print("Maximum of Social support : ", x['Social support'].max())
    print("........................................................")
    print("Statistics for Healthy life expectancy at birth")
    print("Minimum of Healthy life expectancy at birth : ", x['Healthy life expectancy at birth'].min())
    print("Average of Healthy life expectancy at birth : ", x['Healthy life expectancy at birth'].mean())
    print("Maximum of Healthy life expectancy at birth : ", x['Healthy life expectancy at birth'].max())
    print("........................................................")
    print("Statistics for Freedom to make life choices")
    print("Minimum of Freedom to make life choices : ", x['Freedom to make life choices'].min())
    print("Average of Freedom to make life choices : ", x['Freedom to make life choices'].mean())
    print("Maximum of Freedom to make life choices : ", x['Freedom to make life choices'].max())
    print("........................................................")
    print("Statistics for Generosity")
    print("Minimum of Generosity : ", x['Generosity'].min())
    print("Average of Generosity : ", x['Generosity'].mean())
    print("Maximum of Generosity : ", x['Generosity'].max())
    print("........................................................")
    print("Statistics for Perceptions of corruption")
    print("Minimum of Perceptions of corruption : ", x['Perceptions of corruption'].min())
    print("Average of Perceptions of corruption : ", x['Perceptions of corruption'].mean())
    print("Maximum of Perceptions of corruption : ", x['Perceptions of corruption'].max())
    print("........................................................")
    print("Statistics for Positive affect")
    print("Minimum of Positive affect : ", x['Positive affect'].min())
    print("Average of Positive affect : ", x['Positive affect'].mean())
    print("Maximum of Positive affect : ", x['Positive affect'].max())
    print("........................................................")
    print("Statistics for Negative affect")
    print("Minimum of Negative affect : ", x['Negative affect'].min())
    print("Average of Negative affect : ", x['Negative affect'].mean())
    print("Maximum of Negative affect : ", x['Negative affect'].max())
    print("........................................................")

interactive(children=(Dropdown(description='country_list', options=('Zimbabwe', 'Bolivia', 'Kyrgyzstan', 'Lith…

In [43]:
#lets check some interesting facts

print("Countries Which have the High Generosity:", new_data[new_data['Generosity'] > 0.688]['Country name'].unique())
print("Countries Which have the High Social support:", new_data[new_data['Social support'] > 0.98]['Country name'].unique())
print("Countries Which have the High Freedom to make life choices:", new_data[new_data['Freedom to make life choices'] > 0.97]['Country name'].unique())
print("Countries Which have the High Healthy life expectancy at birth:", new_data[new_data['Healthy life expectancy at birth'] > 75]['Country name'].unique())
print("Countries Which have the Low Healthy life expectancy at birth:", new_data[new_data['Healthy life expectancy at birth'] < 45]['Country name'].unique())
print("Countries Which have the High Perceptions of corruption:", new_data[new_data['Perceptions of corruption'] > 0.95]['Country name'].unique())
print("Countries Which have the Low Perceptions of corruption:", new_data[new_data['Perceptions of corruption'] < 0.078]['Country name'].unique())
print("Countries Which have the High Positive affect:", new_data[new_data['Positive affect'] > 0.93]['Country name'].unique())
print("Countries Which have the Low Positive affect:", new_data[new_data['Positive affect'] < 0.44]['Country name'].unique())
print("Countries Which have the High Negative affect:", new_data[new_data['Negative affect'] > 0.55]['Country name'].unique())
print("Countries Which have the Low Negative affect:", new_data[new_data['Negative affect'] < 0.11]['Country name'].unique())


Countries Which have the High Generosity: ['Myanmar']
Countries Which have the High Social support: ['Iceland' 'Ireland' 'New Zealand']
Countries Which have the High Freedom to make life choices: ['Uzbekistan']
Countries Which have the High Healthy life expectancy at birth: ['Japan' 'Singapore']
Countries Which have the Low Healthy life expectancy at birth: ['Central African Republic' 'Chad' 'Haiti' 'Malawi' 'Mozambique' 'Nigeria'
 'Sierra Leone' 'Swaziland' 'Zambia' 'Zimbabwe']
Countries Which have the High Perceptions of corruption: ['Afghanistan' 'Bosnia and Herzegovina' 'Bulgaria' 'Cambodia' 'Chad'
 'Croatia' 'Czech Republic' 'Greece' 'Hungary' 'Indonesia' 'Lithuania'
 'Moldova' 'Mongolia' 'Portugal' 'Romania' 'Russia' 'Serbia'
 'Trinidad and Tobago' 'Ukraine' 'Zimbabwe']
Countries Which have the Low Perceptions of corruption: ['Singapore']
Countries Which have the High Positive affect: ['Paraguay' 'Thailand']
Countries Which have the Low Positive affect: ['Afghanistan' 'Georgia' '

In [63]:
#lets split the Dataset for predictive Modeling

y = new_data['Country name']
x = new_data.drop(['Country name'], axis=1)

print("Shape of y:", y.shape)
print("Shape of x:", x.shape)

Shape of y: (1708,)
Shape of x: (1708, 10)


In [45]:
#lets create Training any Testing sets for validation of Results

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size= 1, random_state = 0)

print("The shape of x train:", x_train.shape)
print("The shape of x test:", x_test.shape)
print("The shape of y train:", y_train.shape)
print("The shape of y test:", y_test.shape)

The shape of x train: (1707, 10)
The shape of x test: (1, 10)
The shape of y train: (1707,)
The shape of y test: (1,)


In [46]:
#lets create a predictive Model

from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [49]:
new_data.head()

Unnamed: 0,Country name,year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect
0,Afghanistan,2008,3.724,7.37,0.451,50.8,0.718,0.168,0.882,0.518,0.258
1,Afghanistan,2009,4.402,7.54,0.552,51.2,0.679,0.19,0.85,0.584,0.237
2,Afghanistan,2010,4.758,7.647,0.539,51.6,0.6,0.121,0.707,0.618,0.275
3,Afghanistan,2011,3.832,7.62,0.521,51.92,0.496,0.162,0.731,0.611,0.267
4,Afghanistan,2012,3.783,7.705,0.521,52.24,0.531,0.236,0.776,0.71,0.268


In [65]:
prediction = model.predict((np.array([[2009,
                                      3.7,
                                      7.3,
                                      0.451,
                                      50.80,
                                      0.7,
                                      0.168,
                                      0.882,
                                      0.518,
                                      0.258]])))

print("The S", prediction)

Something     ['Uganda']


