In [1]:
%config Completer.use_jedi = False

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
path = '/kaggle/input/world-happiness-report/'

data_2021 = pd.read_csv(os.path.join(path,'2021.csv'))
data_2022 = pd.read_csv(os.path.join(path,'2022.csv'))


In [4]:
print(data_2021.info())
print(data_2022.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 20 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   Country name                                149 non-null    object 
 1   Regional indicator                          149 non-null    object 
 2   Ladder score                                149 non-null    float64
 3   Standard error of ladder score              149 non-null    float64
 4   upperwhisker                                149 non-null    float64
 5   lowerwhisker                                149 non-null    float64
 6   Logged GDP per capita                       149 non-null    float64
 7   Social support                              149 non-null    float64
 8   Healthy life expectancy                     149 non-null    float64
 9   Freedom to make life choices                149 non-null    float64
 10  Generosity    

In [5]:
from sklearn.preprocessing import StandardScaler

In [6]:
scaler1 = StandardScaler()
cols = ['Explained by: Social support']
data_2021[cols] = scaler1.fit_transform(data_2021[cols])

In [7]:
data_2021[cols]

Unnamed: 0,Explained by: Social support
0,1.211951
1,1.219702
2,1.107300
3,1.467763
4,1.107300
...,...
144,-0.241532
145,-0.268663
146,-2.291911
147,-0.559360


In [8]:
data_2022['Happiness score'] = data_2022['Happiness score'].str.replace(',', '.').astype(float)

In [9]:
data_2022['Explained by: GDP per capita'] = data_2022['Explained by: GDP per capita'].str.replace(',', '.').astype(float)

In [10]:
data_2022['Explained by: Social support'] = data_2022['Explained by: Social support'].str.replace(',', '.').astype(float)
data_2022['Explained by: Healthy life expectancy'] = data_2022['Explained by: Healthy life expectancy'].str.replace(',', '.').astype(float)

In [11]:
scaler2 = StandardScaler()
cols = ['Explained by: Social support']
data_2022[cols] = scaler1.fit_transform(data_2022[cols])

In [12]:
data_2021['Happiness Rank'] = data_2021['Ladder score'].rank(ascending=False)

In [13]:
rows = []

for i, row in data_2021.iterrows():
    rows.append([row['Country name'], row['Regional indicator'], row['Happiness Rank'], row['Ladder score'], row['Explained by: Log GDP per capita'], row['Explained by: Social support'], row['Explained by: Healthy life expectancy']])
    
print(len(rows))

149


In [14]:
result = pd.DataFrame(columns=['Country', 'Region', 'Rank', 'Score', 'GDP', 'Social Support', 'Health'], data=rows)
result.head()

Unnamed: 0,Country,Region,Rank,Score,GDP,Social Support,Health
0,Finland,Western Europe,1.0,7.842,1.446,1.211951,0.741
1,Denmark,Western Europe,2.0,7.62,1.502,1.219702,0.763
2,Switzerland,Western Europe,3.0,7.571,1.566,1.1073,0.816
3,Iceland,Western Europe,4.0,7.554,1.482,1.467763,0.772
4,Netherlands,Western Europe,5.0,7.464,1.501,1.1073,0.753


In [15]:
data_2022.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147 entries, 0 to 146
Data columns (total 12 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   RANK                                        147 non-null    int64  
 1   Country                                     147 non-null    object 
 2   Happiness score                             146 non-null    float64
 3   Whisker-high                                146 non-null    object 
 4   Whisker-low                                 146 non-null    object 
 5   Dystopia (1.83) + residual                  146 non-null    object 
 6   Explained by: GDP per capita                146 non-null    float64
 7   Explained by: Social support                146 non-null    float64
 8   Explained by: Healthy life expectancy       146 non-null    float64
 9   Explained by: Freedom to make life choices  146 non-null    object 
 10  Explained by: 

In [16]:
result.set_index('Country', inplace=True)
result.head()

Unnamed: 0_level_0,Region,Rank,Score,GDP,Social Support,Health
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Finland,Western Europe,1.0,7.842,1.446,1.211951,0.741
Denmark,Western Europe,2.0,7.62,1.502,1.219702,0.763
Switzerland,Western Europe,3.0,7.571,1.566,1.1073,0.816
Iceland,Western Europe,4.0,7.554,1.482,1.467763,0.772
Netherlands,Western Europe,5.0,7.464,1.501,1.1073,0.753


In [17]:
data_2022.set_index('Country', inplace=True)

In [18]:
column_mapping_2022 = {
    "Happiness score": "Score",
    "Explained by: GDP per capita": "GDP",
    "Explained by: Social support": "Social Support",
    "Explained by: Healthy life expectancy": "Health",
}

data_2022.rename(columns=column_mapping_2022, inplace=True)

In [19]:
data_2022.head()

Unnamed: 0_level_0,RANK,Score,Whisker-high,Whisker-low,Dystopia (1.83) + residual,GDP,Social Support,Health,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Finland,1,7.821,7886,7756,2518,1.892,1.261414,0.775,736,109,534
Denmark,2,7.636,7710,7563,2226,1.953,1.207681,0.777,719,188,532
Iceland,3,7.557,7651,7464,2320,1.936,1.483508,0.803,718,270,191
Switzerland,4,7.512,7586,7437,2153,2.026,1.146784,0.822,677,147,461
Netherlands,5,7.415,7471,7359,2137,1.945,1.075141,0.787,651,271,419


In [20]:
result['Year'] = 2021
result.head()

Unnamed: 0_level_0,Region,Rank,Score,GDP,Social Support,Health,Year
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Finland,Western Europe,1.0,7.842,1.446,1.211951,0.741,2021
Denmark,Western Europe,2.0,7.62,1.502,1.219702,0.763,2021
Switzerland,Western Europe,3.0,7.571,1.566,1.1073,0.816,2021
Iceland,Western Europe,4.0,7.554,1.482,1.467763,0.772,2021
Netherlands,Western Europe,5.0,7.464,1.501,1.1073,0.753,2021


In [21]:
data_2022['Year'] = 2022
data_2022.head()

Unnamed: 0_level_0,RANK,Score,Whisker-high,Whisker-low,Dystopia (1.83) + residual,GDP,Social Support,Health,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption,Year
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Finland,1,7.821,7886,7756,2518,1.892,1.261414,0.775,736,109,534,2022
Denmark,2,7.636,7710,7563,2226,1.953,1.207681,0.777,719,188,532,2022
Iceland,3,7.557,7651,7464,2320,1.936,1.483508,0.803,718,270,191,2022
Switzerland,4,7.512,7586,7437,2153,2.026,1.146784,0.822,677,147,461,2022
Netherlands,5,7.415,7471,7359,2137,1.945,1.075141,0.787,651,271,419,2022


In [22]:
data_2022['Region'] = pd.merge(data_2022, result, how='left', on='Country')['Region'].values

In [23]:
result.reset_index(inplace=True)

In [24]:
result.head()

Unnamed: 0,Country,Region,Rank,Score,GDP,Social Support,Health,Year
0,Finland,Western Europe,1.0,7.842,1.446,1.211951,0.741,2021
1,Denmark,Western Europe,2.0,7.62,1.502,1.219702,0.763,2021
2,Switzerland,Western Europe,3.0,7.571,1.566,1.1073,0.816,2021
3,Iceland,Western Europe,4.0,7.554,1.482,1.467763,0.772,2021
4,Netherlands,Western Europe,5.0,7.464,1.501,1.1073,0.753,2021


In [25]:
data_2022.reset_index(inplace=True)

In [26]:
result2 = data_2022[['Country','Region','RANK', 'Score', 'GDP', "Social Support", 'Health', 'Year' ]]
result2

Unnamed: 0,Country,Region,RANK,Score,GDP,Social Support,Health,Year
0,Finland,Western Europe,1,7.821,1.892,1.261414,0.775,2022
1,Denmark,Western Europe,2,7.636,1.953,1.207681,0.777,2022
2,Iceland,Western Europe,3,7.557,1.936,1.483508,0.803,2022
3,Switzerland,Western Europe,4,7.512,2.026,1.146784,0.822,2022
4,Netherlands,Western Europe,5,7.415,1.945,1.075141,0.787,2022
...,...,...,...,...,...,...,...,...
142,Rwanda*,,143,3.268,0.785,-2.768525,0.462,2022
143,Zimbabwe,Sub-Saharan Africa,144,2.995,0.947,-0.773258,0.270,2022
144,Lebanon,Middle East and North Africa,145,2.955,1.392,-1.461034,0.631,2022
145,Afghanistan,South Asia,146,2.404,0.758,-3.244953,0.289,2022


In [27]:
result2.rename(columns={'RANK':'Rank'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [28]:
final = pd.concat([result, result2], ignore_index=True)
final

Unnamed: 0,Country,Region,Rank,Score,GDP,Social Support,Health,Year
0,Finland,Western Europe,1.0,7.842,1.446,1.211951,0.741,2021
1,Denmark,Western Europe,2.0,7.620,1.502,1.219702,0.763,2021
2,Switzerland,Western Europe,3.0,7.571,1.566,1.107300,0.816,2021
3,Iceland,Western Europe,4.0,7.554,1.482,1.467763,0.772,2021
4,Netherlands,Western Europe,5.0,7.464,1.501,1.107300,0.753,2021
...,...,...,...,...,...,...,...,...
291,Rwanda*,,143.0,3.268,0.785,-2.768525,0.462,2022
292,Zimbabwe,Sub-Saharan Africa,144.0,2.995,0.947,-0.773258,0.270,2022
293,Lebanon,Middle East and North Africa,145.0,2.955,1.392,-1.461034,0.631,2022
294,Afghanistan,South Asia,146.0,2.404,0.758,-3.244953,0.289,2022


In [32]:
final.to_csv('/kaggle/working/2021_2022.csv', index=False)