In [7]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

%matplotlib inline

In [8]:
# Read the 2021.csv file into a DataFrame and add a 'Year' column
df_2021 = pd.read_csv("2021.csv")
df_2021["Year"] = 2021

# Read the 2022.csv file into a DataFrame and add a 'Year' column
df_2022 = pd.read_csv("2022.csv")
df_2022["Year"] = 2022


In [9]:
df_2021['Happiness Rank'] = df_2021['Ladder score'].rank(ascending=False)

In [10]:
df_2021.drop('Generosity', axis=1, inplace=True)

In [11]:
column_mapping_2021 = {
    "Country name": "Country",
    "Regional indicator": "Region",
    "Ladder score": "Happiness score",
    "Explained by: Log GDP per capita": "Economy",
    "Explained by: Social support": "Family",
    "Explained by: Healthy life expectancy": "Healthy",
    "Explained by: Freedom to make life choices": "Freedom",
    "Explained by: Generosity": "Generosity",
    "Explained by: Perceptions of corruption": "Trust",
    "Dystopia + residual": "Dystopia residual"
}

df_2021.rename(columns=column_mapping_2021, inplace=True)

In [12]:
column_mapping_2022 = {
    "RANK": "Happiness Rank",
    "Country": "Country",
    "Happiness score": "Happiness score",
    "Dystopia (1.83) + residual": "Dystopia residual",
    "Explained by: GDP per capita": "Economy",
    "Explained by: Social support": "Family",
    "Explained by: Healthy life expectancy": "Healthy",
    "Explained by: Freedom to make life choices": "Freedom",
    "Explained by: Generosity": "Generosity",
    "Explained by: Perceptions of corruption": "Trust"
}

df_2022.rename(columns=column_mapping_2022, inplace=True)

In [13]:
df_2022 = df_2022.merge(df_2021[['Country', 'Region']], on='Country', how='left')

In [14]:
desired_columns = [
    "Country",
    "Region",
    "Happiness Rank",
    "Happiness score",
    "Economy",
    "Family",
    "Healthy",
    "Freedom",
    "Generosity",
    "Trust",
    "Dystopia residual",
    "Year"
]
refined_df_2021 = df_2021[desired_columns]

In [15]:
desired_columns = [
    "Country",
    "Region",
    "Happiness Rank",
    "Happiness score",
    "Economy",
    "Family",
    "Healthy",
    "Freedom",
    "Generosity",
    "Trust",
    "Dystopia residual",
    "Year"
]

refined_df_2022 = df_2022[desired_columns]

In [16]:
# Get the intersection of columns from both DataFrames
common_columns = list(set(refined_df_2021.columns) & set(refined_df_2022.columns))

# Reorder columns in both DataFrames to match
refined_df_2021 = refined_df_2021[common_columns]
refined_df_2022 = refined_df_2022[common_columns]

# Concatenate the DataFrames
combined_df = pd.concat([refined_df_2021, refined_df_2022], axis=0, ignore_index=True)


In [17]:
combined_df

Unnamed: 0,Country,Region,Happiness Rank,Generosity,Happiness score,Freedom,Economy,Family,Healthy,Trust,Year,Dystopia residual
0,Finland,Western Europe,1.0,0.124,7.842,0.691,1.446,1.106,0.741,0.481,2021,3.253
1,Denmark,Western Europe,2.0,0.208,7.62,0.686,1.502,1.108,0.763,0.485,2021,2.868
2,Switzerland,Western Europe,3.0,0.204,7.571,0.653,1.566,1.079,0.816,0.413,2021,2.839
3,Iceland,Western Europe,4.0,0.293,7.554,0.698,1.482,1.172,0.772,0.17,2021,2.967
4,Netherlands,Western Europe,5.0,0.302,7.464,0.647,1.501,1.079,0.753,0.384,2021,2.798
...,...,...,...,...,...,...,...,...,...,...,...,...
291,Rwanda*,,143.0,0187,3268,0621,0785,0133,0462,0544,2022,0536
292,Zimbabwe,Sub-Saharan Africa,144.0,0106,2995,0329,0947,0690,0270,0105,2022,0548
293,Lebanon,Middle East and North Africa,145.0,0082,2955,0103,1392,0498,0631,0034,2022,0216
294,Afghanistan,South Asia,146.0,0089,2404,0000,0758,0000,0289,0005,2022,1263


In [18]:
refined_df_2021.dtypes

Country               object
Region                object
Happiness Rank       float64
Generosity           float64
Happiness score      float64
Freedom              float64
Economy              float64
Family               float64
Healthy              float64
Trust                float64
Year                   int64
Dystopia residual    float64
dtype: object

In [19]:
refined_df_2022.dtypes

Country              object
Region               object
Happiness Rank        int64
Generosity           object
Happiness score      object
Freedom              object
Economy              object
Family               object
Healthy              object
Trust                object
Year                  int64
Dystopia residual    object
dtype: object

In [20]:
refined_df_2022['Generosity'] = refined_df_2022['Generosity'].str.replace(',', '.').astype(float)
refined_df_2022['Family'] = refined_df_2022['Family'].str.replace(',', '.').astype(float)
refined_df_2022['Freedom'] = refined_df_2022['Freedom'].str.replace(',', '.').astype(float)
refined_df_2022['Trust'] = refined_df_2022['Trust'].str.replace(',', '.').astype(float)
refined_df_2022['Healthy'] = refined_df_2022['Healthy'].str.replace(',', '.').astype(float)
refined_df_2022['Happiness score'] = refined_df_2022['Happiness score'].str.replace(',', '.').astype(float)
refined_df_2022['Economy'] = refined_df_2022['Economy'].str.replace(',', '.').astype(float)
refined_df_2022['Dystopia residual'] = refined_df_2022['Dystopia residual'].str.replace(',', '.').astype(float)

In [21]:
refined_df_2022.dtypes

Country               object
Region                object
Happiness Rank         int64
Generosity           float64
Happiness score      float64
Freedom              float64
Economy              float64
Family               float64
Healthy              float64
Trust                float64
Year                   int64
Dystopia residual    float64
dtype: object

In [22]:
refined_df_2022

Unnamed: 0,Country,Region,Happiness Rank,Generosity,Happiness score,Freedom,Economy,Family,Healthy,Trust,Year,Dystopia residual
0,Finland,Western Europe,1,0.109,7.821,0.736,1.892,1.258,0.775,0.534,2022,2.518
1,Denmark,Western Europe,2,0.188,7.636,0.719,1.953,1.243,0.777,0.532,2022,2.226
2,Iceland,Western Europe,3,0.270,7.557,0.718,1.936,1.320,0.803,0.191,2022,2.320
3,Switzerland,Western Europe,4,0.147,7.512,0.677,2.026,1.226,0.822,0.461,2022,2.153
4,Netherlands,Western Europe,5,0.271,7.415,0.651,1.945,1.206,0.787,0.419,2022,2.137
...,...,...,...,...,...,...,...,...,...,...,...,...
142,Rwanda*,,143,0.187,3.268,0.621,0.785,0.133,0.462,0.544,2022,0.536
143,Zimbabwe,Sub-Saharan Africa,144,0.106,2.995,0.329,0.947,0.690,0.270,0.105,2022,0.548
144,Lebanon,Middle East and North Africa,145,0.082,2.955,0.103,1.392,0.498,0.631,0.034,2022,0.216
145,Afghanistan,South Asia,146,0.089,2.404,0.000,0.758,0.000,0.289,0.005,2022,1.263


In [None]:
sns.pairplot(refined_df_2021)