In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings

warnings.filterwarnings("ignore")

In order to visualize the data, we need to both clean our data and create some useful columns.

### Knowing the Data

In [2]:
df=pd.read_csv("world-happiness-report-2021.csv")
df.head()

Unnamed: 0,Country name,Regional indicator,Ladder score,Standard error of ladder score,upperwhisker,lowerwhisker,Logged GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Ladder score in Dystopia,Explained by: Log GDP per capita,Explained by: Social support,Explained by: Healthy life expectancy,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption,Dystopia + residual
0,Finland,Western Europe,7.842,0.032,7.904,7.78,10.775,0.954,72.0,0.949,-0.098,0.186,2.43,1.446,1.106,0.741,0.691,0.124,0.481,3.253
1,Denmark,Western Europe,7.62,0.035,7.687,7.552,10.933,0.954,72.7,0.946,0.03,0.179,2.43,1.502,1.108,0.763,0.686,0.208,0.485,2.868
2,Switzerland,Western Europe,7.571,0.036,7.643,7.5,11.117,0.942,74.4,0.919,0.025,0.292,2.43,1.566,1.079,0.816,0.653,0.204,0.413,2.839
3,Iceland,Western Europe,7.554,0.059,7.67,7.438,10.878,0.983,73.0,0.955,0.16,0.673,2.43,1.482,1.172,0.772,0.698,0.293,0.17,2.967
4,Netherlands,Western Europe,7.464,0.027,7.518,7.41,10.932,0.942,72.4,0.913,0.175,0.338,2.43,1.501,1.079,0.753,0.647,0.302,0.384,2.798


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 20 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   Country name                                149 non-null    object 
 1   Regional indicator                          149 non-null    object 
 2   Ladder score                                149 non-null    float64
 3   Standard error of ladder score              149 non-null    float64
 4   upperwhisker                                149 non-null    float64
 5   lowerwhisker                                149 non-null    float64
 6   Logged GDP per capita                       149 non-null    float64
 7   Social support                              149 non-null    float64
 8   Healthy life expectancy                     149 non-null    float64
 9   Freedom to make life choices                149 non-null    float64
 10  Generosity    

In [4]:
df.isnull().sum()*100/len(df)

Country name                                  0.0
Regional indicator                            0.0
Ladder score                                  0.0
Standard error of ladder score                0.0
upperwhisker                                  0.0
lowerwhisker                                  0.0
Logged GDP per capita                         0.0
Social support                                0.0
Healthy life expectancy                       0.0
Freedom to make life choices                  0.0
Generosity                                    0.0
Perceptions of corruption                     0.0
Ladder score in Dystopia                      0.0
Explained by: Log GDP per capita              0.0
Explained by: Social support                  0.0
Explained by: Healthy life expectancy         0.0
Explained by: Freedom to make life choices    0.0
Explained by: Generosity                      0.0
Explained by: Perceptions of corruption       0.0
Dystopia + residual                           0.0


In [5]:
df[df.duplicated()]

Unnamed: 0,Country name,Regional indicator,Ladder score,Standard error of ladder score,upperwhisker,lowerwhisker,Logged GDP per capita,Social support,Healthy life expectancy,Freedom to make life choices,Generosity,Perceptions of corruption,Ladder score in Dystopia,Explained by: Log GDP per capita,Explained by: Social support,Explained by: Healthy life expectancy,Explained by: Freedom to make life choices,Explained by: Generosity,Explained by: Perceptions of corruption,Dystopia + residual


### Data Preprocessing

In [6]:
# I am planning to create Continent and ISO Codes columns. So we need to arrange the names of countries.
country_name_dict = {"Luxembourg*": "Luxembourg", 
                    "Guatemala*": "Guatemala",
                    "Kuwait*": "Kuwait",
                    "Belarus*": "Belarus",
                    "Turkmenistan*": "Turkmenistan",
                    "North Cyprus*": "North Cyprus",
                    "Libya*": "Libya",
                    "Azerbaijan*": "Azerbaijan",
                    "Gambia*": "Gambia",
                    "Liberia*": "Liberia",
                    "Niger*": "Niger",
                    "Comoros*": "Comoros",
                    "Palestinian Territories*": "Palestinian Territories",
                    "Eswatini, Kingdom of*": "Eswatini, Kingdom of",
                    "Madagascar*": "Madagascar",
                    "Chad*": "Chad",
                    "Yemen*": "Yemen",
                    "Mauritania*": "Mauritania",
                    "Lesotho*": "Lesotho",
                    "Botswana*": "Botswana",
                    "Rwanda*": "Rwanda",}
df["Country"] = df["Country name"].replace(country_name_dict)

In [7]:
# Editing the column names to improve clarity.
df.rename(columns={"Regional indicator":"Region",
                  "Ladder score":"Ladder_Score",
                  "Standard error of ladder score":"Error_Ladder_Score",
                  'Logged GDP per capita':"GDP_Per_capita",
                  "Social support":"Social_Support",
                  "Healthy life expectancy":"Life_Expectancy",
                  "Freedom to make life choices":"Freedom",
                  "Perceptions of corruption":"Perception_of_Corruption",
                  "Ladder score in Dystopia":"LS_Dystopia",
                  "Explained by: Log GDP per capita":"E_GDP_Per_Capita",
                  "Explained by: Social support":"E_Social_Support",
                  "Explained by: Healthy life expectancy":"E_Life_Expectancy",
                  "Explained by: Freedom to make life choices":"E_Freedom",
                  "Explained by: Generosity":"E_Generosity",
                  "Explained by: Perceptions of corruption":"E_Perception_of_Corruption",
                  "Dystopia + residual":"Dystopia_Residual"},inplace=True)

In [8]:
df.drop(columns=["Country name"],inplace=True)

In [9]:
# We don't need all the columns for the visualization. That's why we separate only those that will be useful to us.
df=df[["Country",'Region', 'Ladder_Score', 'Error_Ladder_Score', 'upperwhisker',
       'lowerwhisker', 'GDP_Per_capita', 'Social_Support', 'Life_Expectancy',
       'Freedom', 'Generosity', 'Perception_of_Corruption', 'LS_Dystopia',
       'E_GDP_Per_Capita', 'E_Social_Support', 'E_Life_Expectancy',
       'E_Freedom', 'E_Generosity', 'E_Perception_of_Corruption',
       'Dystopia_Residual']]

In [10]:
df.head()

Unnamed: 0,Country,Region,Ladder_Score,Error_Ladder_Score,upperwhisker,lowerwhisker,GDP_Per_capita,Social_Support,Life_Expectancy,Freedom,Generosity,Perception_of_Corruption,LS_Dystopia,E_GDP_Per_Capita,E_Social_Support,E_Life_Expectancy,E_Freedom,E_Generosity,E_Perception_of_Corruption,Dystopia_Residual
0,Finland,Western Europe,7.842,0.032,7.904,7.78,10.775,0.954,72.0,0.949,-0.098,0.186,2.43,1.446,1.106,0.741,0.691,0.124,0.481,3.253
1,Denmark,Western Europe,7.62,0.035,7.687,7.552,10.933,0.954,72.7,0.946,0.03,0.179,2.43,1.502,1.108,0.763,0.686,0.208,0.485,2.868
2,Switzerland,Western Europe,7.571,0.036,7.643,7.5,11.117,0.942,74.4,0.919,0.025,0.292,2.43,1.566,1.079,0.816,0.653,0.204,0.413,2.839
3,Iceland,Western Europe,7.554,0.059,7.67,7.438,10.878,0.983,73.0,0.955,0.16,0.673,2.43,1.482,1.172,0.772,0.698,0.293,0.17,2.967
4,Netherlands,Western Europe,7.464,0.027,7.518,7.41,10.932,0.942,72.4,0.913,0.175,0.338,2.43,1.501,1.079,0.753,0.647,0.302,0.384,2.798


In [11]:
#Creating continent column
Continent=[]

for i in df.Region:
    if i=="Sub-Saharan Africa":
        Continent.append("Africa")
    elif i=="Western Europe":
        Continent.append("Europe")
    elif i=="Latin America and Caribbean":
        Continent.append("America")
    elif i=="Middle East and North Africa":
        Continent.append("Temporary")
    elif i=="Central and Eastern Europe":
        Continent.append("Europe")
    elif i=="Commonwealth of Independent States":
        Continent.append("Temporary")
    elif i=="Southeast Asia":
        Continent.append("Asia")
    elif i=="East Asia":
        Continent.append("Asia")
    elif i=="South Asia":
        Continent.append("Asia")
    elif i=="North America and ANZ":
        Continent.append("Temporary")

In [12]:
df["Continent"]=Continent

In [13]:
df.head()

Unnamed: 0,Country,Region,Ladder_Score,Error_Ladder_Score,upperwhisker,lowerwhisker,GDP_Per_capita,Social_Support,Life_Expectancy,Freedom,...,Perception_of_Corruption,LS_Dystopia,E_GDP_Per_Capita,E_Social_Support,E_Life_Expectancy,E_Freedom,E_Generosity,E_Perception_of_Corruption,Dystopia_Residual,Continent
0,Finland,Western Europe,7.842,0.032,7.904,7.78,10.775,0.954,72.0,0.949,...,0.186,2.43,1.446,1.106,0.741,0.691,0.124,0.481,3.253,Europe
1,Denmark,Western Europe,7.62,0.035,7.687,7.552,10.933,0.954,72.7,0.946,...,0.179,2.43,1.502,1.108,0.763,0.686,0.208,0.485,2.868,Europe
2,Switzerland,Western Europe,7.571,0.036,7.643,7.5,11.117,0.942,74.4,0.919,...,0.292,2.43,1.566,1.079,0.816,0.653,0.204,0.413,2.839,Europe
3,Iceland,Western Europe,7.554,0.059,7.67,7.438,10.878,0.983,73.0,0.955,...,0.673,2.43,1.482,1.172,0.772,0.698,0.293,0.17,2.967,Europe
4,Netherlands,Western Europe,7.464,0.027,7.518,7.41,10.932,0.942,72.4,0.913,...,0.338,2.43,1.501,1.079,0.753,0.647,0.302,0.384,2.798,Europe


In [14]:
# Arranging the temporaries
df.at[8,"Continent"]="Australia"
df.at[10,"Continent"]="Australia"
df.at[11,"Continent"]="Asia"
df.at[13,"Continent"]="America"
df.at[18,"Continent"]="America"
df.at[21,"Continent"]="Asia"
df.at[24,"Continent"]="Asia"
df.at[25,"Continent"]="Asia"
df.at[41,"Continent"]="Asia"
df.at[44,"Continent"]="Asia"
df.at[46,"Continent"]="Asia"
df.at[64,"Continent"]="Europe"
df.at[66,"Continent"]="Asia"
df.at[74,"Continent"]="Europe"
df.at[75,"Continent"]="Europe"
df.at[77,"Continent"]="Asia"
df.at[79,"Continent"]="Africa"
df.at[85,"Continent"]="Asia"
df.at[89,"Continent"]="Asia"
df.at[96,"Continent"]="Asia"
df.at[103,"Continent"]="Europe"
df.at[105,"Continent"]="Africa"
df.at[107,"Continent"]="Asia"
df.at[108,"Continent"]="Africa"
df.at[109,"Continent"]="Europe"
df.at[110,"Continent"]="Asia"
df.at[117,"Continent"]="Asia"
df.at[121,"Continent"]="Africa"
df.at[122,"Continent"]="Asia"
df.at[124,"Continent"]="Asia"
df.at[126,"Continent"]="Asia"
df.at[131,"Continent"]="Africa"
df.at[140,"Continent"]="Asia"

In [15]:
# Creating ISO Code column.
import pycountry

input_countries=[]
for i in df.Country:
    input_countries.append(i)
countries={}

for country in pycountry.countries:
    countries[country.name]=country.alpha_3

    
codes=[countries.get(country, "Unknown Code") for country in input_countries]
print(codes)

df["Iso_Codes"]=codes

['FIN', 'DNK', 'CHE', 'ISL', 'NLD', 'NOR', 'SWE', 'LUX', 'NZL', 'AUT', 'AUS', 'ISR', 'DEU', 'CAN', 'IRL', 'CRI', 'GBR', 'Unknown Code', 'USA', 'BEL', 'FRA', 'BHR', 'MLT', 'Unknown Code', 'ARE', 'SAU', 'ESP', 'ITA', 'SVN', 'GTM', 'URY', 'SGP', 'Unknown Code', 'SVK', 'BRA', 'MEX', 'JAM', 'LTU', 'CYP', 'EST', 'PAN', 'UZB', 'CHL', 'POL', 'KAZ', 'ROU', 'KWT', 'SRB', 'SLV', 'MUS', 'LVA', 'COL', 'HUN', 'THA', 'NIC', 'JPN', 'ARG', 'PRT', 'HND', 'HRV', 'PHL', 'Unknown Code', 'PER', 'BIH', 'Unknown Code', 'ECU', 'KGZ', 'GRC', 'Unknown Code', 'MNG', 'PRY', 'MNE', 'DOM', 'Unknown Code', 'BLR', 'Unknown Code', 'Unknown Code', 'TJK', 'Unknown Code', 'LBY', 'MYS', 'IDN', 'Unknown Code', 'CHN', 'Unknown Code', 'ARM', 'NPL', 'BGR', 'MDV', 'AZE', 'CMR', 'SEN', 'ALB', 'MKD', 'GHA', 'NER', 'TKM', 'GMB', 'BEN', 'Unknown Code', 'BGD', 'GIN', 'ZAF', 'TUR', 'PAK', 'MAR', 'Unknown Code', 'GEO', 'DZA', 'UKR', 'IRQ', 'GAB', 'BFA', 'KHM', 'MOZ', 'NGA', 'MLI', 'Unknown Code', 'UGA', 'LBR', 'KEN', 'TUN', 'LBN', 'NA

In [16]:
# Final form of the data.
df.head()

Unnamed: 0,Country,Region,Ladder_Score,Error_Ladder_Score,upperwhisker,lowerwhisker,GDP_Per_capita,Social_Support,Life_Expectancy,Freedom,...,LS_Dystopia,E_GDP_Per_Capita,E_Social_Support,E_Life_Expectancy,E_Freedom,E_Generosity,E_Perception_of_Corruption,Dystopia_Residual,Continent,Iso_Codes
0,Finland,Western Europe,7.842,0.032,7.904,7.78,10.775,0.954,72.0,0.949,...,2.43,1.446,1.106,0.741,0.691,0.124,0.481,3.253,Europe,FIN
1,Denmark,Western Europe,7.62,0.035,7.687,7.552,10.933,0.954,72.7,0.946,...,2.43,1.502,1.108,0.763,0.686,0.208,0.485,2.868,Europe,DNK
2,Switzerland,Western Europe,7.571,0.036,7.643,7.5,11.117,0.942,74.4,0.919,...,2.43,1.566,1.079,0.816,0.653,0.204,0.413,2.839,Europe,CHE
3,Iceland,Western Europe,7.554,0.059,7.67,7.438,10.878,0.983,73.0,0.955,...,2.43,1.482,1.172,0.772,0.698,0.293,0.17,2.967,Europe,ISL
4,Netherlands,Western Europe,7.464,0.027,7.518,7.41,10.932,0.942,72.4,0.913,...,2.43,1.501,1.079,0.753,0.647,0.302,0.384,2.798,Europe,NLD
