In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# `data2021` is read from the 2021 report file; it is the frame every chart below uses.
data2021 = pd.read_csv(
    '../input/world-happiness-report-2021/world-happiness-report-2021.csv'
)
# `data` is read from the second file of the same dataset folder.
data = pd.read_csv(
    '../input/world-happiness-report-2021/world-happiness-report.csv'
)

<font size="5"> <b>The Dataset - World Happiness Report 2021</b> </font>

<font size="4">This dataset has information on 149 countries, grouped into 10 different regions.</font>
<font size="3">For comparison, there are 195 internationally recognised countries in the world today.</font>

In [None]:
# Show the first rows of the 2021 frame as the cell's output.
data2021.head()

In [None]:
# Number of countries in each region (value_counts orders them largest first).
RegionalCount = data2021["Regional indicator"].value_counts()
# Slice sizes handed to the pie-label formatter. They are read from the data
# rather than typed in by hand, so the labels cannot drift from what is plotted.
values = RegionalCount.tolist()

def make_autopct(values):
    """Build a pie-chart label formatter that shows absolute counts.

    The returned callable takes a wedge's share as a percentage, converts it
    back to a count using the sum of ``values``, and returns that count as an
    integer string.
    """
    def count_label(pct):
        # Undo the percentage: share of the total, rounded to the nearest integer.
        count = int(round(pct * sum(values) / 100.0))
        return format(count, 'd')
    return count_label

# Pie chart of the region sizes; each wedge is labelled with its country count.
plt.figure(figsize=(12, 6))
RegionalCount.plot.pie(
    autopct=make_autopct(values),
    fontsize='12',
    normalize=True,
    cmap='RdBu_r',
)
plt.ylabel("")  # blank out the y-axis label
plt.title("Number of Countries per Region", fontsize='20')

<font size="4"> <b>About the Happiness Score</b> </font>

In [None]:
# Rank every country once, from the highest to the lowest Ladder score.
ranked = data2021.sort_values(by='Ladder score', ascending=False)
Udata = ranked.tail(3)  # the three lowest-scoring countries
# Top three followed by bottom three. DataFrame.append was removed in
# pandas 2.0, so the two slices are combined with pd.concat instead.
Odata = pd.concat([ranked.head(3), Udata])

plt.figure(figsize=(10,3))
plt.title("Happiest and Unhappiest Countries ", fontsize="20")
sns.set_color_codes("pastel")
sns.barplot(x = Odata["Country name"], y = Odata["Ladder score"],palette='Spectral')

In [None]:
# Box plot of the Ladder score per region, with the individual countries
# drawn on top of it as a swarm of dark points on the same axes.
plt.figure(figsize=(18, 7))
ax = plt.gca()
sns.boxplot(
    x="Ladder score",
    y="Regional indicator",
    data=data2021,
    orient="h",
    palette="RdBu_r",
    ax=ax,
)
sns.swarmplot(
    x="Ladder score",
    y="Regional indicator",
    data=data2021,
    color=".15",
    ax=ax,
)
ax.set_title("Happiness Score per region")

In [None]:
# Row order used when the per-region aggregates are reindexed for plotting.
ordering = [
    'North America and ANZ',
    'Western Europe',
    'Central and Eastern Europe',
    'Latin America and Caribbean',
    'East Asia',
    'Commonwealth of Independent States',
    'Southeast Asia',
    'Middle East and North Africa',
    'Sub-Saharan Africa',
    'South Asia',
]

# The columns that are averaged per region and stacked in the chart.
explained_cols = [
    "Explained by: Log GDP per capita",
    "Explained by: Social support",
    "Explained by: Healthy life expectancy",
    "Explained by: Freedom to make life choices",
    "Explained by: Generosity",
    "Explained by: Perceptions of corruption",
    "Dystopia + residual",
]

Exp = data2021[["Country name", "Regional indicator", "Ladder score"] + explained_cols]
Exp_mean = Exp.groupby(by="Regional indicator")[explained_cols].mean()

# A DataFrame plot opens its own figure unless it is given an Axes, so a
# preceding plt.figure(...) would only leave an empty figure behind. The size
# is therefore passed straight to the plot call.
fig4 = Exp_mean.reindex(index = ordering).plot.barh(stacked=True, colormap='Spectral', figsize=(12, 6))
# Place the legend outside the plotting area, to the right of the bars.
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.title("How the six variables contribute to each Region’s average Happiness score", fontsize="18")
# Flip the y axis so the first entry of `ordering` is drawn at the top.
fig4.invert_yaxis()

<font size="5"> <b>Possible correlations between Happiness score and different social/economical parameters</b> </font>

<font size="4"> <b>Average GDP per capita x Average Happiness score per region.</b>  </font>

<font size="4">When we look at the average values for each region, there appears to be some correlation between these two variables.</font>

In [None]:
# Mean Ladder score per region, sorted from the highest to the lowest mean.
meanLadder = data2021.groupby(by="Regional indicator")["Ladder score"].mean()
meanLadder = meanLadder.sort_values(ascending=False)

plt.figure(figsize=(10,3))
plt.title("avg Happiness Score for each Region", fontsize="20")
sns.set_color_codes("pastel")
sns.barplot(x = meanLadder, y = meanLadder.index, orient = 'h',palette='Spectral')

# Mean logged GDP per capita per region, arranged in the `ordering` list
# defined in an earlier cell.
meanGDP = data2021.groupby(by="Regional indicator")["Logged GDP per capita"].mean()
meanGDP = meanGDP.reindex(index = ordering)

plt.figure(figsize=(10,3))
plt.title("avg GDP per capita for each Region", fontsize="20")
# No plt.legend() call here: nothing labelled has been drawn on the axes at
# this point, so a legend call would only emit a warning.
sns.set_color_codes("pastel")
sns.barplot(x = meanGDP, y = meanGDP.index, orient = 'h',palette='Spectral')

# Mean healthy life expectancy per region, in the same `ordering`.
# It is computed here but not plotted in this cell.
meanHeal = data2021.groupby(by="Regional indicator")["Healthy life expectancy"].mean()
meanHeal = meanHeal.reindex(index = ordering)

<font size="4"> <b>GDP per capita x Happiness score</b> </font>

<font size="4">Observing all countries independently, the <b>correlation</b> appears more clearly.</font>\
<font size="3">The data points can be fitted by a linear equation (Y = aX + b).</font>

In [None]:
# sns.relplot is a figure-level function that builds its own figure, so no
# plt.figure(...) call precedes it (one would only leave an empty figure).
sns.relplot(x="Logged GDP per capita", y="Ladder score", hue="Regional indicator", data=data2021
            , alpha=0.9,palette='Spectral', s=110);
# The relplot figure is the current one, so the title lands on its axes.
plt.title("GDP per capita vs Happiness score", fontsize="15")


<font size="4">Although it's easy to see this correlation between Happiness and GDP per capita, <b>Sub-Saharan Africa</b> countries are much more dispersed than those of other regions.</font>

<font size="4">It's important to mention that GDP per capita is a useful factor for this study, but one possible factor to add to this evaluation could be <b>Social Inequality</b>. A country's GDP per capita doesn't necessarily reflect the wealth of each individual in that society. </font>

<font size="4"> <b>Social Support, Healthy life expectancy and Happiness Score</b> </font>

<font size="4">A similar correlation appears when we plot Social Support vs Happiness Score and Healthy life expectancy vs Happiness Score.</font>

In [None]:
# sns.relplot is a figure-level function that builds its own figure, so no
# plt.figure(...) calls are made here (each would only leave an empty figure).
sns.relplot(x="Social support", y="Ladder score", hue="Regional indicator", data=data2021
            , alpha=0.9, palette='Spectral', s=110);
# Title the first relplot while its figure is still the current one.
plt.title("Social Support vs Happiness score", fontsize="15")

sns.relplot(x="Healthy life expectancy", y="Ladder score", hue="Regional indicator", data=data2021
            , alpha=0.9, palette='Spectral', s=110);
plt.title("Healthy Life Expectancy vs Happiness score", fontsize="15")

<font size="4"> <b>Perceptions of Corruption and Happiness Score</b> </font>

<font size="4">Looking at the plot below, it appears that the correlation is stronger in <b>Western Europe and North America and ANZ</b> than in other regions. For these 2 regions, the happiness score declines as the perception of corruption increases. </font>

In [None]:
# sns.relplot is a figure-level function that builds its own figure, so no
# plt.figure(...) call precedes it (one would only leave an empty figure).
sns.relplot(x="Perceptions of corruption", y="Ladder score", hue="Regional indicator", data=data2021
            , alpha=0.9, palette='Spectral', s=110);
# The relplot figure is the current one, so the title lands on its axes.
plt.title("Perceptions of corruption vs Happiness score", fontsize="15")

<font size="4"> <b>Freedom to make life choices and Happiness Score</b> </font>

<font size="4"> Freedom to make life choices also seems to have an impact on the Happiness score. The scattered points form a triangular shape, which implies that the <b>happiest countries</b> are also those with the highest sense of freedom.   </font>

In [None]:
# sns.relplot is a figure-level function that builds its own figure, so no
# plt.figure(...) call precedes it (one would only leave an empty figure).
sns.relplot(x="Freedom to make life choices", y="Ladder score", hue="Regional indicator", data=data2021
            , alpha=0.9, palette='Spectral', s=110);
# The relplot figure is the current one, so the title lands on its axes.
plt.title("Freedom to make life choices vs Happiness score", fontsize="15")

<font size="4"> <b>Generosity and Happiness Score</b> </font>

<font size="4">Unlike the other parameters, Generosity and Happiness score seem to have a weak correlation.\
It's interesting to note that the countries with the highest Generosity factors are <b>below the Happiness score average</b>.
</font> 

In [None]:
# sns.relplot is a figure-level function that builds its own figure, so no
# plt.figure(...) call precedes it (one would only leave an empty figure).
sns.relplot(x="Generosity", y="Ladder score", hue="Regional indicator", data=data2021
            , alpha=0.9, palette='Spectral', s=110);
# The relplot figure is the current one, so the title lands on its axes.
plt.title("Generosity vs Happiness score", fontsize="15")

<font size="5"><b>Conclusions</b> </font>
<font size="4">    
1. **GDP per capita and happiness score** show a good correlation. The countries with higher Happiness scores are also those with higher GDP per capita. This relation also holds for the countries with lower happiness scores.
2. A **social inequality** parameter could be a good factor to add to the dataset, to evaluate whether there is also a correlation with the happiness score of a population.
3. **Healthy life expectancy** and Happiness score seem to have a **linear correlation**, which is expected since the relationship between healthy life and GDP per capita is also linear.  
4. Happiness score and **Social support** seem to have an almost exponential relationship, which can indicate that social connection and mutual support between friends and family have a **great influence** on people's happiness.  
5. For countries from Western Europe and North America and ANZ, the **perception of corruption** has a significant influence on the Happiness score. It's worth noting that these countries have the highest scores for almost all parameters.   
6. The parameter **Freedom to Make Life Choices** is an interesting factor. The triangular shape of the scatter plot suggests that freedom is more of a **limiting factor** for happiness, but countries with a very similar sense of freedom can vary a lot in their happiness score.
7. **Generosity** seems to have a weak correlation with how happy countries are. It is particularly curious that the country with the highest happiness score actually has a **negative value** of generosity, and the countries with the highest generosity factors are **below the average happiness score**.  
</font>

<font size="6"> <b>Thank You for reading!</b> </font>