In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
%%bash

# *Warning*: the Kaggle API client must be installed and set up before running
# this cell. For more information: https://github.com/Kaggle/kaggle-api

# Stop at the first failing command. Otherwise a failed download would still
# create an empty `data` directory below and later runs would skip the download.
set -euo pipefail

# Guard on the file the notebook actually reads rather than on the directory,
# so a half-finished earlier attempt is retried instead of silently accepted.
if [ ! -f data/2015.csv ]; then
    kaggle datasets download -d unsdsn/world-happiness
    mkdir -p data
    # -o: overwrite without prompting (the cell has no interactive stdin).
    unzip -o world-happiness.zip -d data
fi

ls data

In [None]:
# Load the 2015 survey table from the directory populated by the download cell.
happiness_2015 = pd.read_csv(filepath_or_buffer="./data/2015.csv")

In [None]:
# Preview only the first rows instead of rendering the whole table.
happiness_2015.head()

In [None]:
# Average every numeric column per region, happiest region first.
# numeric_only=True is required on pandas >= 2.0, where mean() no longer drops
# non-numeric columns silently and raises instead.
(
    happiness_2015.groupby("Region")
    .mean(numeric_only=True)
    .sort_values("Happiness Score", ascending=False)
)

In [None]:
# Columns plotted against "Happiness Score" in the regression grid below.
variables = [
    "Economy (GDP per Capita)", "Family", "Health (Life Expectancy)",
    "Freedom", "Trust (Government Corruption)", "Generosity",
]
variables

In [None]:
# One regression panel (variable vs. "Happiness Score") per entry of `variables`,
# laid out on a two-column grid.
n_cols = 2
# Ceiling division: an odd number of variables still gets enough rows
# (plain floor division would drop the last panel and fail on indexing).
n_rows = -(-len(variables) // n_cols)

fig, axs = plt.subplots(
    ncols=n_cols,
    nrows=n_rows,
    figsize=(16, 2 * len(variables)),
    squeeze=False,  # keep `axs` two-dimensional even for a single row
)

for ax, column in zip(axs.flat, variables):
    sns.regplot(data=happiness_2015, y="Happiness Score", x=column, ax=ax)

# Hide the spare panel left over when the variable count is odd.
for ax in axs.flat[len(variables):]:
    ax.set_visible(False)

In [None]:
# Correlation matrix over the numeric columns only: on pandas >= 2.0, corr()
# raises if the frame still contains non-numeric columns.
sns.heatmap(happiness_2015.select_dtypes(include="number").corr());

In [None]:
# Distinct values found in the "Region" column; used to build one plot per region.
regions = happiness_2015.loc[:, "Region"].unique()
regions

In [None]:
# One correlation heatmap per region, laid out on a two-column grid.
n_cols = 2
# Ceiling division so an odd number of regions still gets enough rows.
n_rows = -(-len(regions) // n_cols)

fig, axs = plt.subplots(
    ncols=n_cols,
    nrows=n_rows,
    figsize=(12, 2 * len(regions)),
    squeeze=False,  # keep `axs` two-dimensional even for a single row
)

for ax, region in zip(axs.flat, regions):
    region_rows = happiness_2015[happiness_2015["Region"] == region]
    # Restrict to numeric columns: corr() raises on text columns in pandas >= 2.0.
    sns.heatmap(data=region_rows.select_dtypes(include="number").corr(), ax=ax)
    ax.set_title(f"{region} correlation plot")
    # Rotate the labels on every panel. Figure.autofmt_xdate is meant for date
    # axes and switches tick labels off on all rows but the bottom one.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

# Hide the spare panel left over when the region count is odd.
for ax in axs.flat[len(regions):]:
    ax.set_visible(False)

# Run the layout pass last so it can account for titles and rotated labels.
fig.tight_layout()

In [None]:
# Box plot of "Happiness Score" for each region, drawn twice as wide as tall.
plot = sns.catplot(
    x="Region",
    y="Happiness Score",
    kind="box",
    aspect=2,
    data=happiness_2015,
)
# Tilt the region names so neighbouring labels do not run into each other.
plot.set_xticklabels(rotation=45);

In [None]:
# "Happiness Score" against "Family": colour encodes the region and marker size
# encodes "Economy (GDP per Capita)".
sns.scatterplot(
    x="Family",
    y="Happiness Score",
    hue="Region",
    size="Economy (GDP per Capita)",
    data=happiness_2015,
)
# Two-column legend, anchored outside the right edge of the current axes.
plt.gca().legend(bbox_to_anchor=(2.2, 0.5), loc="center right", ncol=2);