<a href="https://colab.research.google.com/github/Krzesimir13/DataQuest/blob/main/world_happiness.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# World Happiness Report

## Loading the dataset

In [16]:
import pandas as pd
import numpy as np
from pathlib import Path
import kagglehub

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [9]:
# Fetch the "unsdsn/world-happiness" dataset through kagglehub and keep the
# value returned by dataset_download; the next cell turns it into a Path.
path_0 = kagglehub.dataset_download("unsdsn/world-happiness")

print("Path to dataset files:", path_0)

Using Colab cache for faster access to the 'world-happiness' dataset.
Path to dataset files: /kaggle/input/world-happiness


In [10]:
# Wrap the download location in a Path so files can be addressed with `/`.
path = Path(path_0)

# Show which files the dataset ships with (iterdir gives no ordering
# guarantee, so the listing is not necessarily sorted by year).
list(path.iterdir())

[PosixPath('/kaggle/input/world-happiness/2015.csv'),
 PosixPath('/kaggle/input/world-happiness/2017.csv'),
 PosixPath('/kaggle/input/world-happiness/2019.csv'),
 PosixPath('/kaggle/input/world-happiness/2018.csv'),
 PosixPath('/kaggle/input/world-happiness/2016.csv')]

In [13]:
# Read the 2015 file, then work on a copy so data_2015 stays exactly as it
# was loaded from disk.
data_2015 = pd.read_csv(path / '2015.csv')
happiness_2015 = data_2015.copy()

## Dataset preview

In [14]:
# Peek at the top of the table; n=5 is spelled out to match the variable name.
first_5 = happiness_2015.head(n=5)
first_5

Unnamed: 0,Country,Region,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
0,Switzerland,Western Europe,1,7.587,0.03411,1.39651,1.34951,0.94143,0.66557,0.41978,0.29678,2.51738
1,Iceland,Western Europe,2,7.561,0.04884,1.30232,1.40223,0.94784,0.62877,0.14145,0.4363,2.70201
2,Denmark,Western Europe,3,7.527,0.03328,1.32548,1.36058,0.87464,0.64938,0.48357,0.34139,2.49204
3,Norway,Western Europe,4,7.522,0.0388,1.459,1.33095,0.88521,0.66973,0.36503,0.34699,2.46531
4,Canada,North America,5,7.427,0.03553,1.32629,1.32261,0.90563,0.63297,0.32957,0.45811,2.45176


In [15]:
# Print the column names, non-null counts and dtypes to check for gaps.
happiness_2015.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 12 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Country                        158 non-null    object 
 1   Region                         158 non-null    object 
 2   Happiness Rank                 158 non-null    int64  
 3   Happiness Score                158 non-null    float64
 4   Standard Error                 158 non-null    float64
 5   Economy (GDP per Capita)       158 non-null    float64
 6   Family                         158 non-null    float64
 7   Health (Life Expectancy)       158 non-null    float64
 8   Freedom                        158 non-null    float64
 9   Trust (Government Corruption)  158 non-null    float64
 10  Generosity                     158 non-null    float64
 11  Dystopia Residual              158 non-null    float64
dtypes: float64(9), int64(1), object(2)
memory usage: 14.9+ KB

## Aggregating the data

In [18]:
# Manual version of a group-wise mean: one boolean filter per region.
# The groupby cells further down compute the same numbers in a single call.
regions = happiness_2015['Region'].unique()
mean_happiness = {}

for region in regions:
    in_region = happiness_2015['Region'] == region
    mean_happiness[region] = happiness_2015.loc[in_region, 'Happiness Score'].mean()

mean_happiness

{'Western Europe': np.float64(6.689619047619048),
 'North America': np.float64(7.273),
 'Australia and New Zealand': np.float64(7.285),
 'Middle East and Northern Africa': np.float64(5.406899999999999),
 'Latin America and Caribbean': np.float64(6.1446818181818195),
 'Southeastern Asia': np.float64(5.317444444444444),
 'Central and Eastern Europe': np.float64(5.332931034482757),
 'Eastern Asia': np.float64(5.626166666666666),
 'Sub-Saharan Africa': np.float64(4.2028),
 'Southern Asia': np.float64(4.580857142857143)}

In [21]:
# Build the GroupBy object once; the following cells reuse it.
grouped = happiness_2015.groupby('Region')
# Pull out the rows belonging to a single region.
aus_nz = grouped.get_group('Australia and New Zealand')

In [22]:
# Mapping from each region name to the index labels of its rows.
grouped.groups

{'Australia and New Zealand': [8, 9], 'Central and Eastern Europe': [30, 43, 44, 51, 53, 54, 55, 58, 59, 61, 63, 68, 69, 72, 76, 79, 82, 85, 86, 88, 92, 94, 95, 103, 105, 110, 126, 129, 133], 'Eastern Asia': [37, 45, 46, 71, 83, 99], 'Latin America and Caribbean': [11, 13, 15, 22, 24, 26, 29, 31, 32, 39, 40, 41, 42, 47, 50, 52, 56, 57, 64, 97, 104, 118], 'Middle East and Northern Africa': [10, 19, 21, 27, 34, 38, 48, 62, 67, 75, 81, 91, 102, 106, 107, 109, 111, 134, 135, 155], 'North America': [4, 14], 'Southeastern Asia': [23, 33, 60, 73, 74, 89, 98, 128, 144], 'Southern Asia': [78, 80, 108, 116, 120, 131, 152], 'Sub-Saharan Africa': [70, 77, 84, 90, 93, 96, 100, 112, 113, 114, 115, 117, 119, 121, 122, 123, 124, 125, 127, 130, 132, 136, 137, 138, 139, 140, 141, 142, 143, 145, 146, 147, 148, 149, 150, 151, 153, 154, 156, 157], 'Western Europe': [0, 1, 2, 3, 5, 6, 7, 12, 16, 17, 18, 20, 25, 28, 35, 36, 49, 65, 66, 87, 101]}

In [26]:
# Select the North America rows two ways: directly by rows 4 and 14 (the
# entries grouped.groups lists for that region) and through the GroupBy object.
north_america = happiness_2015.iloc[[4, 14]]
na_group = grouped.get_group('North America')
# Compare the contents with DataFrame.equals. The `is` operator only tests
# whether both names point to the same object, which is never the case for
# two separately built DataFrames, so it cannot confirm the selections match.
equal = north_america.equals(na_group)
equal

True

In [28]:
# Number of rows in each region's group.
grouped.size()

Unnamed: 0_level_0,0
Region,Unnamed: 1_level_1
Australia and New Zealand,2
Central and Eastern Europe,29
Eastern Asia,6
Latin America and Caribbean,22
Middle East and Northern Africa,20
North America,2
Southeastern Asia,9
Southern Asia,7
Sub-Saharan Africa,40
Western Europe,21


In [30]:
# Per-region mean of every numeric column; numeric_only=True keeps the text
# columns (e.g. Country) out of the aggregation.
means = grouped.mean(numeric_only=True)
means

Unnamed: 0_level_0,Happiness Rank,Happiness Score,Standard Error,Economy (GDP per Capita),Family,Health (Life Expectancy),Freedom,Trust (Government Corruption),Generosity,Dystopia Residual
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Australia and New Zealand,9.5,7.285,0.03727,1.29188,1.31445,0.919965,0.64531,0.392795,0.455315,2.265355
Central and Eastern Europe,79.0,5.332931,0.045208,0.942438,1.053042,0.718774,0.358269,0.086674,0.152264,2.0214
Eastern Asia,64.5,5.626167,0.037225,1.15178,1.099427,0.877388,0.46249,0.127695,0.225885,1.681607
Latin America and Caribbean,46.909091,6.144682,0.061127,0.876815,1.10472,0.70387,0.50174,0.117172,0.217788,2.622577
Middle East and Northern Africa,77.6,5.4069,0.046337,1.066974,0.92049,0.705615,0.361751,0.181702,0.190376,1.980008
North America,10.0,7.273,0.03696,1.3604,1.28486,0.88371,0.589505,0.244235,0.42958,2.480935
Southeastern Asia,81.222222,5.317444,0.042691,0.789054,0.940468,0.677357,0.557104,0.151276,0.419261,1.78302
Southern Asia,113.142857,4.580857,0.032161,0.560486,0.645321,0.54083,0.373337,0.102536,0.341429,2.016769
Sub-Saharan Africa,127.9,4.2028,0.055299,0.380473,0.809085,0.282332,0.365944,0.123878,0.221137,2.01998
Western Europe,29.52381,6.689619,0.037625,1.298596,1.247302,0.909148,0.549926,0.231463,0.302109,2.151185


In [31]:
# Narrow the GroupBy object to a single column before aggregating, so only
# the 'Happiness Score' means are computed.
happy_grouped = grouped['Happiness Score']
happy_mean = happy_grouped.mean()
happy_mean

Unnamed: 0_level_0,Happiness Score
Region,Unnamed: 1_level_1
Australia and New Zealand,7.285
Central and Eastern Europe,5.332931
Eastern Asia,5.626167
Latin America and Caribbean,6.144682
Middle East and Northern Africa,5.4069
North America,7.273
Southeastern Asia,5.317444
Southern Asia,4.580857
Sub-Saharan Africa,4.2028
Western Europe,6.689619


In [42]:
def dif(group):
    """Return how far the largest value in ``group`` lies above its mean.

    ``group`` is any object exposing ``max()`` and ``mean()``; in this
    notebook it is the per-region 'Happiness Score' series handed over by
    ``agg``.
    """
    peak = group.max()
    average = group.mean()
    return peak - average

# Custom aggregation: gap between each region's top score and its mean.
happy_mean_max_dif = happy_grouped.agg(dif)
# Built-in aggregations side by side, for comparison with the gap above.
happy_mean_max = happy_grouped.agg(['mean', 'max'])

happy_mean_max_dif

Unnamed: 0_level_0,Happiness Score
Region,Unnamed: 1_level_1
Australia and New Zealand,0.001
Central and Eastern Europe,1.172069
Eastern Asia,0.671833
Latin America and Caribbean,1.081318
Middle East and Northern Africa,1.8711
North America,0.154
Southeastern Asia,1.480556
Southern Asia,0.672143
Sub-Saharan Africa,1.2742
Western Europe,0.897381


In [44]:
# The whole split-apply-combine step as one chain: group by region, pick the
# score column, take the mean.
happiness_means = (
    happiness_2015
    .groupby('Region')['Happiness Score']
    .mean()
)
happiness_means

Unnamed: 0_level_0,Happiness Score
Region,Unnamed: 1_level_1
Australia and New Zealand,7.285
Central and Eastern Europe,5.332931
Eastern Asia,5.626167
Latin America and Caribbean,6.144682
Middle East and Northern Africa,5.4069
North America,7.273
Southeastern Asia,5.317444
Southern Asia,4.580857
Sub-Saharan Africa,4.2028
Western Europe,6.689619
