# Merging dataframes

# Imports

In [3]:
import pandas as pd
import numpy as np

# 1) Merging social expenditure data with World Happiness data

### 1.1) Loading data from file with data frames

In [6]:
# Path of the HDF5 file and the keys under which the two input frames live.
file_name = 'dataframes.h5'
hap_key, soc_key = "happiness_df", "social_exp_df"

# Read both frames from the same file, in key order.
hap_df, soc_df = (pd.read_hdf(file_name, key=k) for k in (hap_key, soc_key))

### 1.2) Merging data frames

In [8]:
# Inner join on the shared (Country, Year) keys: only country-years that
# appear in both frames survive the merge.
hap_soc_df = hap_df.merge(soc_df, how='inner', on=['Country', 'Year'])
hap_soc_df.head(5)

Unnamed: 0,Country,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,Social expenditure as % of GDP
0,Australia,2007,7.285391,10.694434,0.965276,69.959999,0.890682,0.343434,0.512578,0.762304,0.215351,15.858
1,Australia,2008,7.253757,10.709456,0.946635,70.040001,0.915733,0.301722,0.430811,0.728992,0.218427,17.05
2,Australia,2010,7.450047,10.713649,0.95452,70.199997,0.932059,0.313121,0.366127,0.761716,0.220073,16.553
3,Australia,2011,7.405616,10.723386,0.967029,70.279999,0.944586,0.365759,0.381772,0.724132,0.195324,17.02
4,Australia,2012,7.195586,10.744205,0.944599,70.360001,0.935146,0.270048,0.368252,0.728092,0.214397,17.308


In [9]:
# Structural summary of the merged frame: row count, column dtypes and
# non-null counts per column.
hap_soc_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 424 entries, 0 to 423
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Country                           424 non-null    object 
 1   Year                              424 non-null    int64  
 2   Life Ladder                       424 non-null    float64
 3   Log GDP per capita                424 non-null    float64
 4   Social support                    424 non-null    float64
 5   Healthy life expectancy at birth  424 non-null    float64
 6   Freedom to make life choices      424 non-null    float64
 7   Generosity                        424 non-null    float64
 8   Perceptions of corruption         424 non-null    float64
 9   Positive affect                   424 non-null    float64
 10  Negative affect                   424 non-null    float64
 11  Social expenditure as % of GDP    424 non-null    float64
dtypes: float

### 1.3) Storing the merged data frame

In [11]:
# Persist the merged frame under its own key in the same HDF5 file the inputs
# were read from. Reuse `file_name` (defined in section 1.1) instead of
# repeating the literal path, so reads and writes cannot drift apart.
with pd.HDFStore(file_name) as store:
    store['hap_soc_df'] = hap_soc_df

# 2) Merging taxation data into happiness and social expenditure data

### 2.1) Loading data from file with data frames

In [14]:
# Key of the taxation frame inside the HDF5 file.
tax_key = "taxation_df"

# Read the taxation frame from the same file as the earlier inputs.
tax_df = pd.read_hdf(file_name, key=tax_key)

### 2.2) Merging data frames

In [16]:
# Inner join of the happiness/social-expenditure frame with the taxation
# frame on (Country, Year); unmatched country-years are dropped.
hp_tax_df = hap_soc_df.merge(tax_df, how='inner', on=['Country', 'Year'])
hp_tax_df.head(5)

Unnamed: 0,Country,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,Social expenditure as % of GDP,Main aggregated taxation as % of GDP,Tax difference 2012-2022
0,Austria,2010,7.302679,10.855984,0.914193,69.900002,0.89598,0.126924,0.546145,0.710302,0.155793,28.052,41.100269,1.3
1,Austria,2011,7.470513,10.881422,0.944157,70.0,0.939356,0.12759,0.702721,0.672192,0.145238,27.231,41.229561,1.3
2,Austria,2012,7.400689,10.883644,0.945142,70.099998,0.919704,0.113814,0.770586,0.71215,0.156675,27.611,41.870248,1.3
3,Austria,2013,7.498803,10.878005,0.949809,70.199997,0.921734,0.164263,0.678937,0.725053,0.162603,28.031,42.74021,1.3
4,Austria,2014,6.95,10.876781,0.89892,70.300003,0.885027,0.113623,0.566931,0.720965,0.17015,28.255,42.793747,1.3


In [17]:
# Structural summary after adding the taxation columns: row count, column
# dtypes and non-null counts per column.
hp_tax_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 286 entries, 0 to 285
Data columns (total 14 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Country                               286 non-null    object 
 1   Year                                  286 non-null    int64  
 2   Life Ladder                           286 non-null    float64
 3   Log GDP per capita                    286 non-null    float64
 4   Social support                        286 non-null    float64
 5   Healthy life expectancy at birth      286 non-null    float64
 6   Freedom to make life choices          286 non-null    float64
 7   Generosity                            286 non-null    float64
 8   Perceptions of corruption             286 non-null    float64
 9   Positive affect                       286 non-null    float64
 10  Negative affect                       286 non-null    float64
 11  Social expenditure 

### 2.3) Storing the merged data frame

In [19]:
# Persist the happiness + social expenditure + taxation frame under its own
# key. Reuse `file_name` (defined in section 1.1) instead of repeating the
# literal path, so reads and writes always target the same HDF5 file.
with pd.HDFStore(file_name) as store:
    store['hp_tax_df'] = hp_tax_df

# 3) Merging unemployment data into happiness, social expenditure and taxation data

In [21]:
# Key of the unemployment frame inside the HDF5 file.
une_key = "unempl_act_df"
une_df = pd.read_hdf(file_name, key=une_key)

# Inner join with the unemployment frame on (Country, Year); only
# country-years present on both sides are kept.
hpt_une_df = hp_tax_df.merge(une_df, how='inner', on=['Country', 'Year'])
hpt_une_df.head(5)

Unnamed: 0,Country,Year,Life Ladder,Log GDP per capita,Social support,Healthy life expectancy at birth,Freedom to make life choices,Generosity,Perceptions of corruption,Positive affect,Negative affect,Social expenditure as % of GDP,Main aggregated taxation as % of GDP,Tax difference 2012-2022,Unemployment as % of labour force
0,Austria,2012,7.400689,10.883644,0.945142,70.099998,0.919704,0.113814,0.770586,0.71215,0.156675,27.611,41.870248,1.3,5.2
1,Austria,2013,7.498803,10.878005,0.949809,70.199997,0.921734,0.164263,0.678937,0.725053,0.162603,28.031,42.74021,1.3,5.7
2,Austria,2014,6.95,10.876781,0.89892,70.300003,0.885027,0.113623,0.566931,0.720965,0.17015,28.255,42.793747,1.3,6.0
3,Austria,2015,7.076447,10.875665,0.92811,70.400002,0.900305,0.09491,0.55748,0.747708,0.164469,28.261,43.211649,1.3,6.1
4,Austria,2016,7.048072,10.884549,0.926319,70.525002,0.888514,0.075823,0.523641,0.713451,0.197424,28.197,41.815396,1.3,6.5


In [22]:
# Structural summary of the fully merged frame: row count, column dtypes and
# non-null counts per column.
hpt_une_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 222 entries, 0 to 221
Data columns (total 15 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Country                               222 non-null    object 
 1   Year                                  222 non-null    int64  
 2   Life Ladder                           222 non-null    float64
 3   Log GDP per capita                    222 non-null    float64
 4   Social support                        222 non-null    float64
 5   Healthy life expectancy at birth      222 non-null    float64
 6   Freedom to make life choices          222 non-null    float64
 7   Generosity                            222 non-null    float64
 8   Perceptions of corruption             222 non-null    float64
 9   Positive affect                       222 non-null    float64
 10  Negative affect                       222 non-null    float64
 11  Social expenditure 

In [23]:
# Persist the fully merged frame under the key 'all_df'. Reuse `file_name`
# (defined in section 1.1) instead of repeating the literal path, so reads
# and writes always target the same HDF5 file.
with pd.HDFStore(file_name) as store:
    store['all_df'] = hpt_une_df

In [24]:
# Count rows that exactly repeat an earlier row (all columns compared).
hap_soc_df.duplicated(subset=None, keep='first').sum()


0

In [25]:
# Count rows that exactly repeat an earlier row (all columns compared).
hp_tax_df.duplicated(subset=None, keep='first').sum()


0

In [26]:
# Count rows that exactly repeat an earlier row (all columns compared).
hpt_une_df.duplicated(subset=None, keep='first').sum()

0