In [1]:
import warnings
from functools import reduce
from pathlib import Path

# Installed before the third-party imports so warnings raised while importing
# them are hidden as well.
warnings.filterwarnings('ignore')

# Import our dependencies
import numpy as np
import pandas as pd


In [2]:
# Load the coded social-security-systems table; the rendered output shows
# 'sss_depth' plus the s1..s5 system-description columns per country.
systems_path = "../../Clean_Data/Cluster_Analysis_Data/country_social_security_systems-coded.csv"
systems_df = pd.read_csv(systems_path, encoding='utf-8')
systems_df

Unnamed: 0,country,sss_depth,s1,s2,s3,s4,s5
0,Albania,2,Universal medical benefits,Social insurance (cash benefits),,,
1,Algeria,1,Social insurance system,,,,
2,Andorra,1,Social insurance system,,,,
3,Angola,3,Universal medical benefits,Social insurance (cash maternity benefits),Employer-liability (cash sickness benefit),,
4,Antigua and Barbuda,1,Social insurance system,,,,
...,...,...,...,...,...,...,...
178,Venezuela,2,Universal (birth grant and medical benefits),Social insurance (cash and medical benefits),,,
179,Vietnam,1,Social insurance system,,,,
180,Yemen,2,Universal medical benefits,Employer-liability (cash benefits),,,
181,Zambia,2,Universal medical benefits,Employer-liability (cash benefits),,,


In [3]:
# Load the GDP table and re-express the 'GDP 2021' column in millions:
# the new 'GDP(M$)' column is 'GDP 2021' divided by 1,000,000, and the raw
# column is dropped afterwards.
# NOTE(review): the '$' in the column name suggests US dollars - confirm the
# currency against the data source.
gdp_df = (
    pd.read_csv('../../Clean_Data/Cluster_Analysis_Data/gdp_clean.csv', encoding='utf-8')
    .assign(**{'GDP(M$)': lambda frame: frame['GDP 2021'] / 1000000})
    .drop(['GDP 2021'], axis=1)
)
gdp_df

Unnamed: 0,country,GDP(M$)
0,Albania,18260.043500
1,Algeria,167983.141738
2,Andorra,3329.910724
3,Angola,72546.985709
4,Antigua and Barbuda,1471.125926
...,...,...
173,Vanuatu,983.469257
174,Vietnam,362637.524071
175,Yemen,21061.691630
176,Zambia,21203.059080


In [4]:
# Load the life-expectancy table; the rendered output shows one column each
# for both sexes, female and male.
le_path = '../../Clean_Data/Cluster_Analysis_Data/WHO-LEx-clean.csv'
le_df = pd.read_csv(le_path, encoding='utf-8')
le_df

Unnamed: 0,country,both_sexes_lex,female_lex,male_lex
0,Albania,78.00,79.91,76.25
1,Algeria,77.13,78.12,76.23
2,Angola,63.06,65.52,60.70
3,Antigua and Barbuda,76.45,77.96,74.88
4,Argentina,76.58,79.50,73.51
...,...,...,...,...
164,Venezuela,73.95,78.17,69.91
165,Vietnam,73.74,78.11,69.56
166,Yemen,66.63,68.92,64.41
167,Zambia,62.45,65.37,59.54


In [5]:
# Load the population table (column 'Population (M) 2022').
pop_path = '../../Clean_Data/Cluster_Analysis_Data/un_population_2022-clean.csv'
pop_df = pd.read_csv(pop_path, encoding='utf-8')
pop_df

Unnamed: 0,country,Population (M) 2022
0,Albania,2.9
1,Algeria,45.4
2,Angola,35.0
3,Antigua and Barbuda,0.1
4,Argentina,46.0
...,...,...
167,Venezuela,29.3
168,Vietnam,99.0
169,Yemen,31.2
170,Zambia,19.5


In [6]:
# Load the health-expenditure table; the rendered output shows the columns
# 'govt_he', 'private_he' and 'govt_he_gdp'.
he_path = '../../Clean_Data/Cluster_Analysis_Data/health_expenditure-clean.csv'
he_df = pd.read_csv(he_path, encoding='utf-8')
he_df

Unnamed: 0,country,govt_he,private_he,govt_he_gdp
0,Albania,392.53,324.65,2.82
1,Algeria,487.79,262.32,4.06
2,Andorra,2582.32,1133.94,4.66
3,Angola,73.39,98.99,1.04
4,Antigua and Barbuda,594.02,421.98,2.60
...,...,...,...,...
168,Venezuela,177.03,207.32,2.47
169,Vietnam,244.78,308.67,2.30
170,Yemen,11.20,90.25,0.43
171,Zambia,77.16,31.21,2.13


In [7]:
# Load the alcohol/tobacco table; the rendered output shows the columns
# 'tot_alcohol_consumption' and 'tobacco_use_%'.
at_path = '../../Clean_Data/Cluster_Analysis_Data/alcohol_tobacco-clean.csv'
at_df = pd.read_csv(at_path, encoding='utf-8')
at_df

Unnamed: 0,country,tot_alcohol_consumption,tobacco_use_%
0,Albania,7.17,23.0
1,Algeria,0.95,21.0
2,Argentina,9.65,24.9
3,Armenia,5.55,25.8
4,Australia,10.51,14.0
...,...,...,...
140,Vanuatu,2.25,18.2
141,Vietnam,8.66,25.0
142,Yemen,0.05,20.5
143,Zambia,6.54,14.6


In [8]:
# Load the daily protein-supply table; the rendered output shows separate
# plant and animal protein columns (grams, 2013).
pi_path = '../../Clean_Data/Cluster_Analysis_Data/daily-protein-supply-animal-veg.csv'
pi_df = pd.read_csv(pi_path, encoding='utf-8')
pi_df

Unnamed: 0,country,daily plant protein (g) 2013,daily animal protein (g) 2013
0,Albania,52.00,59.42
1,Algeria,66.97,24.99
2,Angola,38.86,18.40
3,Antigua and Barbuda,26.88,56.83
4,Argentina,35.70,66.94
...,...,...,...
158,Venezuela,34.89,38.12
159,Vietnam,50.56,31.05
160,Yemen,45.06,13.88
161,Zambia,45.99,9.23


In [9]:
# Load the food-supply table (column 'Daily caloric supply (2018)').
fs_path = '../../Clean_Data/Cluster_Analysis_Data/food_supply_life_expectancy.csv'
fs_df = pd.read_csv(fs_path, encoding='utf-8')
fs_df

Unnamed: 0,country,Daily caloric supply (2018)
0,Albania,3360
1,Algeria,3322
2,Angola,2385
3,Antigua and Barbuda,2445
4,Argentina,3307
...,...,...
153,Venezuela,2120
154,Vietnam,3025
155,Yemen,2154
156,Zambia,2002


In [10]:
# Combine every per-topic table into a single frame keyed on 'country'.
frames = [systems_df, gdp_df, le_df, pop_df, he_df, at_df, pi_df, fs_df]

# Fold the list left-to-right with an inner join (pandas' default, now spelled
# out): only countries present in every frame survive the merge.
# validate='one_to_one' makes pandas raise MergeError when 'country' is
# duplicated on either side, instead of silently multiplying rows.
combined_df = reduce(
    lambda left, right: pd.merge(
        left, right, on='country', how='inner', validate='one_to_one'
    ),
    frames,
)
combined_df

Unnamed: 0,country,sss_depth,s1,s2,s3,s4,s5,GDP(M$),both_sexes_lex,female_lex,male_lex,Population (M) 2022,govt_he,private_he,govt_he_gdp,tot_alcohol_consumption,tobacco_use_%,daily plant protein (g) 2013,daily animal protein (g) 2013,Daily caloric supply (2018)
0,Albania,2,Universal medical benefits,Social insurance (cash benefits),,,,1.826004e+04,78.00,79.91,76.25,2.9,392.53,324.65,2.82,7.17,23.0,52.00,59.42,3360
1,Algeria,1,Social insurance system,,,,,1.679831e+05,77.13,78.12,76.23,45.4,487.79,262.32,4.06,0.95,21.0,66.97,24.99,3322
2,Argentina,3,Universal medical benefits,Social insurance (cash maternity and medical b...,Employer-liability (cash sickness benefit),,,4.914927e+05,76.58,79.50,73.51,46.0,1371.24,822.13,5.93,9.65,24.9,35.70,66.94,3307
3,Armenia,3,Universal (birth or adoption grant and medical...,Social insurance (cash benefits),Social assistance (maternity benefits),,,1.386118e+04,76.03,79.16,72.49,3.0,200.49,1399.60,1.41,5.55,25.8,46.79,43.25,2997
4,Australia,3,Universal medical benefits,Employment-related (parental benefits),Social assistance (cash sickness benefits and ...,,,1.542660e+06,83.04,84.84,81.25,26.1,3795.18,1499.29,7.10,10.51,14.0,34.60,71.68,3391
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,Uzbekistan,2,Universal medical benefits,Social insurance (cash benefits),,,,6.923890e+04,73.01,75.21,70.76,34.4,173.84,243.95,2.34,2.59,17.8,54.24,29.19,3011
130,Vietnam,1,Social insurance system,,,,,3.626375e+05,73.74,78.11,69.56,99.0,244.78,308.67,2.30,8.66,25.0,50.56,31.05,3025
131,Yemen,2,Universal medical benefits,Employer-liability (cash benefits),,,,2.106169e+04,66.63,68.92,64.41,31.2,11.20,90.25,0.43,0.05,20.5,45.06,13.88,2154
132,Zambia,2,Universal medical benefits,Employer-liability (cash benefits),,,,2.120306e+04,62.45,65.37,59.54,19.5,77.16,31.21,2.13,6.54,14.6,45.99,9.23,2002


In [12]:
# Preview the first 55 rows of the merged table.
combined_df.iloc[:55]

Unnamed: 0,country,sss_depth,s1,s2,s3,s4,s5,GDP(M$),both_sexes_lex,female_lex,male_lex,Population (M) 2022,govt_he,private_he,govt_he_gdp,tot_alcohol_consumption,tobacco_use_%,daily plant protein (g) 2013,daily animal protein (g) 2013,Daily caloric supply (2018)
0,Albania,2,Universal medical benefits,Social insurance (cash benefits),,,,18260.04,78.0,79.91,76.25,2.9,392.53,324.65,2.82,7.17,23.0,52.0,59.42,3360
1,Algeria,1,Social insurance system,,,,,167983.1,77.13,78.12,76.23,45.4,487.79,262.32,4.06,0.95,21.0,66.97,24.99,3322
2,Argentina,3,Universal medical benefits,Social insurance (cash maternity and medical b...,Employer-liability (cash sickness benefit),,,491492.7,76.58,79.5,73.51,46.0,1371.24,822.13,5.93,9.65,24.9,35.7,66.94,3307
3,Armenia,3,Universal (birth or adoption grant and medical...,Social insurance (cash benefits),Social assistance (maternity benefits),,,13861.18,76.03,79.16,72.49,3.0,200.49,1399.6,1.41,5.55,25.8,46.79,43.25,2997
4,Australia,3,Universal medical benefits,Employment-related (parental benefits),Social assistance (cash sickness benefits and ...,,,1542660.0,83.04,84.84,81.25,26.1,3795.18,1499.29,7.1,10.51,14.0,34.6,71.68,3391
5,Austria,1,Social insurance system,,,,,477082.5,81.65,83.78,79.44,9.1,4478.39,1655.74,7.62,11.96,27.1,43.35,62.86,3695
6,Azerbaijan,2,Universal medical benefits,Social insurance (cash benefits),,,,54622.18,71.43,74.09,68.78,10.3,192.3,411.81,1.28,4.41,24.2,63.07,30.08,3149
7,Bahamas,3,Universal medical benefits,Social insurance (cash benefits),Social assistance (medical benefits),,,11208.6,73.21,76.59,69.85,0.4,1157.54,1058.74,2.99,4.78,10.7,30.19,56.43,2655
8,Bangladesh,1,Employer-liability system,,,,,416264.9,74.25,75.64,72.99,167.9,22.96,92.78,0.46,0.02,35.2,45.92,9.92,2563
9,Barbados,2,Universal medical benefits,Social insurance (cash benefits),,,,4900.8,76.03,77.66,74.32,0.3,461.27,558.42,2.82,9.69,8.5,37.72,50.52,2956


In [None]:
# Write the merged table to disk; index=False keeps the positional index out
# of the file.
output_path = Path('../../Clean_Data/Cluster_Analysis_Data/final/final_system_coded.csv')
# DataFrame.to_csv does not create missing parent directories, so make sure
# the 'final/' folder exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
combined_df.to_csv(output_path, encoding='utf-8', index=False)