In [1]:
import warnings
warnings.filterwarnings('ignore')

# Import our dependencies
from functools import reduce
from pathlib import Path

import numpy as np
import pandas as pd


In [2]:
# Load the social security systems table; the preview below shows a
# `country` key plus `sss_depth` and `system_type` columns.
systems_csv = "../../Clean_Data/Cluster_Analysis_Data/country_social_security_systems-list.csv"
systems_df = pd.read_csv(systems_csv, encoding='utf-8')
systems_df

Unnamed: 0,country,sss_depth,system_type
0,Albania,2,"['Universal medical benefits','Social insuranc..."
1,Algeria,1,['Social insurance system']
2,Andorra,1,['Social insurance system']
3,Angola,3,"['Universal medical benefits','Social insuranc..."
4,Antigua and Barbuda,1,['Social insurance system']
...,...,...,...
178,Venezuela,2,['Universal (birth grant and medical benefits)...
179,Vietnam,1,['Social insurance system']
180,Yemen,2,"['Universal medical benefits','Employer-liabil..."
181,Zambia,2,"['Universal medical benefits','Employer-liabil..."


In [3]:
# Load the GDP table and express the 2021 figure in millions: the raw
# 'GDP 2021' column is divided by 1,000,000 into 'GDP(M$)' and then removed,
# leaving `country` and 'GDP(M$)'.
gdp_df = (
    pd.read_csv('../../Clean_Data/Cluster_Analysis_Data/gdp_clean.csv', encoding='utf-8')
    .assign(**{'GDP(M$)': lambda raw: raw['GDP 2021'] / 1000000})
    .drop(columns=['GDP 2021'])
)
gdp_df

Unnamed: 0,country,GDP(M$)
0,Albania,18260.043500
1,Algeria,167983.141738
2,Andorra,3329.910724
3,Angola,72546.985709
4,Antigua and Barbuda,1471.125926
...,...,...
173,Vanuatu,983.469257
174,Vietnam,362637.524071
175,Yemen,21061.691630
176,Zambia,21203.059080


In [4]:
# Load the WHO life expectancy table (columns for both sexes, female and
# male life expectancy, keyed by `country`).
life_expectancy_csv = '../../Clean_Data/Cluster_Analysis_Data/WHO-LEx-clean.csv'
le_df = pd.read_csv(life_expectancy_csv, encoding='utf-8')
le_df

Unnamed: 0,country,both_sexes_lex,female_lex,male_lex
0,Albania,78.00,79.91,76.25
1,Algeria,77.13,78.12,76.23
2,Angola,63.06,65.52,60.70
3,Antigua and Barbuda,76.45,77.96,74.88
4,Argentina,76.58,79.50,73.51
...,...,...,...,...
164,Venezuela,73.95,78.17,69.91
165,Vietnam,73.74,78.11,69.56
166,Yemen,66.63,68.92,64.41
167,Zambia,62.45,65.37,59.54


In [5]:
# Load the UN 2022 population table ('Population (M) 2022' per `country`).
population_csv = '../../Clean_Data/Cluster_Analysis_Data/un_population_2022-clean.csv'
pop_df = pd.read_csv(population_csv, encoding='utf-8')
pop_df

Unnamed: 0,country,Population (M) 2022
0,Albania,2.9
1,Algeria,45.4
2,Angola,35.0
3,Antigua and Barbuda,0.1
4,Argentina,46.0
...,...,...
167,Venezuela,29.3
168,Vietnam,99.0
169,Yemen,31.2
170,Zambia,19.5


In [6]:
# Load the health expenditure table (`govt_he`, `private_he` and
# `govt_he_gdp` per `country`).
health_expenditure_csv = '../../Clean_Data/Cluster_Analysis_Data/health_expenditure-clean.csv'
he_df = pd.read_csv(health_expenditure_csv, encoding='utf-8')
he_df

Unnamed: 0,country,govt_he,private_he,govt_he_gdp
0,Albania,392.53,324.65,2.82
1,Algeria,487.79,262.32,4.06
2,Andorra,2582.32,1133.94,4.66
3,Angola,73.39,98.99,1.04
4,Antigua and Barbuda,594.02,421.98,2.60
...,...,...,...,...
168,Venezuela,177.03,207.32,2.47
169,Vietnam,244.78,308.67,2.30
170,Yemen,11.20,90.25,0.43
171,Zambia,77.16,31.21,2.13


In [7]:
# Load the alcohol and tobacco table (`tot_alcohol_consumption` and
# `tobacco_use_%` per `country`).
alcohol_tobacco_csv = '../../Clean_Data/Cluster_Analysis_Data/alcohol_tobacco-clean.csv'
at_df = pd.read_csv(alcohol_tobacco_csv, encoding='utf-8')
at_df

Unnamed: 0,country,tot_alcohol_consumption,tobacco_use_%
0,Albania,7.17,23.0
1,Algeria,0.95,21.0
2,Argentina,9.65,24.9
3,Armenia,5.55,25.8
4,Australia,10.51,14.0
...,...,...,...
140,Vanuatu,2.25,18.2
141,Vietnam,8.66,25.0
142,Yemen,0.05,20.5
143,Zambia,6.54,14.6


In [8]:
# Load the daily protein supply table (plant and animal protein in grams,
# 2013 figures, per `country`).
protein_csv = '../../Clean_Data/Cluster_Analysis_Data/daily-protein-supply-animal-veg.csv'
pi_df = pd.read_csv(protein_csv, encoding='utf-8')
pi_df

Unnamed: 0,country,daily plant protein (g) 2013,daily animal protein (g) 2013
0,Albania,52.00,59.42
1,Algeria,66.97,24.99
2,Angola,38.86,18.40
3,Antigua and Barbuda,26.88,56.83
4,Argentina,35.70,66.94
...,...,...,...
158,Venezuela,34.89,38.12
159,Vietnam,50.56,31.05
160,Yemen,45.06,13.88
161,Zambia,45.99,9.23


In [9]:
# Load the food supply table ('Daily caloric supply (2018)' per `country`).
food_supply_csv = '../../Clean_Data/Cluster_Analysis_Data/food_supply_life_expectancy.csv'
fs_df = pd.read_csv(food_supply_csv, encoding='utf-8')
fs_df

Unnamed: 0,country,Daily caloric supply (2018)
0,Albania,3360
1,Algeria,3322
2,Angola,2385
3,Antigua and Barbuda,2445
4,Argentina,3307
...,...,...
153,Venezuela,2120
154,Vietnam,3025
155,Yemen,2154
156,Zambia,2002


In [10]:
# Combine every per-country table into one frame keyed on `country`.
# The join is an explicit inner join, so only countries present in ALL eight
# tables survive (the merged result has fewer rows than any single input).
frames = [systems_df, gdp_df, le_df, pop_df, he_df, at_df, pi_df, fs_df]

# validate='one_to_one' makes pandas raise a MergeError if any table lists a
# country more than once, instead of silently multiplying rows in the output.
combined_df = reduce(
    lambda left, right: pd.merge(
        left, right, on='country', how='inner', validate='one_to_one'
    ),
    frames,
)
combined_df

Unnamed: 0,country,sss_depth,system_type,GDP(M$),both_sexes_lex,female_lex,male_lex,Population (M) 2022,govt_he,private_he,govt_he_gdp,tot_alcohol_consumption,tobacco_use_%,daily plant protein (g) 2013,daily animal protein (g) 2013,Daily caloric supply (2018)
0,Albania,2,"['Universal medical benefits','Social insuranc...",1.826004e+04,78.00,79.91,76.25,2.9,392.53,324.65,2.82,7.17,23.0,52.00,59.42,3360
1,Algeria,1,['Social insurance system'],1.679831e+05,77.13,78.12,76.23,45.4,487.79,262.32,4.06,0.95,21.0,66.97,24.99,3322
2,Argentina,3,"['Universal medical benefits','Social insuranc...",4.914927e+05,76.58,79.50,73.51,46.0,1371.24,822.13,5.93,9.65,24.9,35.70,66.94,3307
3,Armenia,3,['Universal (birth or adoption grant and medic...,1.386118e+04,76.03,79.16,72.49,3.0,200.49,1399.60,1.41,5.55,25.8,46.79,43.25,2997
4,Australia,3,"['Universal medical benefits','Employment-rela...",1.542660e+06,83.04,84.84,81.25,26.1,3795.18,1499.29,7.10,10.51,14.0,34.60,71.68,3391
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,Uzbekistan,2,"['Universal medical benefits','Social insuranc...",6.923890e+04,73.01,75.21,70.76,34.4,173.84,243.95,2.34,2.59,17.8,54.24,29.19,3011
132,Vietnam,1,['Social insurance system'],3.626375e+05,73.74,78.11,69.56,99.0,244.78,308.67,2.30,8.66,25.0,50.56,31.05,3025
133,Yemen,2,"['Universal medical benefits','Employer-liabil...",2.106169e+04,66.63,68.92,64.41,31.2,11.20,90.25,0.43,0.05,20.5,45.06,13.88,2154
134,Zambia,2,"['Universal medical benefits','Employer-liabil...",2.120306e+04,62.45,65.37,59.54,19.5,77.16,31.21,2.13,6.54,14.6,45.99,9.23,2002


In [13]:
# Display the final 35 rows of the merged frame.
combined_df.iloc[-35:]

Unnamed: 0,country,sss_depth,system_type,GDP(M$),both_sexes_lex,female_lex,male_lex,Population (M) 2022,govt_he,private_he,govt_he_gdp,tot_alcohol_consumption,tobacco_use_%,daily plant protein (g) 2013,daily animal protein (g) 2013,Daily caloric supply (2018)
101,Philippines,1,['Social insurance system'],394086.4,70.43,73.6,67.4,112.5,153.96,223.7,1.66,6.86,23.4,35.08,24.85,2662
102,Poland,1,['Social insurance system'],674048.3,78.27,81.93,74.53,37.7,1575.14,630.18,4.6,11.71,24.7,48.2,53.27,3537
103,Portugal,3,"['Universal medical benefits','Social insuranc...",249886.5,81.57,84.4,78.56,10.1,2142.55,1372.73,5.81,12.03,25.3,43.6,67.28,3480
104,Romania,2,['Social insurance (cash and medical benefits)...,284087.6,75.57,79.26,71.95,19.0,1527.93,378.68,4.6,11.74,28.4,55.81,47.21,3581
105,Russian Federation,1,['Social insurance system'],1775800.0,73.23,78.0,68.18,145.8,1042.05,661.99,3.45,11.19,27.1,46.66,56.18,3345
106,Rwanda,2,['Social insurance (cash maternity and medical...,11070.36,69.1,71.24,66.88,13.6,58.22,38.28,2.56,8.95,14.1,51.22,5.59,2188
107,Samoa,1,['Employer-liability (cash benefits)'],788.39,70.45,71.85,69.16,0.2,314.68,46.88,4.62,2.66,25.9,31.87,53.66,3030
108,Sao Tome and Principe,1,['Social insurance system'],547.0929,70.42,71.99,68.79,0.2,107.27,43.59,2.62,5.88,5.8,37.18,15.9,2446
109,Saudi Arabia,2,"['Universal medical benefits','Employer-liabil...",833541.2,74.31,76.15,73.11,35.8,1929.67,859.89,3.93,0.19,14.2,51.7,39.93,3307
110,Senegal,3,['Social insurance (cash maternity and materni...,27625.39,68.58,70.14,66.82,17.7,36.25,82.58,1.03,0.76,7.1,43.24,15.25,2545


In [14]:
# Write the merged table to disk without the index column. The output folder
# is created first because `to_csv` does not create missing directories, so a
# checkout without `final/` would otherwise fail on this last cell.
output_path = Path('../../Clean_Data/Cluster_Analysis_Data/final/final_system_list.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
combined_df.to_csv(output_path, encoding='utf-8', index=False)