In [1]:
import warnings
# Silence every warning category for the remainder of the session.
# NOTE(review): this also hides deprecation notices raised by later cells.
warnings.filterwarnings(action='ignore')

# Import our dependencies
from functools import reduce
from pathlib import Path

import numpy as np
import pandas as pd


In [2]:
# Load the coded social security systems table from its CSV.
systems_df = pd.read_csv(
    "../Clean_Data/Cluster_Analysis_Data/country_social_security_systems-coded.csv"
)
# Bare name as the last expression renders the frame as the cell output.
systems_df

Unnamed: 0,country,sss_depth,system_type
0,Albania,2,"['Universal medical benefits','Social insuranc..."
1,Algeria,1,['Social insurance system']
2,Andorra,1,['Social insurance system']
3,Angola,3,"['Universal medical benefits','Social insuranc..."
4,Antigua and Barbuda,1,['Social insurance system']
...,...,...,...
178,Venezuela,2,['Universal (birth grant and medical benefits)...
179,Vietnam,1,['Social insurance system']
180,Yemen,2,"['Universal medical benefits','Employer-liabil..."
181,Zambia,2,"['Universal medical benefits','Employer-liabil..."


In [3]:
# Load the GDP table, derive 'GDP(M$)' as '2021_GDP' divided by one million,
# then discard the original '2021_GDP' column.
# NOTE(review): presumably '2021_GDP' is in raw dollars -- confirm the unit.
gdp_df = (
    pd.read_csv('../Clean_Data/Cluster_Analysis_Data/gdp_clean.csv')
    .assign(**{'GDP(M$)': lambda raw: raw['2021_GDP'] / 1000000})
    .drop(columns=['2021_GDP'])
)
gdp_df

Unnamed: 0,country,GDP(M$)
0,Albania,18260.043500
1,Algeria,167983.141738
2,Andorra,3329.910724
3,Angola,72546.985709
4,Antigua and Barbuda,1471.125926
...,...,...
173,Vanuatu,983.469257
174,Vietnam,362637.524071
175,Yemen,21061.691630
176,Zambia,21203.059080


In [4]:
# Load the life expectancy table from its CSV and display it.
le_df = pd.read_csv(
    '../Clean_Data/Cluster_Analysis_Data/WHO-LEx-clean.csv'
)
le_df

Unnamed: 0,country,both_sexes_lex,female_lex,male_lex
0,Albania,78.00,79.91,76.25
1,Algeria,77.13,78.12,76.23
2,Angola,63.06,65.52,60.70
3,Antigua and Barbuda,76.45,77.96,74.88
4,Argentina,76.58,79.50,73.51
...,...,...,...,...
164,Venezuela,73.95,78.17,69.91
165,Vietnam,73.74,78.11,69.56
166,Yemen,66.63,68.92,64.41
167,Zambia,62.45,65.37,59.54


In [5]:
# Load the population table from its CSV and display it.
pop_df = pd.read_csv(
    '../Clean_Data/Cluster_Analysis_Data/un_population_2022-clean.csv'
)
pop_df

Unnamed: 0,country,population
0,Albania,2.9
1,Algeria,45.4
2,Angola,35.0
3,Antigua and Barbuda,0.1
4,Argentina,46.0
...,...,...
167,Venezuela,29.3
168,Vietnam,99.0
169,Yemen,31.2
170,Zambia,19.5


In [6]:
# Load the health expenditure table from its CSV and display it.
he_df = pd.read_csv(
    '../Clean_Data/Cluster_Analysis_Data/health_expenditure-clean.csv'
)
he_df

Unnamed: 0,country,govt_he,private_he,govt_he_gdp
0,Albania,392.53,324.65,2.82
1,Algeria,487.79,262.32,4.06
2,Andorra,2582.32,1133.94,4.66
3,Angola,73.39,98.99,1.04
4,Antigua and Barbuda,594.02,421.98,2.60
...,...,...,...,...
168,Venezuela,177.03,207.32,2.47
169,Vietnam,244.78,308.67,2.30
170,Yemen,11.20,90.25,0.43
171,Zambia,77.16,31.21,2.13


In [7]:
# Load the alcohol consumption / tobacco use table from its CSV and display it.
at_df = pd.read_csv(
    '../Clean_Data/Cluster_Analysis_Data/alcohol_tobacco-clean.csv'
)
at_df

Unnamed: 0,country,tot_alcohol_consumption,tobacco_use_%
0,Albania,7.17,23.0
1,Algeria,0.95,21.0
2,Argentina,9.65,24.9
3,Armenia,5.55,25.8
4,Australia,10.51,14.0
...,...,...,...
140,Vanuatu,2.25,18.2
141,Vietnam,8.66,25.0
142,Yemen,0.05,20.5
143,Zambia,6.54,14.6


In [8]:
# Load the daily protein supply table and display it.
# The file is decoded as ISO-8859-1 rather than pandas' default UTF-8.
# NOTE(review): presumably the CSV contains non-UTF-8 bytes -- confirm.
pi_df = pd.read_csv(
    '../Clean_Data/Cluster_Analysis_Data/daily-protein-supply-animal-veg.csv',
    encoding='iso-8859-1',
)
pi_df

Unnamed: 0,country,daily plant protein (g  2013),daily animal protein (g  2013)
0,Albania,52.00,59.42
1,Algeria,66.97,24.99
2,Angola,38.86,18.40
3,Antigua and Barbuda,26.88,56.83
4,Argentina,35.70,66.94
...,...,...,...
158,Venezuela,34.89,38.12
159,Vietnam,50.56,31.05
160,Yemen,45.06,13.88
161,Zambia,45.99,9.23


In [9]:
# Load the food supply (daily calories) table from its CSV and display it.
fs_df = pd.read_csv(
    '../Clean_Data/Cluster_Analysis_Data/food_supply_life_expectancy.csv'
)
fs_df

Unnamed: 0,country,daily calories (2018)
0,Albania,3360
1,Algeria,3322
2,Angola,2385
3,Antigua and Barbuda,2445
4,Argentina,3307
...,...,...
149,Venezuela,2120
150,Vietnam,3025
151,Yemen,2154
152,Zambia,2002


In [10]:
# Collect every per-topic table so they can be merged pairwise on 'country'.
frames = [systems_df, gdp_df, le_df, pop_df, he_df, at_df, pi_df, fs_df]

# Fold the list left-to-right into a single frame.
# how='inner' is pandas' default, spelled out here because it matters: a
# country is kept only if it appears in every table, so unmatched or
# differently spelled country names are dropped.
# validate='one_to_one' makes pandas raise MergeError when 'country' is
# duplicated on either side, instead of silently multiplying rows.
combined_df = reduce(
    lambda left, right: pd.merge(
        left, right, on='country', how='inner', validate='one_to_one'
    ),
    frames,
)
combined_df

Unnamed: 0,country,sss_depth,system_type,GDP(M$),both_sexes_lex,female_lex,male_lex,population,govt_he,private_he,govt_he_gdp,tot_alcohol_consumption,tobacco_use_%,daily plant protein (g  2013),daily animal protein (g  2013),daily calories (2018)
0,Albania,2,"['Universal medical benefits','Social insuranc...",1.826004e+04,78.00,79.91,76.25,2.9,392.53,324.65,2.82,7.17,23.0,52.00,59.42,3360
1,Algeria,1,['Social insurance system'],1.679831e+05,77.13,78.12,76.23,45.4,487.79,262.32,4.06,0.95,21.0,66.97,24.99,3322
2,Argentina,3,"['Universal medical benefits','Social insuranc...",4.914927e+05,76.58,79.50,73.51,46.0,1371.24,822.13,5.93,9.65,24.9,35.70,66.94,3307
3,Armenia,3,['Universal (birth or adoption grant and medic...,1.386118e+04,76.03,79.16,72.49,3.0,200.49,1399.60,1.41,5.55,25.8,46.79,43.25,2997
4,Australia,3,"['Universal medical benefits','Employment-rela...",1.542660e+06,83.04,84.84,81.25,26.1,3795.18,1499.29,7.10,10.51,14.0,34.60,71.68,3391
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128,Uzbekistan,2,"['Universal medical benefits','Social insuranc...",6.923890e+04,73.01,75.21,70.76,34.4,173.84,243.95,2.34,2.59,17.8,54.24,29.19,3011
129,Vietnam,1,['Social insurance system'],3.626375e+05,73.74,78.11,69.56,99.0,244.78,308.67,2.30,8.66,25.0,50.56,31.05,3025
130,Yemen,2,"['Universal medical benefits','Employer-liabil...",2.106169e+04,66.63,68.92,64.41,31.2,11.20,90.25,0.43,0.05,20.5,45.06,13.88,2154
131,Zambia,2,"['Universal medical benefits','Employer-liabil...",2.120306e+04,62.45,65.37,59.54,19.5,77.16,31.21,2.13,6.54,14.6,45.99,9.23,2002


In [11]:
# Write the merged table to disk without the row index.
# to_csv does not create missing parent folders and raises OSError when the
# target directory is absent, so make sure 'final/' exists first.
final_csv_path = Path('../Clean_Data/Cluster_Analysis_Data/final/final_system_list.csv')
final_csv_path.parent.mkdir(parents=True, exist_ok=True)
combined_df.to_csv(final_csv_path, index=False)