In [1]:
# Import our dependencies
import pandas as pd
import numpy as np
from functools import reduce

In [2]:
# Read the government-systems CSV into a dataframe and display it.
cgs_df = pd.read_csv(
    filepath_or_buffer='../../Clean_Data/Clean_CSV_Files/cia-government_systems.csv'
)
cgs_df

Unnamed: 0,country,government_type
0,Albania,parliamentary republic
1,Algeria,presidential republic
2,Andorra,parliamentary democracy
3,Angola,presidential republic
4,Antigua and Barbuda,parliamentary democracy under a constitutional...
...,...,...
180,Venezuela,federal presidential republic
181,Vietnam,communist state
182,Yemen,in transition
183,Zambia,presidential republic


In [3]:
# Load the table of known country-name corrections.
corrections_df = pd.read_csv(
    '../../Clean_Data/master_country_list/country_name_corrections.csv',
    encoding='utf-8',
)

# Build a lookup mapping each value in the 'wrong' column to the value in the
# 'correct' column of the same row.
correction_dict = {
    wrong_name: right_name
    for wrong_name, right_name in zip(corrections_df['wrong'], corrections_df['correct'])
}

In [4]:
# Swap any country name found in the correction lookup for its corrected form;
# names that are not keys of the lookup are left unchanged.
cgs_df = cgs_df.assign(country=cgs_df['country'].replace(correction_dict))

In [5]:
# Read the social-security-systems CSV into a dataframe and display it.
sss_df = pd.read_csv(
    filepath_or_buffer='../../Clean_Data/Clean_CSV_Files/country_social_security_systems-list.csv',
    encoding='utf-8',
)
sss_df

Unnamed: 0,country,sss_depth,system_type
0,Albania,2,"Universal medical benefits,Social insurance (c..."
1,Algeria,1,Social insurance system
2,Andorra,1,Social insurance system
3,Angola,3,"Universal medical benefits,Social insurance (c..."
4,Antigua and Barbuda,1,Social insurance system
...,...,...,...
180,Venezuela,2,"Universal (birth grant and medical benefits),S..."
181,Vietnam,1,Social insurance system
182,Yemen,2,"Universal medical benefits,Employer-liability ..."
183,Zambia,2,"Universal medical benefits,Employer-liability ..."


In [6]:
# Swap any country name found in the correction lookup for its corrected form,
# then display the result.
sss_df = sss_df.assign(country=sss_df['country'].replace(correction_dict))
sss_df

Unnamed: 0,country,sss_depth,system_type
0,Albania,2,"Universal medical benefits,Social insurance (c..."
1,Algeria,1,Social insurance system
2,Andorra,1,Social insurance system
3,Angola,3,"Universal medical benefits,Social insurance (c..."
4,Antigua and Barbuda,1,Social insurance system
...,...,...,...
180,Venezuela,2,"Universal (birth grant and medical benefits),S..."
181,Vietnam,1,Social insurance system
182,Yemen,2,"Universal medical benefits,Employer-liability ..."
183,Zambia,2,"Universal medical benefits,Employer-liability ..."


In [7]:
# Load the master country table and discard its 'profile_url' column.
countries_df = (
    pd.read_csv(
        '../../Clean_Data/master_country_list/country_profile_urls.csv',
        encoding='utf-8',
    )
    .drop(columns=['profile_url'])
)

In [8]:
# Plain Python list of the master country names, in file order.
master_countries = list(countries_df['country'])
master_countries

['Albania',
 'Algeria',
 'Andorra',
 'Angola',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Botswana',
 'Brazil',
 'British Virgin Islands',
 'Brunei',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cabo Verde',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Congo',
 'Democratic Republic of the Congo',
 'Costa Rica',
 'Croatia',
 'Cuba',
 'Cyprus',
 'Czechia',
 "Côte d'Ivoire",
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Estonia',
 'Eswatini',
 'Ethiopia',
 'Fiji',
 'Finland',
 'France',
 'Gabon',
 'Gambia',
 'Georgia',
 'Germany',
 'Ghana',
 'Greece',
 'Grenada',
 'Guatemala',
 'Guinea',
 'Guinea-Bissau',
 'Guyana',
 'Haiti',
 'Honduras',
 'Hong Kong (China)',
 

In [9]:
# Build the complete country x year panel and attach both lookup tables to it.

# Years covered by the panel: 1990 through 2022 inclusive (range() excludes its end).
years = list(range(1990, 2023))

# Scaffold frame with one row per (country, year) pair, so that every country in
# the master list appears for every year even when a lookup table has no row for it.
helper_df = pd.DataFrame({
    'country': np.repeat(master_countries, len(years)),
    'Year': np.tile(years, len(master_countries)),
})

# Left-join each lookup table onto the scaffold by country name.
# validate='many_to_one' makes pandas raise MergeError when a lookup table lists
# the same country more than once (for example, two spellings that were corrected
# to the same name). Without the check, the join would silently duplicate every
# year row for that country and produce repeated Country_Year keys.
frames = [helper_df, cgs_df, sss_df]
complete_df = reduce(
    lambda left, right: pd.merge(
        left, right, how='left', on='country', validate='many_to_one'
    ),
    frames,
)

# Order rows by country, then by year.
complete_df = complete_df.sort_values(['country', 'Year'], axis=0)

# Add a "<country>_<year>" key and place it as the first column.
complete_df.insert(
    0,
    'Country_Year',
    complete_df['country'] + '_' + complete_df['Year'].astype(str),
)

       country  Year
0      Albania  1990
1      Albania  1991
2      Albania  1992
3      Albania  1993
4      Albania  1994
...        ...   ...
6100  Zimbabwe  2018
6101  Zimbabwe  2019
6102  Zimbabwe  2020
6103  Zimbabwe  2021
6104  Zimbabwe  2022

[6105 rows x 2 columns]


In [10]:
# Display the merged country-year dataframe built in the previous cell.
complete_df

Unnamed: 0,Country_Year,country,Year,government_type,sss_depth,system_type
0,Albania_1990,Albania,1990,parliamentary republic,2.0,"Universal medical benefits,Social insurance (c..."
1,Albania_1991,Albania,1991,parliamentary republic,2.0,"Universal medical benefits,Social insurance (c..."
2,Albania_1992,Albania,1992,parliamentary republic,2.0,"Universal medical benefits,Social insurance (c..."
3,Albania_1993,Albania,1993,parliamentary republic,2.0,"Universal medical benefits,Social insurance (c..."
4,Albania_1994,Albania,1994,parliamentary republic,2.0,"Universal medical benefits,Social insurance (c..."
...,...,...,...,...,...,...
6100,Zimbabwe_2018,Zimbabwe,2018,presidential republic,1.0,Employer-liability system (cash sickness and m...
6101,Zimbabwe_2019,Zimbabwe,2019,presidential republic,1.0,Employer-liability system (cash sickness and m...
6102,Zimbabwe_2020,Zimbabwe,2020,presidential republic,1.0,Employer-liability system (cash sickness and m...
6103,Zimbabwe_2021,Zimbabwe,2021,presidential republic,1.0,Employer-liability system (cash sickness and m...


In [11]:
# Write the merged dataframe to CSV without the row index.
complete_df.to_csv(
    '../../Clean_Data/Clean_CSV_Files/gov_systems-ss_systems-complete.csv',
    index=False,
    encoding='utf-8',
)