In [1]:
# Import our dependencies
import pandas as pd
import numpy as np
from functools import reduce

In [2]:
# Load the cleaned CIA government-systems table from disk
cgs_path = '../../Clean_Data/Clean_CSV_Files/cia-government_systems.csv'
cgs_df = pd.read_csv(cgs_path)
# Show the loaded frame as the cell output
cgs_df

Unnamed: 0,country,government_type
0,Albania,parliamentary republic
1,Algeria,presidential republic
2,Andorra,parliamentary democracy
3,Angola,presidential republic
4,Antigua and Barbuda,parliamentary democracy under a constitutional...
...,...,...
180,Venezuela,federal presidential republic
181,Vietnam,communist state
182,Yemen,in transition
183,Zambia,presidential republic


In [3]:
# Load the table of known country-name corrections
corrections_df = pd.read_csv('../../Clean_Data/master_country_list/country_name_corrections.csv')

# Build a lookup that maps each value in the 'wrong' column to the value
# in the 'correct' column of the same row.
correction_dict = {
    bad_name: good_name
    for bad_name, good_name in zip(corrections_df['wrong'], corrections_df['correct'])
}

In [4]:
# Swap every country name that appears as a key in the correction
# dictionary for its mapped value; other names are left as they are.
cgs_country_fixed = cgs_df['country'].replace(correction_dict)
cgs_df['country'] = cgs_country_fixed

In [5]:
# Load the social-security-systems table and discard its column labelled '0'
sss_df = pd.read_csv(
    '../../Clean_Data/Clean_CSV_Files/country_social_security_systems.csv'
).drop(columns=['0'])

In [6]:
# Swap every country name that appears as a key in the correction
# dictionary for its mapped value; other names are left as they are.
sss_country_fixed = sss_df['country'].replace(correction_dict)
sss_df['country'] = sss_country_fixed
# Show the corrected frame as the cell output
sss_df

Unnamed: 0,country,system_type
0,Albania,Universal (medical benefits) and social insur...
1,Algeria,Social insurance system.
2,Andorra,Social insurance system.
3,Angola,"Universal (medical benefits), social insuranc..."
4,Antigua and Barbuda,Social insurance system.
...,...,...
180,Venezuela,Universal (birth grant and medical benefits) ...
181,Vietnam,Social insurance system.
182,Yemen,Universal (medical benefits) and employer-lia...
183,Zambia,Universal (medical benefits) and employer-lia...


In [7]:
# Load the master country list, keeping everything except the 'profile_url' column
countries_df = pd.read_csv(
    '../../Clean_Data/master_country_list/country_profile_urls.csv'
).drop(columns=['profile_url'])

In [8]:
# Plain Python list of the master country names, in file order
master_countries = countries_df['country'].to_list()

In [11]:
# Build a complete country-by-year table and attach both datasets to it.

# Define the range of years for the data (1990 through 2022 inclusive)
years = list(range(1990, 2023))

# Helper dataframe with one row per (country, year) pair: each master country is
# repeated once per year, and the list of years is tiled once per country.
helper_df = pd.DataFrame({
    'country': np.repeat(master_countries, len(years)),
    'Year': np.tile(years, len(master_countries)),
})

# Left-merge each dataset onto the helper so every country/year row is kept.
# pd.merge defaults to how='inner', which silently drops any master country that
# is missing from either dataset; with how='left' those rows are retained and
# their missing values show up as NaN instead.
frames = [helper_df, cgs_df, sss_df]
complete_df = reduce(
    lambda left, right: pd.merge(left, right, on='country', how='left'),
    frames,
)

# Sort by 'country' and 'Year'.
complete_df = complete_df.sort_values(['country', 'Year'], axis=0)

# Create a 'Country_Year' key column, e.g. 'Albania_1990'
complete_df['Country_Year'] = complete_df['country'] + '_' + complete_df['Year'].astype(str)

# Column order with 'Country_Year' moved to the front and the rest unchanged
cols = ['Country_Year'] + [col for col in complete_df.columns if col != 'Country_Year']

# Use loc to reorder the DataFrame:
complete_df = complete_df.loc[:, cols]

In [12]:
# Display the merged country/year table as the cell output for inspection
complete_df

Unnamed: 0,Country_Year,country,Year,government_type,system_type
0,Albania_1990,Albania,1990,parliamentary republic,Universal (medical benefits) and social insur...
1,Albania_1991,Albania,1991,parliamentary republic,Universal (medical benefits) and social insur...
2,Albania_1992,Albania,1992,parliamentary republic,Universal (medical benefits) and social insur...
3,Albania_1993,Albania,1993,parliamentary republic,Universal (medical benefits) and social insur...
4,Albania_1994,Albania,1994,parliamentary republic,Universal (medical benefits) and social insur...
...,...,...,...,...,...
6100,Zimbabwe_2018,Zimbabwe,2018,presidential republic,Employer-liability system. Cash sickness and ...
6101,Zimbabwe_2019,Zimbabwe,2019,presidential republic,Employer-liability system. Cash sickness and ...
6102,Zimbabwe_2020,Zimbabwe,2020,presidential republic,Employer-liability system. Cash sickness and ...
6103,Zimbabwe_2021,Zimbabwe,2021,presidential republic,Employer-liability system. Cash sickness and ...


In [13]:
# Write the merged table to the cleaned-CSV directory without the index column
output_path = '../../Clean_Data/Clean_CSV_Files/gov_systems-ss_systems-complete.csv'
complete_df.to_csv(output_path, index=False)