In [1]:
from pathlib import Path
import pandas as pd
import requests

This notebook calculates and gathers all the data required by policy_analysis.ipynb, namely the mean stringency and facemask indices for each country (averaged from 1/1/2020 to 31/12/2022). It compiles all relevant data into one DataFrame, cleaned_data_df, which is saved as the CSV file cleaned_data/cleaned_data.csv.


In [2]:
# Input file locations (relative to the notebook's working directory).
csv_file_full = Path("Original_data/owid-covid-data.csv")
csv_file_facemasks = Path("Original_data/face-covering-policies-covid.csv")
# ISO-2 codes are needed later to look up each country in the World Bank API.
csv_file_iso = Path("cleaned_data/iso_codes.csv")

# Load every input into its own DataFrame.
df = pd.read_csv(csv_file_full)
facemask_df = pd.read_csv(csv_file_facemasks)
iso_codes_df = pd.read_csv(csv_file_iso)


In [3]:
# Export the daily stringency index of every location as a standalone CSV.
stringency_columns = ['location', 'date', 'stringency_index']
string_cases_df = df.loc[:, stringency_columns]
string_cases_df.to_csv('cleaned_data/string_df.csv', index=False)


In [4]:
# Extract the raw columns needed for the per-country summary.

covid_data_df = df[['continent','location','date','stringency_index','total_cases_per_million']]
# dropna() removes locations that are not countries (their `continent` value is
# NaN, e.g. continent-level aggregates) and also removes countries that never
# have a complete row with a recorded stringency index.
countries = covid_data_df.dropna()['location'].unique()

# Rename first, then select by the *new* names. DataFrame.rename ignores keys
# that are not present, so this cell can be re-run safely: on a second run the
# rename is a no-op and the selection still succeeds. (Selecting the original
# 'Entity'/'Day'/'facial_coverings' names first would raise KeyError on re-run
# because `facemask_df` is reassigned here.)
facemask_df = (
    facemask_df
    .rename(columns={"Entity": "location", "Day": "date", "facial_coverings": "mask_index"})
    .loc[:, ['location', 'date', 'mask_index']]
)



In [19]:
# Retrieve the incomeLevel label for each country from the World Bank API.
#
# NOTE(review): country i is paired with row i of iso_codes_df purely by
# position (column 1 is used as the ISO code). This assumes iso_codes.csv lists
# the same countries in the same order as `countries` -- confirm, otherwise
# income levels will be attached to the wrong country.
base_url = "http://api.worldbank.org/V2/country/"
request_timeout_s = 30  # without a timeout, requests can block indefinitely
income_status = []
# A single Session reuses the underlying connection across the many requests.
with requests.Session() as session:
    for i in range(len(countries)):
        query_url = base_url + iso_codes_df.iloc[i, 1] + "?format=json"
        http_response = session.get(query_url, timeout=request_timeout_s)
        # Fail loudly on HTTP errors instead of trying to index an error payload.
        http_response.raise_for_status()
        response = http_response.json()
        # The payload is indexed as [metadata, [country_record]]; take the
        # human-readable income level of the single returned country.
        income_status.append(response[1][0]['incomeLevel']['value'])
    

In [14]:
# Per-country summary values, appended in the same order as `countries`.
mean_stringency_index = []
mean_facemask_index = []
total_cases_pM = []

for country in countries:
    # dropna() keeps only complete rows. Per the dataset note, the stringency
    # index is not recorded after 31/12/2022, so later rows are discarded here.
    covid_rows = covid_data_df.loc[covid_data_df['location'] == country, :].dropna()
    # Total cases per million from the last remaining row (as of 31/12/2022).
    # Selected by column name rather than by integer position on the row,
    # because integer keys on a label-indexed Series are deprecated in pandas.
    # NOTE(review): assumes rows are in chronological order within a country.
    total_cases_pM.append(covid_rows['total_cases_per_million'].iloc[-1])
    mean_stringency_index.append(covid_rows['stringency_index'].mean())

    # Mean facemask policy index over all recorded days for this country
    # (NaN if the country is absent from the facemask dataset).
    mask_rows = facemask_df.loc[facemask_df['location'] == country, :].dropna()
    mean_facemask_index.append(mask_rows['mask_index'].mean())
 

In [24]:
# Assemble the per-country summary table; every list is ordered like `countries`.
summary_columns = {
    'Country': countries,
    'Mean Stringency Index': mean_stringency_index,
    'Mean Facemask Index': mean_facemask_index,
    'Total Cases per Million': total_cases_pM,
    'income status': income_status,
}
cleaned_data_df = pd.DataFrame(summary_columns)


In [25]:
# Persist the summary table for policy_analysis.ipynb (row index is not written).
cleaned_data_csv = 'cleaned_data/cleaned_data.csv'
cleaned_data_df.to_csv(cleaned_data_csv, index=False)

In [22]:
# Preview the first five rows of the summary table.
# NOTE(review): the saved output below shows different column names (and an
# 'Unnamed: 0' column) than the frame built in this notebook -- it looks stale;
# re-run the notebook top to bottom to refresh it.
cleaned_data_df.head(n=5)

Unnamed: 0,location,mean_stringency_index,mean_mask_index,total_cases_per_million,income_status
0,Afghanistan,28.845644,2.225365,5046.443,Low income
1,Albania,44.38107,2.314781,117076.274,Upper middle income
2,Algeria,50.972144,2.605839,6040.033,Lower middle income
3,Andorra,35.494681,2.178832,598061.195,High income
4,Angola,55.480695,2.775547,2953.02,Lower middle income
