# Covid Measures Preprocessing

The data on Covid measures contains information for countries around the globe and is quite large (link: https://github.com/OxCGRT/covid-policy-tracker). Therefore, this mini-script reduces it to the necessary columns and rows. Because the original data set is so big, I did not upload it to GitHub.

In [None]:
# Import the necessary packages
import numpy as np
import matplotlib as plt
import pandas as pd
import plotly.express as px
from urllib.request import urlopen
import json
import ipywidgets as widgets
import datetime as dt

Only consider information about vaccination (H7), facial coverings (H6) and school closing (C1).
More specifically:
* If at least some types of school need to close, we indicate it with a 1. Otherwise it gets a 0
* As soon as facial coverings become required in public spaces where social distancing is not possible, we indicate it as a 1, otherwise it gets a zero
* If vaccination is universally available, this is indicated as a 1; otherwise it is 0.

In [None]:
# Read in the data
df = pd.read_csv("RawData/OxCGRT_latest_combined.csv")

# Keep only the identifying columns and the three policy indicators.
# The explicit copy makes df_reduced an independent frame, so the renaming
# and the later column assignments do not operate on a slice of df (which
# is what triggers pandas' SettingWithCopyWarning).
df_reduced = df[['CountryName', 'RegionName', 'Date', 'H7_combined', 'H6_combined', 'C1_combined']].copy()

# Give the policy indicators readable names
df_reduced = df_reduced.rename(
    columns = {"H7_combined" : "Vaccination", "H6_combined" : "Masks", "C1_combined" : "Close_schools"}
)

def get_masks_var(mask):
    """Recode a facial-coverings policy code into a 0/1 indicator.

    The indicator is 1 when the leading digit of the code is 3 or higher and
    0 otherwise. Missing and blank codes count as 0.

    Parameters
    ----------
    mask : str, int, float or missing
        Raw policy code, e.g. ``"3G"`` or ``3``. Numeric values are accepted
        as well as strings, because a CSV column that mixes plain digits and
        digit+letter codes can come back from pandas with mixed types.

    Returns
    -------
    int
        1 or 0.

    Raises
    ------
    ValueError
        If the code is non-blank but does not start with a digit.
    """
    if pd.isnull(mask):
        return 0

    # Go through str() so that numeric codes (3, 3.0) are handled like "3"
    code = str(mask).strip()
    if not code:
        return 0

    return 1 if int(code[0]) >= 3 else 0

def get_schools_var(school):
    """Recode a school-closing policy code into a 0/1 indicator.

    The indicator is 1 when the leading digit of the code is 2 or higher and
    0 otherwise. Missing and blank codes count as 0.

    Parameters
    ----------
    school : str, int, float or missing
        Raw policy code, e.g. ``"2T"`` or ``2``. Numeric values are accepted
        as well as strings, because a CSV column that mixes plain digits and
        digit+letter codes can come back from pandas with mixed types.

    Returns
    -------
    int
        1 or 0.

    Raises
    ------
    ValueError
        If the code is non-blank but does not start with a digit.
    """
    if pd.isnull(school):
        return 0

    # Go through str() so that numeric codes (2, 2.0) are handled like "2"
    code = str(school).strip()
    if not code:
        return 0

    return 1 if int(code[0]) >= 2 else 0

def get_vacs_var(row):
    """Recode the vaccination policy code of a row into a 0/1 indicator.

    The indicator is 1 when the code in ``row['Vaccination']`` starts with
    ``'5'`` and 0 otherwise. A missing code counts as 0.

    Parameters
    ----------
    row : mapping or pandas.Series
        Anything that can be indexed with ``'Vaccination'``. The stored code
        may be a string or a number.

    Returns
    -------
    int
        1 or 0.
    """
    code = row['Vaccination']
    if pd.isnull(code):
        return 0

    # Go through str() so that numeric codes (5, 5.0) are handled like "5"
    return 1 if str(code).strip().startswith('5') else 0

# Reduce to only US observations first, so the recoding below only runs on
# the US rows instead of on every country in the raw file. The explicit copy
# keeps the assignments below from acting on a slice of the full frame.
df_reduced = df_reduced.loc[df_reduced['CountryName'] == "United States",:].copy()
df_reduced.reset_index(inplace = True, drop = True)

# Recode the raw policy codes into 0/1 indicators. Assigning the columns by
# name replaces them outright instead of writing into the old columns in place.
df_reduced['Masks'] = df_reduced['Masks'].map(get_masks_var)
df_reduced['Close_schools'] = df_reduced['Close_schools'].map(get_schools_var)
# get_vacs_var expects a whole row, so it is applied row-wise
df_reduced['Vaccination'] = df_reduced.apply(get_vacs_var, axis = 1)



In [None]:
# Keep only the rows that have a region name
df_reduced = df_reduced.dropna(subset = ['RegionName'])

# Attach the state lookup table by matching the region name on the state name,
# then discard the columns that are no longer needed
state_fips = pd.read_csv("RawData/state_fips.csv")
df_reduced = df_reduced.merge(state_fips, left_on = "RegionName", right_on = "Name")
df_reduced = df_reduced.drop(columns = ['Name', 'CountryName', 'Postal Code'])


### Collapse the data frame to only contain changes in policies

In [None]:
# A row is kept when its state or any of its policy indicators differs from
# the row directly above it (the first row is always kept)
cols = ['fips', 'Vaccination', 'Masks', 'Close_schools']
tracked = df_reduced[cols]
changed = tracked.ne(tracked.shift()).any(axis = 1)

# Carry the date of each change along in the collapsed frame
cols = cols + ['Date']
df_collapsed = df_reduced.loc[changed, cols].reset_index(drop = True)

# Fix the dates
def fix_dates(date):
    """Turn a compact date such as 20200315 into the string "2020-03-15".

    The value is converted to a string and cut by position: characters 0-3
    are the year, 4-5 the month and 6-7 the day. No validation is done.
    """
    digits = str(date)
    pieces = (digits[:4], digits[4:6], digits[6:8])
    return "-".join(pieces)

# Replace the raw dates with their formatted strings. Assigning the column by
# name swaps in the new string column outright; writing the strings through
# .loc[:, 'Date'] would instead try to set them into the existing column in
# place, which pandas deprecates when that column has another dtype
# (presumably integer dates here).
df_collapsed['Date'] = df_collapsed['Date'].map(fix_dates)

In [None]:
# Display the collapsed frame as the cell output
df_collapsed

In [None]:
# Write the collapsed frame to disk without the row index
df_collapsed.to_csv("PreprocessedData/covidMeasures.csv", index = False)