### Generate distict event types from the OxCGRT dataset
Import all the required libraries

In [1]:
import pandas as pd 
import datetime
import warnings
warnings.filterwarnings("ignore")

Fetch and/or download 
1. us_states_oxcgrt_df => OxCGRT NPIs Stringency Index data for US States that can be downloaded from https://raw.githubusercontent.com/OxCGRT/USA-covid-policy/master/data/OxCGRT_US_latest.csv
2. world_oxcgrt_data_df => OxCGRT NPIs Stringency Index data for world countries that can be downloaded from https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv

In [2]:
try:
    us_states_oxcgrt_df = pd.read_csv('us_states_oxcgrt.csv')
    world_oxcgrt_data_df = pd.read_csv('world_oxcgrt.csv')
except FileNotFoundError:
    us_oxcgrt_data_url = 'https://raw.githubusercontent.com/OxCGRT/USA-covid-policy/master/data/OxCGRT_US_latest.csv'
    us_states_oxcgrt_df = pd.read_csv(us_oxcgrt_data_url)
    us_states_oxcgrt_df = us_states_oxcgrt_df[us_states_oxcgrt_df['RegionCode'].notna()]
    us_states_oxcgrt_df['Date'] = pd.to_datetime(us_states_oxcgrt_df['Date'], format='%Y%m%d', errors='coerce')
    us_states_oxcgrt_df.to_csv('us_states_oxcgrt.csv', index=False)

    world_oxcgrt_data_url = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'
    world_oxcgrt_data_df = pd.read_csv(world_oxcgrt_data_url)
    world_oxcgrt_data_df['Date'] = pd.to_datetime(world_oxcgrt_data_df['Date'], format='%Y%m%d', errors='coerce')
    world_oxcgrt_data_df.to_csv('world_oxcgrt.csv', index=False)

Select whether to generate events for world contries or US States

In [3]:
level = 'world' # 'world' or 'us_states'
if level == 'world':
    file_name = 'oxford_si_global'
    name = 'CountryName'
    code = 'CountryCode'
    index = 'world'
    oxcgrt_df = world_oxcgrt_data_df
elif level == 'us_states':
    file_name = 'oxford_si'
    name = 'RegionName'
    code = 'RegionCode'
    index = 'us_states'
    oxcgrt_df = us_states_oxcgrt_df
else:
    raise SystemExit("Invalid level!")

For each country or US state in the timeseries OxCGRT csv we identify the dates where each of the event types changes the levels assigned.
A change from one level to another is then considered to be an event.

In [4]:
max_date = datetime.datetime.strptime("2021-01-11", "%Y-%m-%d")
oxcgrt_df['Date'] = pd.to_datetime(oxcgrt_df['Date'])
date_mask = oxcgrt_df['Date'] <= max_date
oxcgrt_df = oxcgrt_df.loc[date_mask]
oxcgrt_red__df = oxcgrt_df[['CountryName', 'CountryCode', 'RegionName', 'RegionCode', 'Date', 'C1_School closing',
       'C2_Workplace closing', 'C3_Cancel public events', 'C4_Restrictions on gatherings', 'C5_Close public transport',
       'C6_Stay at home requirements', 'C7_Restrictions on internal movement', 'C8_International travel controls',
       'E1_Income support', 'E2_Debt/contract relief', 'E3_Fiscal measures', 'E4_International support',
       'H1_Public information campaigns', 'H2_Testing policy', 'H3_Contact tracing',
       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines',
       'H6_Facial Coverings', 'H7_Vaccination policy', 'M1_Wildcard']]

states = list(oxcgrt_red__df[code].unique())
frames=[]
events=['C1_School closing',
       'C2_Workplace closing', 'C3_Cancel public events', 'C4_Restrictions on gatherings', 'C5_Close public transport',
       'C6_Stay at home requirements', 'C7_Restrictions on internal movement', 'C8_International travel controls',
       'E1_Income support', 'E2_Debt/contract relief', 'E3_Fiscal measures', 'E4_International support',
       'H1_Public information campaigns', 'H2_Testing policy', 'H3_Contact tracing',
       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines',
       'H6_Facial Coverings', 'H7_Vaccination policy', 'M1_Wildcard']
outliers = ['E4_International support', 'H4_Emergency investment in healthcare']
for state in states:
    if pd.isnull(state):
        continue
    if level == 'world':
        state_mask = (oxcgrt_red__df[code] == state) # & (oxcgrt_red__df['RegionCode'].isnull())
    else:
        state_mask = oxcgrt_red__df[code] == state
    this_state_df = oxcgrt_red__df.loc[state_mask] 

    for event in events:
        if (event in outliers ):
            fil_df = this_state_df[this_state_df[event] != 0]
            fil_df = fil_df[fil_df[event].notna()] 
        else:
            this_state_df[event+'_diff'] = this_state_df[event].diff()

            fil_df = this_state_df[this_state_df[event+'_diff'] != 0]
            fil_df = fil_df[fil_df[event+'_diff'].notna()]
        fil_df.rename(columns={event: 'Category'}, inplace=True)
        fil_df = fil_df[['CountryName', 'CountryCode', 'RegionName', 'RegionCode', 'Date', 'Category']]
        fil_df['Type'] = event
        frames.append(fil_df)
    
events_df = pd.concat(frames)
# events_df.to_csv(index + '_timeseries_events.csv', index=False)
events_df


Unnamed: 0,CountryName,CountryCode,RegionName,RegionCode,Date,Category,Type
75,Aruba,ABW,,,2020-03-16,3.0,C1_School closing
138,Aruba,ABW,,,2020-05-18,2.0,C1_School closing
160,Aruba,ABW,,,2020-06-09,1.0,C1_School closing
194,Aruba,ABW,,,2020-07-13,0.0,C1_School closing
230,Aruba,ABW,,,2020-08-18,3.0,C1_School closing
...,...,...,...,...,...,...,...
108484,Zimbabwe,ZWE,,,2020-05-04,4.0,H6_Facial Coverings
108607,Zimbabwe,ZWE,,,2020-09-04,3.0,H6_Facial Coverings
108632,Zimbabwe,ZWE,,,2020-09-29,1.0,H6_Facial Coverings
108659,Zimbabwe,ZWE,,,2020-10-26,3.0,H6_Facial Coverings


Number of NPI events per event type

In [5]:
count = events_df.groupby(['Type']).count()
count['type'] = count.index
count['no_of_events'] = count[code]
count = count[['type', 'no_of_events']]
# count.to_csv(index + '_no_per_event_types.csv', index=False)
count

Unnamed: 0_level_0,type,no_of_events
Type,Unnamed: 1_level_1,Unnamed: 2_level_1
C1_School closing,C1_School closing,1085
C2_Workplace closing,C2_Workplace closing,1307
C3_Cancel public events,C3_Cancel public events,824
C4_Restrictions on gatherings,C4_Restrictions on gatherings,1132
C5_Close public transport,C5_Close public transport,634
C6_Stay at home requirements,C6_Stay at home requirements,1074
C7_Restrictions on internal movement,C7_Restrictions on internal movement,942
C8_International travel controls,C8_International travel controls,1139
E1_Income support,E1_Income support,645
E2_Debt/contract relief,E2_Debt/contract relief,555


Number of NPI events per event country or US state

In [6]:
count = events_df.groupby([name]).count()
count['state'] = count.index
count['no_of_events'] = count[code]
count = count[['state', 'no_of_events']]
# count.to_csv(index + '_no_per_state.csv', index=False)
count

Unnamed: 0_level_0,state,no_of_events
CountryName,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,Afghanistan,43
Albania,Albania,50
Algeria,Algeria,45
Andorra,Andorra,40
Angola,Angola,53
...,...,...
Venezuela,Venezuela,46
Vietnam,Vietnam,64
Yemen,Yemen,29
Zambia,Zambia,39


Number of NPI events per event type per country or US state

In [7]:
# Number per events per country 
count = events_df.groupby([name, 'Type']).count()
count['state'] = count.index.get_level_values(name)
count['type'] = count.index.get_level_values('Type')
count['no_of_events'] = count[code]
count = count[['state', 'type', 'no_of_events']]
# count.to_csv(index + '_no_per_state_per_type.csv', index=False)
count

Unnamed: 0_level_0,Unnamed: 1_level_0,state,type,no_of_events
CountryName,Type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,C1_School closing,Afghanistan,C1_School closing,3
Afghanistan,C2_Workplace closing,Afghanistan,C2_Workplace closing,5
Afghanistan,C3_Cancel public events,Afghanistan,C3_Cancel public events,2
Afghanistan,C4_Restrictions on gatherings,Afghanistan,C4_Restrictions on gatherings,2
Afghanistan,C5_Close public transport,Afghanistan,C5_Close public transport,2
...,...,...,...,...
Zimbabwe,H2_Testing policy,Zimbabwe,H2_Testing policy,1
Zimbabwe,H3_Contact tracing,Zimbabwe,H3_Contact tracing,1
Zimbabwe,H4_Emergency investment in healthcare,Zimbabwe,H4_Emergency investment in healthcare,1
Zimbabwe,H5_Investment in vaccines,Zimbabwe,H5_Investment in vaccines,2
