# Initial Set Up

In [10]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import squarify
import seaborn as sns

In [12]:
# Load the raw policy records from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='state-policies.csv')

# About the Data

For a legend explaining what the values in the data mean, see the OxCGRT documentation: <https://github.com/OxCGRT/covid-policy-tracker/tree/master/documentation>

# Cleaning Up Data

In [13]:
# Preview the first five rows of the raw data.
data.head(n=5)

Unnamed: 0,CountryName,CountryCode,RegionName,RegionCode,Jurisdiction,Date,C1_School closing,C1_Flag,C1_Notes,C2_Workplace closing,...,StringencyIndex,StringencyIndexForDisplay,StringencyLegacyIndex,StringencyLegacyIndexForDisplay,GovernmentResponseIndex,GovernmentResponseIndexForDisplay,ContainmentHealthIndex,ContainmentHealthIndexForDisplay,EconomicSupportIndex,EconomicSupportIndexForDisplay
0,United States,USA,,,NAT_GOV,20200101,0.0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,United States,USA,,,NAT_GOV,20200102,0.0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,United States,USA,,,NAT_GOV,20200103,0.0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,United States,USA,,,NAT_GOV,20200104,0.0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,United States,USA,,,NAT_GOV,20200105,0.0,,,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
# Study window, written as YYYYMMDD integers because that is how the Date
# column appears in the loaded data (e.g. 20200308).
# Filtering on explicit dates instead of row positions keeps the selection
# correct even if rows are added to or removed from the source CSV.
STUDY_START_DATE = 20200308  # first day kept (inclusive)
STUDY_END_DATE = 20201128    # last day kept (inclusive)

# Columns of interest to extract from the original dataset.
columns_of_interest = ['RegionName', 'Jurisdiction', 'Date', 'C1_School closing', 'C2_Workplace closing', 
                       'C3_Cancel public events', 'C6_Stay at home requirements', 
                       'C7_Restrictions on internal movement', 'C8_International travel controls', 
                       'H1_Public information campaigns', 'H2_Testing policy', 'H3_Contact tracing', 
                       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines', 
                       'H6_Facial Coverings', 'M1_Wildcard']

# Keep only rows for the current state of interest, Minnesota.
is_minnesota = data['RegionName'] == 'Minnesota'
# Keep only rows whose dates fall inside the scope of this project
# (Series.between is inclusive on both ends by default).
in_study_window = data['Date'].between(STUDY_START_DATE, STUDY_END_DATE)

# .copy() makes clean_data an independent frame, so later edits to it neither
# touch `data` nor raise SettingWithCopyWarning.
clean_data = data.loc[is_minnesota & in_study_window, columns_of_interest].copy()

# Fail loudly if the filters matched nothing (e.g. a renamed region or a
# changed date format) rather than carrying an empty frame forward.
assert not clean_data.empty, "no Minnesota rows found inside the study window"


In [31]:
# Display the cleaned frame to verify the row and column selection.
clean_data
# Debugging aid: uncomment to print each column's dtype.
#print(clean_data.dtypes)

Unnamed: 0,RegionName,Jurisdiction,Date,C1_School closing,C2_Workplace closing,C3_Cancel public events,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H4_Emergency investment in healthcare,H5_Investment in vaccines,H6_Facial Coverings,M1_Wildcard
8179,Minnesota,STATE_WIDE,20200308,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,,,0.0,
8180,Minnesota,STATE_WIDE,20200309,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,,,0.0,
8181,Minnesota,STATE_WIDE,20200310,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,20889000.0,,0.0,
8182,Minnesota,STATE_WIDE,20200311,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,,,0.0,
8183,Minnesota,STATE_WIDE,20200312,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.0,1.0,,,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8441,Minnesota,STATE_WIDE,20201125,3.0,2.0,2.0,1.0,0.0,2.0,2.0,3.0,1.0,0.0,0.0,2.0,
8442,Minnesota,STATE_WIDE,20201126,3.0,2.0,2.0,1.0,0.0,2.0,2.0,3.0,1.0,0.0,0.0,2.0,
8443,Minnesota,STATE_WIDE,20201127,3.0,2.0,2.0,1.0,0.0,2.0,2.0,3.0,1.0,0.0,0.0,2.0,
8444,Minnesota,STATE_WIDE,20201128,3.0,2.0,2.0,1.0,0.0,2.0,2.0,3.0,1.0,0.0,0.0,2.0,
