# First 100 cases and policy enactment dates

- In this notebook we used "https://datahub.io/core/covid-19" to find the number of confirmed COVID-19 cases and deaths in the USA by state.
- We used this site "https://github.com/COVID19StatePolicy/SocialDistancing" to find policy data per state

## 1. Importing data for confirmed cases and deaths

In [1]:
import pandas as pd
import numpy as np


In [2]:
df_confirmed = pd.read_csv("raw_data/us_confirmed.csv")
df_confirmed.head()


Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Lat,Combined_Key,Date,Case,Long,Country/Region,Province/State
0,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",2020-01-22,0,-170.132,US,American Samoa
1,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",2020-01-23,0,-170.132,US,American Samoa
2,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",2020-01-24,0,-170.132,US,American Samoa
3,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",2020-01-25,0,-170.132,US,American Samoa
4,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",2020-01-26,0,-170.132,US,American Samoa


In [3]:
df_deaths = pd.read_csv("raw_data/us_deaths.csv")
df_deaths.head()


Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Lat,Combined_Key,Population,Date,Case,Long,Country/Region,Province/State
0,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",55641,2020-01-22,0,-170.132,US,American Samoa
1,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",55641,2020-01-23,0,-170.132,US,American Samoa
2,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",55641,2020-01-24,0,-170.132,US,American Samoa
3,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",55641,2020-01-25,0,-170.132,US,American Samoa
4,16,AS,ASM,16,60.0,,-14.271,"American Samoa, US",55641,2020-01-26,0,-170.132,US,American Samoa


## 2. Subset data to find date of first 100 cases of infections and deaths by state

#### 2.1 Confirmed infections

In [4]:
# Subsets DataFrame to columns "Date", "Case" and "State"
# "topic" arg is the specific findings of the csv, in this case either "infections" or "deaths"

def find_first_100(df, topic):
    df = df.loc[df["Case"] >= 100].drop_duplicates(subset=["Province/State"], keep="first")
    df = df[["Date","Case", "Province/State"]].reset_index(drop=True)
    df.columns = ["Date_first_100_" + topic, topic.title(), "State"]
    return df
    

In [5]:
df_first_100_infections = find_first_100(df_confirmed, "infections")
df_first_100_infections.head()


Unnamed: 0,Date_first_100_infections,Infections,State
0,2020-04-05,112,Guam
1,2020-03-28,100,Puerto Rico
2,2020-07-04,111,Virgin Islands
3,2020-05-14,104,Alabama
4,2020-04-08,103,Alaska


#### 2.2 Confirmed infections

In [6]:
df_first_100_deaths = find_first_100(df_deaths, "deaths")
df_first_100_deaths.head()

Unnamed: 0,Date_first_100_deaths,Deaths,State
0,2020-05-07,102,Puerto Rico
1,2020-05-28,100,Alabama
2,2020-04-23,115,Arizona
3,2020-06-05,101,California
4,2020-05-16,102,Colorado


#### 2.3 Merge the new data-sets

In [7]:
df_infections_deaths = df_first_100_infections.merge(df_first_100_deaths, how="outer")

df_infections_deaths.head()

Unnamed: 0,Date_first_100_infections,Infections,State,Date_first_100_deaths,Deaths
0,2020-04-05,112,Guam,,
1,2020-03-28,100,Puerto Rico,2020-05-07,102.0
2,2020-07-04,111,Virgin Islands,,
3,2020-05-14,104,Alabama,2020-05-28,100.0
4,2020-04-08,103,Alaska,,


## 3. Working with policy data

- subset dataframe for "PublicMask" and "StayAtHome" policies
- policies must be mandatory and statewide
- find dates for when these policies were enacted


#### 3.1 Importing policy data

In [8]:
df_policies = pd.read_csv("raw_data/USstatesCov19distancingpolicy.csv", delimiter=";")
df_policies.head()

Unnamed: 0,location_id,StateFIPS,StatePostal,StateName,StatePolicy,Mandate,StateWide,DateIssued,DateEnacted,DateExpiry,DateEased,DateEnded,DateReexpanded1,PolicyCodingNotes,PolicySource,LastUpdated,LastUpdatedNotes
0,523,1,AL,Alabama,EmergDec,1,1,20200313,20200313,,,,,Public Health Emergency,https://governor.alabama.gov/newsroom/2020/03/...,20200324,
1,523,1,AL,Alabama,SchoolClose,1,1,20200314,20200318,20200731.0,20200601.0,,,in effect 20200318. 20200319 issuance also clo...,http://alabamapublichealth.gov/legal/assets/42...,20200701,Updated DateExpiry to 20200731
2,523,1,AL,Alabama,GathRestrict25,1,1,20200319,20200319,,,,,"In effect as of 5 pm 20200319. On 20200320, am...",https://governor.alabama.gov/assets/2020/03/Al...,20200420,Updated DateEnacted to 20200319 per state heal...
3,523,1,AL,Alabama,GathRestrictAny,1,1,20200319,20200319,20200731.0,20200511.0,,,"In effect as of 5 pm 20200319. On 20200320, am...",https://governor.alabama.gov/assets/2020/03/Al...,20200701,Updated DateExpiry to 20200731
4,523,1,AL,Alabama,OtherBusinessClose,1,1,20200319,20200319,20200731.0,20200430.0,,,All beaches close effective at 5 pm 20200319,https://governor.alabama.gov/assets/2020/03/Al...,20200701,Updated DateExpiry to 20200731


#### 3.2 Subsetting data for Public Mask policies per state where policy is mandatory and statewide

In [9]:
def subset_state_policy(df, policy):
    df = df.loc[(df["StatePolicy"] == policy) & (df["Mandate"] == 1) & (df["StateWide"] == 1)]
    df = df[["StateName", "DateEnacted", "DateEased", "DateEnded"]]
    
    # Converting date columns to datetime
    df["DateEnacted"] = pd.to_datetime(df["DateEnacted"], format="%Y%m%d")
    df["DateEased"] = pd.to_datetime(df["DateEased"], format="%Y%m%d")
    df["DateEnded"] = pd.to_datetime(df["DateEnded"], format="%Y%m%d")
    
    # Renaming columns to unique dates
    df.columns = ["State", policy + "_start", 
                  policy + "_eased", policy + "_ended"]

    df = df.drop_duplicates(subset=["State"], keep="first").reset_index(drop=True)

    return df
    

In [10]:
df_public_mask = subset_state_policy(df_policies, "PublicMask")
df_public_mask

Unnamed: 0,State,PublicMask_start,PublicMask_eased,PublicMask_ended
0,California,2020-06-18,NaT,NaT
1,Connecticut,2020-04-20,NaT,NaT
2,Delaware,2020-04-28,NaT,NaT
3,District of Columbia,2020-04-17,2020-06-22,NaT
4,Illinois,2020-05-01,NaT,NaT
5,Kansas,2020-07-03,NaT,NaT
6,Maine,2020-05-01,NaT,NaT
7,Maryland,2020-04-18,NaT,NaT
8,Massachusetts,2020-05-06,NaT,NaT
9,Michigan,2020-04-27,NaT,NaT


#### 3.3 Subsetting data for Stay at Home policies per state

In [11]:
df_stay_home = subset_state_policy(df_policies, "StayAtHome")
df_stay_home

Unnamed: 0,State,StayAtHome_start,StayAtHome_eased,StayAtHome_ended
0,Alabama,2020-04-04,NaT,2020-04-30
1,Alaska,2020-03-28,NaT,2020-04-24
2,Arizona,2020-03-31,NaT,2020-05-16
3,California,2020-03-19,NaT,NaT
4,Colorado,2020-03-26,2020-04-27,NaT
5,Delaware,2020-03-24,2020-06-01,NaT
6,District of Columbia,2020-04-01,NaT,2020-05-29
7,Florida,2020-04-03,2020-05-04,NaT
8,Georgia,2020-04-03,2020-05-01,NaT
9,Hawaii,2020-03-25,2020-06-10,NaT


## 4. Creating final data-set

- merge policy, infection and death data
- find date differences between first 100 cases (infections and deaths) and policy start dates
- find length of policy

#### 4.1 Merging policy dataframes with infections and death dataframe

In [12]:
df_mask_and_stay_home = df_public_mask.merge(df_stay_home, how="outer")
df_mask_and_stay_home.head(10)

Unnamed: 0,State,PublicMask_start,PublicMask_eased,PublicMask_ended,StayAtHome_start,StayAtHome_eased,StayAtHome_ended
0,California,2020-06-18,NaT,NaT,2020-03-19,NaT,NaT
1,Connecticut,2020-04-20,NaT,NaT,NaT,NaT,NaT
2,Delaware,2020-04-28,NaT,NaT,2020-03-24,2020-06-01,NaT
3,District of Columbia,2020-04-17,2020-06-22,NaT,2020-04-01,NaT,2020-05-29
4,Illinois,2020-05-01,NaT,NaT,2020-03-21,2020-05-29,NaT
5,Kansas,2020-07-03,NaT,NaT,2020-03-30,NaT,2020-05-04
6,Maine,2020-05-01,NaT,NaT,2020-04-02,2020-05-29,NaT
7,Maryland,2020-04-18,NaT,NaT,2020-03-30,NaT,2020-05-15
8,Massachusetts,2020-05-06,NaT,NaT,NaT,NaT,NaT
9,Michigan,2020-04-27,NaT,NaT,2020-03-24,NaT,2020-06-01


In [13]:
df_final = df_infections_deaths.merge(df_mask_and_stay_home, on="State", how="left")
df_final.head()

Unnamed: 0,Date_first_100_infections,Infections,State,Date_first_100_deaths,Deaths,PublicMask_start,PublicMask_eased,PublicMask_ended,StayAtHome_start,StayAtHome_eased,StayAtHome_ended
0,2020-04-05,112,Guam,,,NaT,NaT,NaT,NaT,NaT,NaT
1,2020-03-28,100,Puerto Rico,2020-05-07,102.0,NaT,NaT,NaT,NaT,NaT,NaT
2,2020-07-04,111,Virgin Islands,,,NaT,NaT,NaT,NaT,NaT,NaT
3,2020-05-14,104,Alabama,2020-05-28,100.0,NaT,NaT,NaT,2020-04-04,NaT,2020-04-30
4,2020-04-08,103,Alaska,,,NaT,NaT,NaT,2020-03-28,NaT,2020-04-24


In [14]:
df_final["Date_first_100_infections"] = pd.to_datetime(df_final["Date_first_100_infections"])

In [15]:
df_final["Date_first_100_deaths"] = pd.to_datetime(df_final["Date_first_100_deaths"])

In [16]:
# Number of days difference from first 100 infections to Stay Home policy enactment

df_final = df_final.assign(StayAtHome_Delta = pd.to_timedelta(np.where(df_final["StayAtHome_start"].notna(), 
                                                        df_final["StayAtHome_start"] - df_final["Date_first_100_infections"], 
                                                        pd.NA)))

In [17]:
# Number of days difference from first 100 infections to Public Mask policy enactment

df_final = df_final.assign(PublicMask_Delta = pd.to_timedelta(np.where(df_final["PublicMask_start"].notna(), 
                         df_final["PublicMask_start"] - df_final["Date_first_100_infections"], 
                         pd.NA)))

In [18]:
# Number of days difference from first 100 deaths to Stay Home policy enactment

df_final = df_final.assign(StayAtHome_deaths_Delta = pd.to_timedelta(np.where(df_final["StayAtHome_start"].notna(), 
                                                        df_final["StayAtHome_start"] - df_final["Date_first_100_deaths"], 
                                                        pd.NA)))

In [19]:
# Number of days difference from first 100 infections to Public Mask policy enactment

df_final = df_final.assign(PublicMask_deaths_Delta = pd.to_timedelta(np.where(df_final["PublicMask_start"].notna(), 
                         df_final["PublicMask_start"] - df_final["Date_first_100_deaths"], 
                         pd.NA)))

In [20]:
# Length of Public Mask Policy of enactment to easing

df_final = df_final.assign(PublicMask_policy_eased = pd.to_timedelta(np.where(df_final["PublicMask_start"].notna(), 
                         df_final["PublicMask_eased"] - df_final["PublicMask_start"], 
                         pd.NA)))

In [21]:
# Length of Stay at Home Policy of enactment to easing

df_final = df_final.assign(StayAtHome_policy_eased = pd.to_timedelta(np.where(df_final["StayAtHome_start"].notna(), 
                         df_final["StayAtHome_eased"] - df_final["StayAtHome_start"], 
                         pd.NA)))

In [22]:
# Length of Public Mask Policy of enactment to ending

df_final = df_final.assign(PublicMask_policy_days_ended = pd.to_timedelta(np.where(df_final["PublicMask_start"].notna(), 
                         df_final["PublicMask_ended"] - df_final["PublicMask_start"], 
                         pd.NA)))

In [23]:
# Length of Stay at Home Policy of enactment to ending

df_final = df_final.assign(StayAtHome_policy_days_ended = pd.to_timedelta(np.where(df_final["StayAtHome_start"].notna(), 
                         df_final["StayAtHome_ended"] - df_final["StayAtHome_start"], 
                         pd.NA)))

In [24]:
df_final.head(30)

Unnamed: 0,Date_first_100_infections,Infections,State,Date_first_100_deaths,Deaths,PublicMask_start,PublicMask_eased,PublicMask_ended,StayAtHome_start,StayAtHome_eased,StayAtHome_ended,StayAtHome_Delta,PublicMask_Delta,StayAtHome_deaths_Delta,PublicMask_deaths_Delta,PublicMask_policy_eased,StayAtHome_policy_eased,PublicMask_policy_days_ended,StayAtHome_policy_days_ended
0,2020-04-05,112,Guam,NaT,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
1,2020-03-28,100,Puerto Rico,2020-05-07,102.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
2,2020-07-04,111,Virgin Islands,NaT,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
3,2020-05-14,104,Alabama,2020-05-28,100.0,NaT,NaT,NaT,2020-04-04,NaT,2020-04-30,-40 days,NaT,-54 days,NaT,NaT,NaT,NaT,26 days
4,2020-04-08,103,Alaska,NaT,,NaT,NaT,NaT,2020-03-28,NaT,2020-04-24,-11 days,NaT,NaT,NaT,NaT,NaT,NaT,27 days
5,2020-04-15,110,Arizona,2020-04-23,115.0,NaT,NaT,NaT,2020-03-31,NaT,2020-05-16,-15 days,NaT,-23 days,NaT,NaT,NaT,NaT,46 days
6,2020-04-29,101,Arkansas,NaT,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT
7,2020-03-23,118,California,2020-06-05,101.0,2020-06-18,NaT,NaT,2020-03-19,NaT,NaT,-4 days,87 days,-78 days,13 days,NaT,NaT,NaT,NaT
8,2020-03-29,110,Colorado,2020-05-16,102.0,NaT,NaT,NaT,2020-03-26,2020-04-27,NaT,-3 days,NaT,-51 days,NaT,NaT,32 days,NaT,NaT
9,2020-03-19,102,Connecticut,2020-04-06,101.0,2020-04-20,NaT,NaT,NaT,NaT,NaT,NaT,32 days,NaT,14 days,NaT,NaT,NaT,NaT


In [25]:
df_final.to_csv("data/mask_and_lockdown_policy_data.csv", index_label="index")