In [187]:
import pandas as pd
import glob
import re
from zipfile import ZipFile

In [188]:
def init_dataframe_from_zip(path, index_col):
    """Load every CSV member of the zip archives matching ``path`` into one DataFrame.

    Parameters
    ----------
    path : str
        Glob pattern selecting the zip archives to read.
    index_col : str
        Name of the CSV column to use as the index of each loaded frame.

    Returns
    -------
    pandas.DataFrame
        The row-wise concatenation of all CSV members, in the order they are
        encountered. An empty DataFrame is returned when no archive matches
        or no archive contains a ``.csv`` member.
    """
    frames = []
    for filename in glob.glob(path):
        # Context managers make sure the archive and each member stream are closed.
        with ZipFile(filename) as zip_file:
            for member in zip_file.infolist():
                # Skip directory entries and any non-CSV members; only CSV
                # members contribute a frame (previously a non-CSV member
                # re-appended the last frame or hit an unbound variable).
                if not member.filename.endswith('.csv'):
                    continue
                with zip_file.open(member) as csv_file:
                    frames.append(pd.read_csv(csv_file, header=0, index_col=[index_col]))
    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(frames)

# Read the daily-report CSVs out of the zip archive, using the UID column as the index,
# then preview the first rows.
csse_covid_19_daily_reports_us = init_dataframe_from_zip(
    path=r'**csv_files/csse_covid_19_daily_reports_us.zip',
    index_col='UID',
)
csse_covid_19_daily_reports_us.head()


Unnamed: 0_level_0,Province_State,Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active,FIPS,Incident_Rate,Total_Test_Results,People_Hospitalized,Case_Fatality_Ratio,ISO3,Testing_Rate,Hospitalization_Rate
UID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
84000001.0,Alabama,US,2022-01-03 02:33:49,32.3182,-86.9023,909541,16455,,,1.0,18550.003722,6562722.0,,1.809154,USA,133846.102074,
84000002.0,Alaska,US,2022-01-03 02:33:49,61.3707,-152.4044,157169,978,,,2.0,21484.529318,3685702.0,,0.62226,USA,503824.371706,
16.0,American Samoa,US,2022-01-03 02:33:49,-14.271,-170.132,10,0,,,60.0,17.972359,2140.0,,0.0,ASM,3846.084722,
84000004.0,Arizona,US,2022-01-03 02:33:49,33.7298,-111.4312,1390409,24355,,,4.0,19102.391259,15679982.0,,1.751643,USA,215422.333359,
84000005.0,Arkansas,US,2022-01-03 02:33:49,34.9697,-92.3731,572822,9196,,,5.0,18981.418276,4441472.0,,1.605385,USA,147175.628371,


In [193]:
def init_state_social_distancing_actions(path):
    """Load the per-date CSV snapshots from the matching zip archives into one frame.

    Each CSV member's name must contain a ``<digits>-<digits>-<digits>`` date,
    which is stored in a ``Date`` column for every row of that member. The
    first (unnamed) CSV column is renamed to ``State``; rows whose state is
    "United States" or missing are discarded.

    Parameters
    ----------
    path : str
        Glob pattern selecting the zip archives to read.

    Returns
    -------
    pandas.DataFrame
        All snapshots concatenated, indexed by ``("Date", "State")`` and sorted
        by that index. When nothing is loaded, an empty frame with the same
        index names is returned.

    Raises
    ------
    ValueError
        If a CSV member's name does not contain a date-like token.
    """
    frames = []
    for filename in glob.glob(path):
        # Context managers make sure the archive and each member stream are closed.
        with ZipFile(filename) as zip_file:
            for member in zip_file.infolist():
                # Only CSV members contribute a frame (previously a non-CSV
                # member re-appended the last frame or hit an unbound variable).
                if not member.filename.endswith('.csv'):
                    continue
                match = re.search(r'\d*-\d*-\d*', member.filename)
                if match is None:
                    raise ValueError(
                        f"No date found in archive member name: {member.filename!r}"
                    )
                date = match[0]
                # One member name carries a typo in the year; map it to the intended date.
                if date == "20201-06-01":
                    date = "2021-06-01"
                with zip_file.open(member) as csv_file:
                    df = pd.read_csv(csv_file, sep=",", header=0)
                df["Date"] = pd.to_datetime(date)
                df = df.rename(columns={'Unnamed: 0': 'State'})
                # Boolean row mask instead of `Index | Index`, whose set-union
                # meaning is no longer supported by current pandas.
                unwanted = df["State"].isin(["United States"]) | df["State"].isnull()
                frames.append(df[~unwanted])
    if not frames:
        empty_index = pd.MultiIndex.from_arrays([[], []], names=["Date", "State"])
        return pd.DataFrame(index=empty_index)
    # Concatenate once instead of growing the frame inside the loop.
    return pd.concat(frames).set_index(["Date", "State"]).sort_index()

# Build the (Date, State)-indexed frame from the zipped per-date CSV snapshots.
state_social_distancing_actions = init_state_social_distancing_actions(
    path=r'**csv_files/state_social_distancing_actions.zip'
)

In [194]:
def clean_state_social_distancing_actions(df):
  """Return a copy of ``df`` without the "Primary Election Postponement" column.

  The input frame is left unmodified. A ``KeyError`` is raised by pandas if
  the column is not present.
  """
  # TODO: Remove cells we don't want
  unwanted_columns = ["Primary Election Postponement"]
  return df.drop(columns=unwanted_columns)

# Apply the cleaning step to the loaded frame and display the result.
cleaned_state_social_distancing_actions = state_social_distancing_actions.pipe(
    clean_state_social_distancing_actions
)
cleaned_state_social_distancing_actions

Unnamed: 0_level_0,Unnamed: 1_level_0,State Is Easing Social Distancing Measures,Stay at Home Order,Mandatory Quarantine for Travelers,Non-Essential Business Closures,Large Gatherings Ban,School Closures,Restaurant Limits,Emergency Declaration,Face Covering Requirement,Status of Reopening,Bar Closures,Bar Closures*,Statewide Face Mask Requirement
Date,State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-06-04,Alabama,Yes,Lifted,-,All Non-Essential Businesses Permitted to Reop...,Lifted,Closed for School Year,Reopened to Dine-in Service,Yes,,,,,
2020-06-04,Alaska,Yes,Lifted,All Travelers,All Non-Essential Businesses Permitted to Reopen,Lifted,Closed for School Year,Reopened to Dine-in Service,Yes,,,,,
2020-06-04,Arizona,Yes,Lifted,Lifted,All Non-Essential Businesses Permitted to Reop...,Lifted,Closed for School Year,Reopened to Dine-in Service with Capacity Limits,Yes,,,,,
2020-06-04,Arkansas,Yes,-,From Certain States,-,>10 People Prohibited,Closed for School Year,Reopened to Dine-in Service with Capacity Limits,Yes,,,,,
2020-06-04,California,Yes,Statewide,-,Some Non-Essential Businesses Permitted to Reo...,All Gatherings Prohibited,Recommended Closure for School Year,Closed Except for Takeout/Delivery,Yes,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-09,Virginia,,,,,,,,No,,Reopened,,,No
2021-11-09,Washington,,,,,,,,Yes,,Reopened,,,Indoor Only
2021-11-09,West Virginia,,,,,,,,Yes,,Reopened,,,No
2021-11-09,Wisconsin,,,,,,,,No,,Reopened,,,No


In [195]:
# Select the rows whose 'State' index level is California, across all dates.
california_data = cleaned_state_social_distancing_actions.loc[
    cleaned_state_social_distancing_actions.index.get_level_values('State') == 'California'
]
california_data

Unnamed: 0_level_0,Unnamed: 1_level_0,State Is Easing Social Distancing Measures,Stay at Home Order,Mandatory Quarantine for Travelers,Non-Essential Business Closures,Large Gatherings Ban,School Closures,Restaurant Limits,Emergency Declaration,Face Covering Requirement,Status of Reopening,Bar Closures,Bar Closures*,Statewide Face Mask Requirement
Date,State,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-06-04,California,Yes,Statewide,-,Some Non-Essential Businesses Permitted to Reo...,All Gatherings Prohibited,Recommended Closure for School Year,Closed Except for Takeout/Delivery,Yes,,,,,
2020-06-05,California,Yes,Statewide,-,Some Non-Essential Businesses Permitted to Reo...,All Gatherings Prohibited,Recommended Closure for School Year,Closed Except for Takeout/Delivery,Yes,,,,,
2020-06-08,California,Yes,Statewide,-,Some Non-Essential Businesses Permitted to Reo...,All Gatherings Prohibited,Recommended Closure for School Year,Closed Except for Takeout/Delivery,Yes,,,,,
2020-06-12,California,Yes,Statewide,-,Some Non-Essential Businesses Permitted to Reo...,All Gatherings Prohibited,Recommended Closure for School Year,Closed Except for Takeout/Delivery,Yes,,,,,
2020-06-15,California,Yes,Statewide,-,Some Non-Essential Businesses Permitted to Reo...,All Gatherings Prohibited,Recommended Closure for School Year,Closed Except for Takeout/Delivery,Yes,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-10-05,California,,,,,,,,Yes,,Reopened,,,Unvaccinated People Only
2021-10-12,California,,,,,,,,Yes,,Reopened,,,Unvaccinated People Only
2021-10-19,California,,,,,,,,Yes,,Reopened,,,Unvaccinated People Only
2021-11-02,California,,,,,,,,Yes,,Reopened,,,Unvaccinated People Only
