In [None]:
import pandas as pd
import numpy as np
import glob
import re
import matplotlib.pyplot as plt
import seaborn as sns
from zipfile import ZipFile
import sys
!{sys.executable} -m pip install xlrd

In [None]:
def init_restriction_df(path, index_col):
    """Read a zip-compressed, comma-separated file into a DataFrame.

    Parameters
    ----------
    path : str
        Glob pattern; only the first path returned by ``glob.glob`` is read.
    index_col : label or list of labels
        Column(s) forwarded to ``pd.read_csv`` as ``index_col``.

    Returns
    -------
    pandas.DataFrame
        The parsed file, with the first row used as the header.

    Raises
    ------
    IndexError
        If the pattern matches no file.
    """
    first_match = glob.glob(path)[0]
    return pd.read_csv(
        first_match,
        compression='zip',
        header=0,
        sep=',',
        index_col=index_col,
    )

# Load the restrictions table indexed by STATE, drop rows whose index label is
# missing, and turn the string "0" into NaN.
restrictions_df = (
    init_restriction_df(r'csv_files/updated_restrictions.csv.zip', ["STATE"])
    .loc[lambda frame: frame.index.notnull()]
    .replace("0", np.nan)
)
restrictions_df


In [None]:
# Keep only the rows from the "California" label through the end of the index.
restrictions_df = restrictions_df.loc["California":]

# Face-mask columns (the dataset has more fields one could look at).
masks_df = restrictions_df[["FM_ALL", "FM_ALL2", "FM_END", "FM_END2"]]

# Day-care closing/opening columns (the dataset does not include school reopening).
day_care_df = restrictions_df[["CLDAYCR", "OPNCLDCR"]]

stay_at_home_df = restrictions_df[["STAYHOME", "END_STHM"]]
close_businesses = restrictions_df[["CLBSNS", "END_BSNS"]]

# Restrictions stored as two (start, end) column pairs each, four columns per restriction.
double_restrictions = restrictions_df[
    [
        "CLREST", "ENDREST", "CLRST2", "ENDREST2",
        "CLGYM", "ENDGYM", "CLGYM2", "END_CLGYM2",
        "CLMOVIE", "END_MOV", "CLMV2", "END_CLMV2",
        "CLOSEBAR", "END_BRS", "BCLBAR2", "END_BRS2",
    ]
]


In [None]:
# Overall daily window that the restriction cells pad each period out to.
start_date = "2020-06-04"
end_date = "2022-03-28"


def add_missing_data(df, start, end):
    """Prepend a zero-filled daily range to a date-indexed frame.

    ``df`` is reindexed onto every day from ``start`` to ``end``: days already
    in ``df`` keep their values, every other day gets 0. The original ``df``
    rows are then appended after that block, so days present in both appear
    twice and callers are expected to de-duplicate.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by dates.
    start, end : str or datetime-like
        Inclusive bounds of the daily range. If ``start`` is after ``end`` the
        range is empty and the result holds only the rows of ``df``.

    Returns
    -------
    pandas.DataFrame
        The reindexed block (index named ``date``) followed by the rows of ``df``.
    """
    days_idx = pd.date_range(start=start, end=end, freq="D")

    # reindex raises on duplicate labels, and this function's own output can
    # contain them; collapse duplicates (first occurrence wins) for this step only.
    unique_rows = df[~df.index.duplicated(keep="first")]
    days_with_missing_data = unique_rows.reindex(days_idx, fill_value=0).rename_axis("date")

    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    return pd.concat([days_with_missing_data, df])

In [None]:
# Restrictions described by a single (start, end) column pair.
single_restr = restrictions_df.loc[:, ["CLDAYCR", "OPNCLDCR", "STAYHOME", "END_STHM", "CLBSNS", "END_BSNS"]]

single_restrictions = pd.DataFrame({})

# Columns come in consecutive (start, end) pairs, so walk them two at a time.
for start_day, end_day in zip(single_restr.columns[::2], single_restr.columns[1::2]):
    dates = single_restr.loc["California", [start_day, end_day]]
    # Use .iloc: positional `dates[0]` on a label-indexed Series is deprecated.
    period_start, period_end = dates.iloc[0], dates.iloc[1]

    # Only pairs where both cells are strings are converted; anything else
    # (e.g. NaN) is skipped.
    if isinstance(period_start, str) and isinstance(period_end, str):
        # One row per day of the restriction period, flagged with 1.
        new_df = pd.DataFrame({'date': pd.date_range(period_start, period_end),
                               start_day: 1
                               }).set_index("date")
        # Pad with zero rows before and after the period; the boundary days
        # then appear twice, so keep the first occurrence of each date.
        fill_start = add_missing_data(new_df, start_date, period_start)
        fill_end = add_missing_data(fill_start, period_end, end_date)
        fill_end = fill_end[~fill_end.index.duplicated(keep='first')]
        single_restrictions = pd.concat([single_restrictions, fill_end], axis=1)

# fillna returns a new frame, so its result must be assigned; the column-wise
# concat leaves NaN wherever a restriction has no row for a date.
single_restrictions = single_restrictions.fillna(0).rename(
    columns={
        "STAYHOME": "Stay at home order",
        "CLBSNS": "Closed other non-essential businesses",
        "CLDAYCR": "Closed day cares",
    }
)
single_restrictions

In [None]:
double_restrictions_conv = pd.DataFrame({})

# Each restriction occupies four consecutive columns:
# (first start, first end, second start, second end).
for i in range(0, len(double_restrictions.columns), 4):
    start_day_first, end_day_first, start_day_second, end_day_second = double_restrictions.columns[i:i + 4]
    dates_first = double_restrictions.loc["California", [start_day_first, end_day_first]]
    dates_second = double_restrictions.loc["California", [start_day_second, end_day_second]]

    # Use .iloc: positional `series[0]` on a label-indexed Series is deprecated.
    first_start, first_end = dates_first.iloc[0], dates_first.iloc[1]
    second_start, second_end = dates_second.iloc[0], dates_second.iloc[1]

    # Only pairs where both cells are strings are converted; anything else
    # (e.g. NaN) is skipped.
    if isinstance(first_start, str) and isinstance(first_end, str):
        # One row per day of the first period, flagged with 1.
        new_df = pd.DataFrame({'date': pd.date_range(first_start, first_end),
                               start_day_first: 1
                               }).set_index("date")
        # Pad with zero rows before and after the period; boundary days then
        # appear twice, so keep the first occurrence of each date.
        fill_start = add_missing_data(new_df, start_date, first_start)
        fill_end = add_missing_data(fill_start, first_end, end_date)
        # .copy() so the assignment below writes to an independent frame.
        fill_end = fill_end[~fill_end.index.duplicated(keep='first')].copy()

        if isinstance(second_start, str) and isinstance(second_end, str):
            # The padded index is built by concatenation and is not guaranteed
            # to be sorted, so select the second period with a date mask
            # instead of a label slice (which needs a monotonic index).
            in_second_period = (
                (fill_end.index >= pd.to_datetime(second_start))
                & (fill_end.index <= pd.to_datetime(second_end))
            )
            fill_end.loc[in_second_period, start_day_first] = 1

        double_restrictions_conv = pd.concat([double_restrictions_conv, fill_end], axis=1)

# Dates missing for a restriction become NaN in the column-wise concat; treat them as "not active".
double_restrictions_conv = double_restrictions_conv.fillna(0).rename(
    columns={
        "CLREST": "Closed restaurants",
        "CLGYM": "Closed gym",
        "CLMOVIE": "Closed movie theaters",
        "CLOSEBAR": "Closed bars",
    }
)
double_restrictions_conv


In [None]:

# Put the double- and single-period restriction columns side by side and
# draw a heatmap of the cells equal to 1, one column per restriction.
combined_restrictions = pd.concat(
    [double_restrictions_conv, single_restrictions],
    axis=1,
)
sns.heatmap(
    combined_restrictions == 1,
    yticklabels=False,
    cbar=False,
    cmap='viridis',
);
