# Setup

In [None]:
import pandas as pd
import numpy as np

import os
import glob
import requests

import seaborn as sns
import matplotlib.pyplot as plt

import geopandas as gp

# Import the US state abbreviation dict
%run scripts/us_state_abbrev.py

## Import the preprocessed dataframes

In [None]:
# NOTE: the preprocessed files must already exist under ../data/processed; without them
# things will not work and some states will be missing. Run the state file first to
# generate them. pd.concat raises ValueError when a directory yields no files at all.
#
# The file lists are sorted so the row order is reproducible between runs (glob returns
# paths in arbitrary order), and ignore_index=True gives the combined frame one fresh
# 0..n-1 index instead of repeating each file's own row labels.
byCountyDf = pd.concat(
    map(pd.read_csv, sorted(glob.glob('../data/processed/by_county/*'))),
    ignore_index=True,
)
byStateDf = pd.concat(
    map(pd.read_csv, sorted(glob.glob('../data/processed/by_state/*'))),
    ignore_index=True,
)

In [None]:
# Both frames are downloaded over HTTP each time this cell runs.

# CDC death data
cdcDeathsUrl = "https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD"
excessCdcDeathsDf = pd.read_csv(cdcDeathsUrl)

# Per-state history (tests, deaths) from https://covidtracking.com/data
trackerHistoryUrl = "https://covidtracking.com/data/download/all-states-history.csv"
df_tracker = pd.read_csv(trackerHistoryUrl)

## Set up data types for the dataframes

In [None]:
# Parse each frame's date column into datetimes and derive a "WeekNumber" column
# (ISO week of the year) from it.
# NOTE(review): the week number carries no year, so if any frame spans more than one
# calendar year, equal week numbers from different years land in the same group in the
# groupby("WeekNumber") cells further down -- confirm that is intended.
for frame, dateColumn in (
    (byCountyDf, "Date"),
    (byStateDf, "Date"),
    (excessCdcDeathsDf, "Week Ending Date"),
    (df_tracker, "date"),
):
    frame[dateColumn] = pd.to_datetime(frame[dateColumn])
    frame["WeekNumber"] = frame[dateColumn].dt.isocalendar().week

# Store the state / county name columns as categoricals.
byCountyDf['State'] = byCountyDf['State'].astype('category')
byCountyDf['County'] = byCountyDf['County'].astype('category')
byStateDf['State'] = byStateDf['State'].astype('category')

## Additional setup

In [None]:
# Map every state name present in byStateDf to its entry in us_state_abbrev.
# A name that is missing from us_state_abbrev raises KeyError.
states = {}
for stateName in byStateDf['State'].unique():
    states[stateName] = us_state_abbrev[stateName]
# Bare expression so the notebook displays the mapping.
states

# Analysis
## Compare with COVID Tracker

In [None]:
# Daily deaths per state: our processed data ("state") against the COVID Tracking
# Project's deathIncrease column ("tracker"), one panel per state with a shared y-axis.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 7), sharey=True)

# zip() stops at the shortest input, so at most three states (one per panel) are drawn.
for (stateName, abbrev), panel in zip(states.items(), axes):
    ourDaily = byStateDf.loc[byStateDf["State"] == stateName].groupby("Date")["Deaths"].sum()
    trackerDaily = df_tracker.loc[df_tracker["state"] == abbrev].groupby("date")["deathIncrease"].sum()

    ourDaily.plot(ax=panel, label="state")
    trackerDaily.plot(ax=panel, label="tracker")
    panel.set_title(stateName)

# Only the left-most panel carries the legend and the y-axis label.
leftPanel = axes[0]
leftPanel.legend()
leftPanel.set_ylabel("Deaths")
plt.tight_layout()

In [None]:
# Cumulative deaths per state: our processed data ("state") against the COVID Tracking
# Project ("tracker"), one panel per state with a shared y-axis.
fig, axes = plt.subplots(figsize = (20, 7), nrows = 1, ncols = 3, sharey=True)
# zip() stops at the shortest input, so at most three states (one per panel) are drawn.
for (state, st), ax in zip(states.items(), axes):
    # Our per-date "Deaths" totals are accumulated with cumsum(); the tracker's "death"
    # column is plotted as-is (summed per date), i.e. it is treated as already cumulative.
    byStateDf[ byStateDf["State"] == state].groupby("Date")["Deaths"].sum().cumsum().plot(ax = ax, label = "state")
    df_tracker[ df_tracker["state"] == st].groupby("date")["death"].sum().plot(ax=ax, label = "tracker")
    ax.set_title(state)
# Only the left-most panel carries the legend and the y-axis label.
axes[0].legend()
axes[0].set_ylabel("Cumulative Deaths")
plt.tight_layout()

## Compare death statistics

In [None]:
# Weekly death totals for every state in byStateDf, all drawn on one shared axis.
# The state list is taken from byStateDf itself -- the frame being filtered -- so a state
# that appears only in the county-level data cannot produce an empty selection here.
for st in byStateDf["State"].unique():
    byStateDf[ byStateDf["State"] == st].groupby("WeekNumber")["Deaths"].sum().plot(label=st)
plt.legend()

In [None]:
# Single-state view: weekly COVID deaths from our data next to selected CDC
# cause-of-death columns for the same state.
st = "Ohio"

# CDC columns to overlay (names exactly as they appear in the CDC file).
causesOfDeath = (
    "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99)",
    "Malignant neoplasms (C00-C97)",
    "Alzheimer disease (G30)",
    "Diseases of heart (I00-I09,I11,I13,I20-I51)",
    "Influenza and pneumonia (J09-J18)",
    "COVID-19 (U071, Multiple Cause of Death)",
)

fig = plt.figure(figsize=(10, 10))

covidWeekly = byStateDf[byStateDf["State"] == st].groupby("WeekNumber")["Deaths"].sum()
covidWeekly.plot(label="COVID")

# The state filter and the weekly grouping are the same for every cause, so build them once.
cdcByWeek = excessCdcDeathsDf[excessCdcDeathsDf["Jurisdiction of Occurrence"] == st].groupby("WeekNumber")
for causeName in causesOfDeath:
    # Mean per week number, then smoothed with a 3-week rolling mean.
    smoothed = cdcByWeek[causeName].mean().rolling(window=3).mean()
    smoothed.plot()

plt.ylim(0, 1200)
plt.legend()
plt.grid()
plt.title(st)
plt.show()

In [None]:
# Side-by-side view for three states: weekly COVID deaths from our data next to
# selected CDC cause-of-death columns, one panel per state with a shared y-axis.
comparedStates = ("Michigan", "Ohio", "Indiana")

# CDC columns to overlay (names exactly as they appear in the CDC file).
otherCauses = (
    "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99)",
    "Malignant neoplasms (C00-C97)",
    "Alzheimer disease (G30)",
    "Diseases of heart (I00-I09,I11,I13,I20-I51)",
    "Influenza and pneumonia (J09-J18)",
)

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 10), sharey=True)

for stateName, panel in zip(comparedStates, axes):
    covidWeekly = byStateDf[byStateDf["State"] == stateName].groupby("WeekNumber")["Deaths"].sum()
    covidWeekly.plot(label="COVID", ax=panel)

    # The state filter and the weekly grouping are the same for every cause, so build them once.
    cdcByWeek = excessCdcDeathsDf[
        excessCdcDeathsDf["Jurisdiction of Occurrence"] == stateName
    ].groupby("WeekNumber")
    for causeName in otherCauses:
        # Mean per week number, then smoothed with a 2-week rolling mean.
        smoothed = cdcByWeek[causeName].mean().rolling(window=2).mean()
        smoothed.plot(ax=panel)

    panel.set_ylim(0)
    panel.set_title(stateName)

# The legend is drawn once, on the right-most panel.
axes[-1].legend()
plt.show()