In [None]:
# NOTE(review): this cell reads byStateDf, which in the visible notebook is only
# created further down (in the "Import the preprocessed dataframes" cell), so it
# fails on a fresh kernel — presumably leftover scratch work; confirm and
# move or remove it.
# Only the last expression of a cell is displayed, so the unique() result on the
# first line is computed and discarded.
byStateDf["State"].unique()
byStateDf

# Setup

In [None]:
import pandas as pd
import numpy as np

import os
import glob
import requests

import seaborn as sns
import matplotlib.pyplot as plt

import geopandas as gp

from zipfile import ZipFile
import urllib.request
from os import path

# Import us state abbreviation dict
%run scripts/us_state_abbrev.py

## Import the preprocessed dataframes

In [None]:
# The preprocessed CSV files must already exist under ../data/processed (they
# are produced by running the state file). Without them states will be missing
# and the rest of the notebook will not work.
countyCsvPaths = glob.glob('../data/processed/by_county/*')
stateCsvPaths = glob.glob('../data/processed/by_state/*')

# Every file found is read and the frames are stacked into one table per level.
byCountyDf = pd.concat([pd.read_csv(csvPath) for csvPath in countyCsvPaths])
byStateDf = pd.concat([pd.read_csv(csvPath) for csvPath in stateCsvPaths])

In [None]:
# CDC death data, downloaded as CSV from data.cdc.gov.
cdcDeathsUrl = "https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD"
excessCdcDeathsDf = pd.read_csv(cdcDeathsUrl)

# Per-state history from the COVID Tracking Project (https://covidtracking.com/data).
trackerHistoryUrl = "https://covidtracking.com/data/download/all-states-history.csv"
df_tracker = pd.read_csv(trackerHistoryUrl)

## Import GeoJSON files

In [None]:
# Download and unpack the deldersveld/topojson repository archive; the GeoJSON
# section reads the per-state map files from the unpacked "topojson-master" folder.
# NOTE(review): the archive is cached under the name "hcc-survival.zip", which
# looks like a leftover from another project. The name is kept so an already
# downloaded copy is still recognised.
exportIntermediatePath = '../data/raw/'
zipPath = "../data/raw/hcc-survival.zip"
extractedPath = path.join(exportIntermediatePath, "topojson-master")

# urlretrieve does not create missing directories, so make sure the target exists.
os.makedirs(exportIntermediatePath, exist_ok=True)

if not path.exists(zipPath):
    url = "https://github.com/deldersveld/topojson/archive/master.zip"
    urllib.request.urlretrieve(url, zipPath)

# Extract whenever the unpacked folder is missing, not only right after a
# download; otherwise a cached zip whose folder was deleted is never unpacked.
if not path.exists(extractedPath):
    with ZipFile(zipPath, 'r') as zippedFile:
        zippedFile.extractall(exportIntermediatePath)

## Setup data types for dataframes

In [None]:
# Parse each frame's date column and derive the ISO calendar week from it.
# NOTE(review): the ISO week number carries no year, so the same week of
# different years ends up in one WeekNumber group — confirm this is intended.
dateColumnByFrame = (
    (byCountyDf, 'Date'),
    (byStateDf, 'Date'),
    (excessCdcDeathsDf, "Week Ending Date"),
    (df_tracker, "date"),
)
for frame, dateColumn in dateColumnByFrame:
    frame[dateColumn] = pd.to_datetime(frame[dateColumn])
    frame["WeekNumber"] = frame[dateColumn].dt.isocalendar().week

# The state/county label columns are converted to categoricals.
labelColumnByFrame = (
    (byCountyDf, 'State'),
    (byCountyDf, 'County'),
    (byStateDf, 'State'),
)
for frame, labelColumn in labelColumnByFrame:
    frame[labelColumn] = frame[labelColumn].astype('category')

# Total tests are the sum of the positive and negative counts.
byStateDf["Tests"] = byStateDf["Positive"] + byStateDf["Negative"]

## Additional setups

In [None]:
# Map every state name present in the state-level data to its abbreviation,
# looked up in the us_state_abbrev dict loaded in the setup cell.
states = {}
for stateName in byStateDf['State'].unique():
    states[stateName] = us_state_abbrev[stateName]
states

# Analysis
## Compare with COVID Tracker

In [None]:
# Deaths per date, one panel per state: the state health department data against
# the COVID Tracking Project's "deathIncrease" column.
# squeeze=False keeps `axes` an array even when only one state is loaded;
# plt.subplots otherwise returns a bare Axes, which can be neither zipped nor indexed.
fig, axes = plt.subplots(ncols = len(states), sharex=True, sharey=True, figsize = (12, 5), squeeze=False)
axes = axes.ravel()
for (state, st), ax in zip(states.items(), axes):
    # `state` is the full name used in byStateDf, `st` the abbreviation used in df_tracker.
    stateDeaths = byStateDf[ byStateDf["State"] == state].groupby("Date")["Deaths"].sum()
    trackerDeaths = df_tracker[ df_tracker["state"] == st].groupby("date")["deathIncrease"].sum()
    stateDeaths.plot(ax = ax, label = "State Health Dep.")
    trackerDeaths.plot(ax=ax, label = "COVIDTracking Proj.")
    ax.set_title(state)
# The y axis is shared, so only the first panel carries the legend and label.
axes[0].legend()
axes[0].set_ylabel("Deaths")
plt.tight_layout()
plt.savefig("../report/figs/raw_tracker_comp_nc.pdf")

In [None]:
# Cumulative deaths, one panel per state: the running total of the state health
# department data against the COVID Tracking Project's "death" column.
# squeeze=False keeps `axes` an array even when only one state is loaded;
# plt.subplots otherwise returns a bare Axes, which can be neither zipped nor indexed.
fig, axes = plt.subplots(ncols = len(states), sharex=True, sharey=True, figsize = (12, 5), squeeze=False)
axes = axes.ravel()
for (state, st), ax in zip(states.items(), axes):
    # `state` is the full name used in byStateDf, `st` the abbreviation used in df_tracker.
    stateCumulative = byStateDf[ byStateDf["State"] == state].groupby("Date")["Deaths"].sum().cumsum()
    trackerCumulative = df_tracker[ df_tracker["state"] == st].groupby("date")["death"].sum()
    stateCumulative.plot(ax = ax, label = "State Health Dep.")
    trackerCumulative.plot(ax=ax, label = "COVIDTracking Proj.")
    ax.set_title(state)
# The y axis is shared, so only the first panel carries the legend and label.
axes[0].legend()
# Axis label spelling corrected (was "Cummulative Deaths").
axes[0].set_ylabel("Cumulative Deaths")
plt.tight_layout()
plt.savefig("../report/figs/raw_tracker_comp_cum.pdf")

## Compare Deaths statistics

In [None]:
# One line per state: deaths from the state-level frame summed per week number.
# The state names iterated over come from the county-level frame.
for stateName in byCountyDf["State"].unique():
    stateRows = byStateDf[byStateDf["State"] == stateName]
    weeklyDeaths = stateRows.groupby("WeekNumber")["Deaths"].sum()
    weeklyDeaths.plot(label=stateName)
plt.legend()
plt.show()

In [None]:
# Weekly COVID-19 deaths of a single state next to selected CDC causes of death.
st = "Ohio"
causesOfDeath = [
    'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99)',
    'Malignant neoplasms (C00-C97)',
    'Alzheimer disease (G30)',
    "Diseases of heart (I00-I09,I11,I13,I20-I51)",
    "Influenza and pneumonia (J09-J18)",
    'COVID-19 (U071, Multiple Cause of Death)',
]

fig = plt.figure(figsize = (10,10))
covidWeekly = byStateDf[ byStateDf["State"] == st].groupby("WeekNumber")["Deaths"].sum()
covidWeekly.plot(label="COVID-19")

# The jurisdiction filter is the same for every cause, so it is applied once.
cdcStateRows = excessCdcDeathsDf[ excessCdcDeathsDf["Jurisdiction of Occurrence"]==st]
for cause in causesOfDeath:
    # Mean per week number, then smoothed with a 3-week rolling mean.
    smoothedCause = cdcStateRows.groupby("WeekNumber")[cause].mean().rolling(window=3).mean()
    smoothedCause.plot()

plt.ylim(0, 1200)
plt.legend()
plt.grid()
plt.title(st)
plt.show()

In [None]:
# Weekly COVID-19 deaths next to selected CDC causes of death, one panel per state.
# squeeze=False keeps `axes` an array even when only one state is loaded;
# plt.subplots otherwise returns a bare Axes, which can be neither zipped nor indexed.
fig, axes = plt.subplots(ncols = len(states), sharex=True, sharey=True, figsize = (12, 5), squeeze=False)
axes = axes.ravel()
comparisonCauses = [
    'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99)',
    'Malignant neoplasms (C00-C97)',
    'Alzheimer disease (G30)',
    "Diseases of heart (I00-I09,I11,I13,I20-I51)",
    "Influenza and pneumonia (J09-J18)",
]
for st, ax in zip(states.keys(), axes):
    byStateDf[ byStateDf["State"] == st].groupby("WeekNumber")["Deaths"].sum().plot(label="COVID-19", ax=ax)
    # The jurisdiction filter is the same for every cause, so it is applied once per state.
    cdcStateRows = excessCdcDeathsDf[ excessCdcDeathsDf["Jurisdiction of Occurrence"]==st]
    for cause in comparisonCauses:
        # Mean per week number, then smoothed with a 2-week rolling mean.
        data = cdcStateRows.groupby("WeekNumber")[cause].mean().rolling(window=2).mean()
        data.plot(ax=ax)
    ax.set_ylim(0)
    ax.set_title(st)
# Every panel plots the same series, so one legend on the last panel is enough.
axes[-1].legend()
plt.savefig("../report/figs/weekly_deaths.pdf")
plt.show()


## Look at population changes

In [None]:
# from https://www.census.gov/data/datasets/time-series/demo/popest/2010s-state-total.html#par_textimage_1873399417
df_population = \
    pd.read_excel("https://www2.census.gov/programs-surveys/popest/tables/2010-2019/state/totals/nst-est2019-01.xlsx")

# Sheet layout as used below: rows 3-58 are the data rows, column 0 holds the
# territory name, columns 3 onwards hold the values, and row 2 holds their
# labels (cast to int — presumably the years; confirm against the sheet).
# regex=False removes literal "." characters from the names. Read as a regular
# expression, "." would match every character, and the default for `regex`
# differs between pandas versions.
territoryNames = df_population.iloc[3:59,0].str.replace(".", "", regex=False).str.upper()
columnLabels = df_population.iloc[2,3:].astype(int)

# The earlier empty-frame construction was dropped: it was overwritten by the
# very next assignment and never used.
df_Population = pd.DataFrame( df_population.iloc[3:59, 3:] )
df_Population = df_Population.set_index(territoryNames)
df_Population = df_Population.rename(columns=columnLabels)
df_Population.index.names = ["Territory"]

In [None]:
# Print, for each state, the mean of its population row together with the
# standard deviation in absolute terms and as a percentage of the mean.
for state in states:
    populationRow = df_Population.loc[state.upper(),:]
    meanPopulation = populationRow.mean()
    populationStd = populationRow.std()
    summary = " population: %d +- %d ( %.2f %%)" % (
        meanPopulation, populationStd, 100*populationStd/meanPopulation)
    print(state + summary)


## Testing

In [None]:
# One figure per state with the total, positive and negative test counts over time.
for state in states.keys():
    indStateDf = byStateDf[byStateDf["State"] == state]
    # Each state gets its own explicit figure/axes. The title and legend were
    # previously applied to `ax`/`axes` left over from an earlier cell, i.e. to a
    # different figure than the one being drawn here.
    fig, ax = plt.subplots()
    sns.lineplot(data = indStateDf, x="Date", y="Tests", label = "totalTestResults", ax=ax)
    sns.lineplot(data = indStateDf, x="Date", y="Positive", label = "positive", ax=ax)
    sns.lineplot(data = indStateDf, x="Date", y="Negative", label = "negative", ax=ax)
    plt.xticks(rotation = 90)
    ax.set_title(state)
    ax.legend()
    plt.show()

## GeoJSON

In [None]:
# Get the right states from https://github.com/deldersveld/topojson/tree/master/countries/us-states

# For every state abbreviation, collect the extracted map files whose name
# starts with that abbreviation (the list is empty when nothing matches).
geoJsonPaths = {
    st: glob.glob('../data/raw/topojson-master/countries/us-states/' + st + "*")
    for st in states.values()
}

In [None]:
# Draw two county-level choropleths per state (cases and deaths), labelling
# every county at the centroid of its shape.
for state, st in states.items():
    # The glob in the previous cell yields an empty list when no map file
    # matches; report that instead of failing with a bare IndexError.
    if not geoJsonPaths[st]:
        print("No map file found for %s (%s); skipping." % (state, st))
        continue
    geoDataFrame = gp.read_file(geoJsonPaths[st][0])

    # numeric_only=True: the frame also holds a datetime and a categorical
    # column, which recent pandas versions refuse to sum.
    # observed=True: "County" is categorical over the whole frame; without it the
    # result would also contain zero rows for counties absent from this state's data.
    countyDataFrame = (
        byCountyDf[byCountyDf["State"] == state]
        .groupby("County", observed=True)
        .sum(numeric_only=True)
    )
    # "County" is the index name of countyDataFrame; NAME is the name column of the map file.
    geoDataFrameMerged = geoDataFrame.merge(countyDataFrame, right_on="County", left_on="NAME")

    # Label positions: the centroid of each county geometry.
    countyCenters = geoDataFrameMerged["geometry"].centroid

    for column in ("Cases", "Deaths"):
        ax = geoDataFrameMerged.plot(column=column, legend=True, figsize=(15,10), cmap='YlOrRd')
        ax.set_title("%s: %s" % (state, column))
        for x, y, label in zip(countyCenters.x, countyCenters.y, geoDataFrameMerged["NAME"]):
            ax.text(x, y, label, fontsize = 10)