In [None]:
import datetime
import math

import numpy
import pandas
import matplotlib.pyplot as plt

from common import calc_mid_weekly_average

In [None]:
# Lower bound of the analysis window: the first date with sufficient data
# for all states, including MA.
earliest_date = pandas.Period('2020-03-10', freq='D')

# Upper bound of the analysis window. Set this by hand when the most recent
# days are garbage (e.g. on or after holidays); leave it as None to derive it
# from the clock.
latest_date = None
if not latest_date:
    # Shifting "now" back 19 hours means the default only rolls over to
    # today's date once the local time has passed 19:00.
    shifted_now = datetime.datetime.now() - datetime.timedelta(hours=19)
    latest_date = pandas.Period(shifted_now.date(), freq='D')

In [None]:
# Download the national daily series from the COVID Tracking Project API.
raw_us_stats = pandas.read_csv(
    'https://covidtracking.com/api/v1/us/daily.csv',
    low_memory=False,
)

In [None]:
# Build the national frame from a copy of the raw download (the national
# series has no per-territory rows, so nothing is filtered out here).
us_stats = raw_us_stats.copy()
us_stats['date'] = [pandas.Period(str(stamp)) for stamp in us_stats['date']]

# Keep the hospital/ICU census columns under shorter names, clip to the
# configured date window, index by date, and add smoothed columns.
# NOTE: despite the "5" suffix, the smoothing below is a centered 3-row
# rolling mean that accepts windows with a single observation.
us_stats = (
    us_stats[['date', 'hospitalizedCurrently', 'inIcuCurrently']]
    .rename(columns={
        'date': 'Date',
        'hospitalizedCurrently': 'HospitalRaw',
        'inIcuCurrently': 'ICURaw',
    })
    .loc[lambda frame: frame.Date >= earliest_date]
    .loc[lambda frame: frame.Date <= latest_date]
    .set_index(['Date'])
    .sort_index()
    .assign(
        Hospital5=lambda frame: frame.HospitalRaw.rolling(window=3, center=True, min_periods=1).mean(),
        ICU5=lambda frame: frame.ICURaw.rolling(window=3, center=True, min_periods=1).mean(),
    )
)

# Disabled alternatives, kept for reference:
# us_stats['Hospital'], us_stats['Hospital7'] = calc_mid_weekly_average(us_stats.HospitalRaw.cumsum())
# us_stats['ICU'], us_stats['ICU7'] = calc_mid_weekly_average(us_stats.ICURaw.cumsum())
# us_stats.Hospital5 = us_stats.Hospital5.rolling(window=3, center=True, min_periods=2).mean()
# us_stats.ICU5 = us_stats.ICU5.rolling(window=3, center=True, min_periods=2).mean()

In [None]:
# Show every national row dated 2021-01-05 or later (all columns).
us_stats.loc['2021-01-05':]

In [None]:
# Plot the smoothed national series, skipping the first 200 rows of the
# date-sorted frame. ICU beds are drawn against a secondary y-axis.
foo = (
    us_stats.iloc[200:][['Hospital5', 'ICU5']]
    .rename(columns={'Hospital5': 'Hospitalizations', 'ICU5': 'ICU Beds'})
)
fam = foo.plot(
    secondary_y='ICU Beds',
    figsize=(15, 5),
    title="Current Hospitalizations and ICU Beds",
)

In [None]:
# Load the per-state metadata table from the local CSV and key it by the
# 'ST' column, so it can be joined onto frames indexed by 'ST'.
meta = pandas.read_csv('nyt_states_meta.csv').set_index('ST')
meta.tail()

In [None]:
# Download the per-state daily series from the COVID Tracking Project API
# and peek at the last few rows.
raw_st_stats = pandas.read_csv(
    'https://covidtracking.com/api/v1/states/daily.csv',
    low_memory=False,
)
raw_st_stats.tail(3)

In [None]:
# Exploratory join of the state series with all metadata columns.
# A later cell rebuilds `st_stats` from `raw_st_stats`, so this version is
# only used for the preview displayed here.
st_stats = (
    raw_st_stats[['date', 'state', 'hospitalizedCurrently', 'inIcuCurrently']]
    .rename(columns={
        'date': 'Date',
        'state': 'ST',
        'hospitalizedCurrently': 'HospitalRaw',
        'inIcuCurrently': 'ICURaw',
    })
    # Drop the territories
    .loc[lambda frame: ~frame.ST.isin(['AS', 'GU', 'MP', 'PR', 'VI'])]
    .set_index(['ST', 'Date'])
    # `meta` is indexed by 'ST', so this matches on the 'ST' index level
    .join(meta)
)
st_stats.tail()

In [None]:
# Peek at the last five rows of the raw per-state download.
raw_st_stats.tail(n=5)

In [None]:
# Rebuild the per-state frame from the raw download, without the territories.
st_stats = raw_st_stats.loc[~raw_st_stats['state'].isin(['AS', 'GU', 'MP', 'PR', 'VI'])].copy()
st_stats['date'] = [pandas.Period(str(stamp)) for stamp in st_stats['date']]

# Keep the hospital/ICU census columns under shorter names, clip to the
# configured date window, index by (state, date), and attach population.
st_stats = (
    st_stats[['date', 'state', 'hospitalizedCurrently', 'inIcuCurrently']]
    .rename(columns={
        'date': 'Date',
        'state': 'ST',
        'hospitalizedCurrently': 'HospitalRaw',
        'inIcuCurrently': 'ICURaw',
    })
    .loc[lambda frame: frame.Date >= earliest_date]
    .loc[lambda frame: frame.Date <= latest_date]
    .set_index(['ST', 'Date'])
    .sort_index()
    .join(meta[['Pop']])
    .assign(
        # Named "per million", which presumes `Pop` is expressed in millions
        # -- TODO confirm against nyt_states_meta.csv
        HospPerM=lambda frame: frame.HospitalRaw / frame.Pop,
        # Placeholder value; later cells overwrite it per state with a
        # rolling mean of HospPerM
        HospPerM5=1.0,
    )
)
st_stats.tail(10)
# Disabled 5-row smoothing of the national frame, kept for reference:
# us_stats['Hospital5'] = us_stats.HospitalRaw.rolling(window=5, center=True, min_periods=2).mean()
# us_stats['ICU5'] = us_stats.ICURaw.rolling(window=5, center=True, min_periods=2).mean()

In [None]:
# Show the last 30 rows of the New York cross-section (all columns).
st_stats.loc['NY'].tail(30)

In [None]:
# Per-state pass over the data from 2020-09-01 onward: print each state's
# peak and last-row raw rate, and collect the smoothed series for a
# hand-picked set of states into `fam` for plotting.
threshold = 525  # only referenced by the disabled filter inside the loop
states, dfs = [], []
foo = st_stats.reset_index()
foo = foo.loc[foo.Date >= '2020-09-01'].copy()
for st, df in foo.groupby('ST'):
    # Centered 5-row rolling mean; a window needs at least two observations
    df['HospPerM5'] = df['HospPerM'].rolling(window=5, center=True, min_periods=2).mean()
    peak = df['HospPerM'].max()
    current = df['HospPerM'].iloc[-1]
    # Percentage by which the last row sits below the peak
    drop_pct = ((peak - current) / peak) * 100.0
    print(f"{st}, {peak}, {current}, {drop_pct:.2f}")
    # if (peak > threshold) or (st in ['NY', 'FL']):
    if st not in ('CA', 'NY', 'TX', 'FL', 'PA', 'AZ', 'NJ', 'IL', 'MA'):
        continue
    states.append(st)
    dfs.append(df.copy())
fam = pandas.concat(dfs)

In [None]:
# Reshape to one column per state (rows = dates) and draw all states on a
# single set of axes.
__ = (
    fam.pivot_table(values='HospPerM5', index=['Date'], columns='ST')
    .plot(
        figsize=(15, 5),
        title="Current Hospitalizations per Million (5-day smooth)",
    )
)

In [None]:
# threshold = 525
# st_names = ['AL', 'AZ', 'CT', 'FL', 'GA', 'IN', 'MA', 'MI', 'MO', 'MS', 'NC',
#             'ND', 'NJ', 'NV', 'OH', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'VA']
# states = []
# dfs = []
# foo = st_stats.reset_index().copy()
# for st, df in foo.groupby('ST'):
#     df.HospPerM5 = df.HospPerM.rolling(window=5, center=True, min_periods=2).mean()
#     if st in st_names:
#         states.append(st)
#         dfs.append(df.copy())
# fam = pandas.concat(dfs)

# num_plots = max(len(st_names), 2)
# fig, axes = plt.subplots(num_plots, figsize=(15, 5*num_plots))
# for i, st in enumerate(st_names):
#     data = fam[fam.ST == st].reset_index()[['Date', 'HospPerM5']].copy()
#     data = data[data.Date >= '2020-11-01']
#     __ = data.groupby('Date').sum().plot(
#         ax=axes[i], title=st, ylim=0, legend=None, xlabel=None,
#     )

In [None]:
# Small-multiples view: one stacked panel per selected state showing the
# smoothed HospPerM5 series from 2020-11-01 onward.
threshold = 525  # not used in this cell; kept for the disabled threshold filter
st_names = ['AL', 'AZ', 'CT', 'FL', 'GA', 'IN', 'MA', 'MI', 'MO', 'MS', 'NC',
            'ND', 'NJ', 'NV', 'OH', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'VA']
states = []
dfs = []
foo = st_stats.reset_index().copy()
for st, df in foo.groupby('ST'):
    # Skip unselected states before doing any smoothing work
    if st not in st_names:
        continue
    states.append(st)
    # Centered 5-row rolling mean within the state; a window needs at least
    # two observations. `assign` returns a new frame, so the groupby slice
    # is never mutated.
    dfs.append(
        df.assign(HospPerM5=df.HospPerM.rolling(window=5, center=True, min_periods=2).mean())
    )
fam = pandas.concat(dfs)

data = fam[fam.Date >= '2020-11-01'].copy()
panels = pandas.pivot_table(data, values='HospPerM5', index=['Date'], columns='ST')

# Size the grid from the columns that actually survived the pivot
# (pivot_table drops all-NaN columns by default), not from the requested list,
# so the figure has no empty rows when a state has no data.
num_plots = len(panels.columns)

# Bind the returned axes to a throwaway name so `fam` stays the concatenated
# DataFrame and earlier cells that read `fam` can be re-run.
__ = panels.plot(title="Hospitalizations",
                 subplots=True, layout=(num_plots, 1), ylim=0,
                 figsize=(15, num_plots*5))
