In [None]:
%matplotlib inline
import pandas
import matplotlib
import matplotlib.pyplot as plt

from common import load_data, calc_state_stats, get_infections_df

In [None]:
# Date window handed to load_data() in the next cell.
EARLIEST_DATE = pandas.Period('2020-03-12', freq='D')

# No end date is pinned. To pin one, assign a daily Period instead, e.g.
#   LATEST_DATE = pandas.Period('2020-08-15', freq='D')
# NOTE(review): how load_data treats None is not visible here — presumably
# "through the newest available date"; confirm in common.load_data.
LATEST_DATE = None

In [None]:
# Load the metadata and the two stats frames for the configured date window,
# then report the latest date that load_data handed back.
(latest_date,
 meta,
 nyt_stats,
 ct_stats) = load_data(EARLIEST_DATE, LATEST_DATE)
print(f"Latest date = {latest_date!s}")

In [None]:
# Index both sources by (ST, Date) so they can be joined row-for-row.
key_cols = ['ST', 'Date']
ct1 = ct_stats.set_index(key_cols).sort_index()[['Pos', 'Neg', 'Tests']]
nyt1 = nyt_stats.set_index(key_cols).sort_index()[['Deaths']]

# Attach the Deaths column to the testing columns (DataFrame.join defaults to
# a left join), then pass each state's rows to calc_state_stats together with
# the ST-indexed metadata.
stats = ct1.join(nyt1)
meta_tmp = meta.set_index('ST')
states = [
    calc_state_stats(st_code, st_rows, meta_tmp)
    for st_code, st_rows in stats.reset_index().groupby('ST')
]

# Stack the per-state results back into one frame.
stats = pandas.concat(states).reset_index()

In [None]:
# Peek at the last two rows of the concatenated per-state stats frame.
stats.tail(2)

### Group by a category from the state metadata and calculate stats for each group

In [None]:
# --- Selection knobs --------------------------------------------------------
# Optional list of ST codes to keep. It is only applied when grouper == 'ST'
# (see the filter below). State lists tried previously, kept for reference:
#   ['NY', 'MA', 'CA', 'AZ', 'TX', 'GA', 'FL']
#   ['NY', 'MA', 'NJ', 'CT', 'RI', 'TX', 'FL']
#   ['NY']
#   ['NY', 'NJ', 'MA', 'TX', 'FL', 'AZ', 'GA']
#   ['CA', 'TX', 'FL', 'AZ', 'GA']
#   ['AL', 'FL', 'MS', 'NV', 'SC', 'TX']
#   ['AL', 'AZ', 'CA', 'FL', 'LA', 'MS', 'NJ', 'NV', 'SC', 'TX']
#   ['NJ']
#   ['AL', 'AZ', 'CA', 'FL', 'GA', 'LA', 'MS', 'NJ', 'NV', 'SC', 'TX']
#   ['AL', 'AZ', 'FL', 'GA', 'LA', 'MS', 'NV', 'SC', 'TX']
states_filter = None

# Optional earliest date to keep (previously tried: '2020-05-01', '2020-05-15').
# None keeps every date.
MIN_DATE = None

# Column to group on. Values used so far in this notebook:
# 'Vote2016', 'Gov', 'ST', 'Voting', 'Country', 'Region'.
grouper = 'Region'

# --- Row selection ----------------------------------------------------------
# The state filter is deliberately ignored unless grouping by state.
if states_filter and (grouper == 'ST'):
    spaz = stats[stats.ST.isin(states_filter)].copy()
else:
    spaz = stats.copy()

if MIN_DATE:
    spaz = spaz[spaz.Date >= MIN_DATE]

# --- Aggregate by (Date, grouper) -------------------------------------------
# Keep only the grouping keys and the numeric columns being summed. Text
# columns such as State/ST are left out on purpose: with pandas >= 2.0,
# groupby().sum() no longer skips non-numeric columns and would concatenate
# their strings.
nyt = spaz[['Date', 'Deaths', grouper]]
ct = spaz[['Date', 'Pos', 'Neg', grouper]]

nyt = nyt.groupby(['Date', grouper]).sum().sort_index()
ct = ct.groupby(['Date', grouper]).sum().sort_index()

# Total population per group, computed once from the state metadata and
# joined on the grouper index level. Re-sort afterwards so rows stay in
# (Date, grouper) order for the per-group differences below.
group_pop = meta[['Pop', grouper]].groupby(grouper).sum()
nyt = nyt.join(group_pop).sort_index()
ct = ct.join(group_pop).sort_index()

# --- Per-capita / cumulative ratios -----------------------------------------
# NOTE(review): DMill is named "per million" but divides by Pop as-is;
# presumably meta.Pop is already expressed in millions — confirm.
nyt['DMill'] = nyt.Deaths / nyt.Pop
ct['PctPos'] = ct.Pos / (ct.Pos + ct.Neg)

# --- Day-over-day and 7-day values ------------------------------------------
# Differences are taken within each group rather than by shifting the whole
# frame by a fixed number of rows, so a group that is missing on some date can
# no longer pull another group's numbers into the subtraction. A lag of 7 rows
# equals 7 days as long as each group has one row per consecutive date.
deaths_by_group = nyt.groupby(level=grouper)['Deaths']
nyt['Daily'] = deaths_by_group.diff()
nyt['Deaths7'] = deaths_by_group.diff(7) / 7
nyt['DMill7'] = nyt.Deaths7 / nyt.Pop

tests_by_group = ct.groupby(level=grouper)
ct['NRatio'] = tests_by_group['Neg'].diff(7) / tests_by_group['Pos'].diff(7)

nyt.tail(5)

## Now for the charts...

In [None]:
# Chart NRatio (week-over-week change in Neg divided by change in Pos),
# one line per group.
foo = (
    ct.pivot_table(values='NRatio', index=['Date'], columns=grouper)
      .plot(title="Neg/Pos Ratio (over prior week)", figsize=(10, 4))
)

In [None]:
# Chart Deaths7 (7-day change in Deaths divided by 7), one line per group.
foo = (
    nyt.pivot_table(values='Deaths7', index=['Date'], columns=grouper)
       .plot(title="Daily Deaths (weekly smoothing)", figsize=(10, 4))
)

In [None]:
# Show the last rows of the grouped deaths frame (same frame the earlier
# cell ends with via nyt.tail(5)).
nyt.tail()

In [None]:
# Chart DMill7 (Deaths7 divided by group population) from 2020-07-01 onward,
# one line per group.
foo = (
    nyt.loc['2020-07-01':, :]
       .pivot_table(values='DMill7', index=['Date'], columns=grouper)
       .plot(title="Daily Deaths Per Million (weekly smoothing)", figsize=(10, 5))
)

In [None]:
# Chart the summed Deaths column over time, one line per group.
foo = (
    nyt.pivot_table(values='Deaths', index=['Date'], columns=grouper)
       .plot(title="Total Deaths", figsize=(10, 4))
)

In [None]:
# Chart DMill (Deaths divided by group population) over time, one line per group.
foo = (
    nyt.pivot_table(values='DMill', index=['Date'], columns=grouper)
       .plot(title="Cumulative Deaths/Million", figsize=(10, 4))
)

In [None]:
# Chart PctPos (Pos / (Pos + Neg)) over time, one line per group.
foo = (
    ct.pivot_table(values='PctPos', index=['Date'], columns=grouper)
      .plot(title="Cumulative Percent Tests Positive", figsize=(10, 4))
)

In [None]:
# Print the Deaths total for each Date (summed over all rows sharing that
# date), highest date first. Only the Deaths column is aggregated, so
# unrelated and possibly non-numeric columns of nyt_stats are never summed.
daily_totals = nyt_stats.groupby('Date')['Deaths'].sum()
for d in daily_totals.sort_index(ascending=False).values:
    print(d)

In [None]:
def print_st(st, frame=None):
    """Print the ``Deaths`` values for one state, one value per line.

    Rows are selected where the ``ST`` column equals ``st`` and are printed in
    descending order of the frame's index.
    NOTE(review): ordering is by index, not by Date — presumably the index
    follows date order; confirm.

    Args:
        st: Value to match against the ``ST`` column.
        frame: DataFrame with ``ST`` and ``Deaths`` columns. When omitted, the
            notebook-level ``nyt_stats`` frame is used, as before.
    """
    if frame is None:
        frame = nyt_stats
    selected = frame[frame.ST == st].sort_index(ascending=False)
    for d in selected.Deaths.values:
        print(d)

# Print the Deaths values for the rows whose ST is 'NY'.
print_st('NY')

In [None]:
# NOTE(review): unconditional, message-less exception. Presumably a deliberate
# stop so that "Run All" halts before the ad-hoc county cells below — confirm
# intent before removing or replacing it.
raise ValueError()

In [None]:
# Download the NYT county-level CSV and show the last rows (in file order)
# whose county is 'Los Angeles'.
counties = pandas.read_csv(
    'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
)
counties.loc[counties['county'] == 'Los Angeles'].tail()

In [None]:
# Date/cases/deaths for the rows whose county is 'Hennepin'.
# NOTE(review): the variable is called `la`, but the filter selects Hennepin,
# not Los Angeles.
la = counties.loc[counties['county'] == 'Hennepin', ['date', 'cases', 'deaths']]
la.tail()

In [None]:
# Read the NYT county-level CSV into `foo`.
# NOTE(review): this is the same URL that an earlier cell already reads into
# `counties`, and `foo` is also the name the chart cells assign their plot
# result to, so this cell rebinds it to a DataFrame.
uri = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
foo = pandas.read_csv(uri)

In [None]:
# Rows for state 'New York' AND county 'New York City', reduced to the four
# columns of interest and given capitalised names.
is_nyc = (foo['state'] == 'New York') & (foo['county'] == 'New York City')
fam = foo.loc[is_nyc, ['date', 'county', 'cases', 'deaths']].copy()
fam.columns = ['Date', 'County', 'Cases', 'Deaths']

# Turn each date value into a pandas Period, keep 2020-06-01 onward, and
# index the result by Date in ascending order.
fam['Date'] = [pandas.Period(str(raw)) for raw in fam['Date']]
cutoff = pandas.Period('2020-06-01', freq='D')
fam = fam[fam['Date'] >= cutoff]
fam = fam.set_index('Date').sort_index()
fam.tail()