# Unbiasing COVID-19 Case Count
Kartik Chugh - May 3, 2020

 <img src="china-data.png" />

In [None]:
import numpy as np
import pandas as pd
from pandas import Timestamp
from matplotlib import pyplot as plt
from datetime import datetime, timedelta
# Print NumPy floats in fixed-point form with three decimals instead of
# scientific notation.
np.set_printoptions(
    suppress=True,
    formatter={'float_kind': '{:0.3f}'.format},
)

In [None]:
# Modelling assumptions used to back-calculate case counts from deaths.

# Days subtracted from the first death date to date the corresponding cases.
# Source: https://midasnetwork.us/covid-19/
ONSET_TO_DEATH = 17

# Deaths are divided by this rate to estimate the cases behind them.
# Evaluates to 0.1.
# NOTE(review): the reason for the extra "* 10" factor is not stated - confirm.
MORTALITY_RATE = 0.01 * 10

# Fixed doubling time in days. The true-case projection divides by this
# constant, so it has to be defined or that cell fails with a NameError.
DOUBLING_TIME = 2.5

In [None]:
# Download the US daily CSV, keeping only the columns used below and using
# the parsed 'date' column as the index.
url = 'https://covidtracking.com/api/v1/us/daily.csv'
df = pd.read_csv(
    url,
    usecols=['date', 'positiveIncrease', 'deathIncrease', 'positive'],
    parse_dates=['date'],
    index_col=['date'],
)

# Sort by date, then drop the first 31 rows.
# NOTE(review): the reason for the 31-row cutoff is not stated - confirm.
df = df.sort_index().iloc[31:]

# Convenience handles: the date index and the 'positive' column as an array.
time = df.index
positive = np.array(df['positive'])

In [None]:
# Remove the row limit so the whole frame is shown when displayed.
pd.options.display.max_rows = None
df

In [None]:
def doublingTime(a, b):
    """Return the doubling time implied by growth from ``a`` to ``b``.

    The result is expressed in units of the interval separating the two
    observations: ``log(2) / log(b / a)``. A value of 1.0 means the
    quantity doubled over that interval.

    Args:
        a: The earlier value.
        b: The later value.

    Returns:
        The doubling time as a float, or NaN when ``a == b`` (no growth,
        so the doubling time is undefined).
    """
    if a == b:
        # The bare name NaN does not exist in Python; use NumPy's constant.
        return np.nan
    return np.log(2) / np.log(b / a)

# Doubling time between each pair of consecutive entries of `positive`.
# The first entry has no predecessor, so it is left as NaN. np.empty would
# leave arbitrary memory there, which would leak into the frame, the plots
# and the smoothing window.
doubling = np.full(positive.shape, np.nan)
for t_2 in range(1, len(doubling)):
    doubling[t_2] = doublingTime(positive[t_2-1], positive[t_2])

# Same values as a one-column frame indexed by date.
doubling_orig = pd.DataFrame(doubling, columns=['doubling'])
doubling_orig = doubling_orig.set_index(time)

# Centered 7-row Gaussian-weighted rolling mean (std=2), rounded to whole
# numbers.
doubling_smooth = doubling_orig.rolling(7,
        win_type='gaussian',
        center=True).mean(std=2).round()

In [None]:
# Quick look: draw the raw doubling times, then the smoothed series with a
# second pyplot call.
plt.plot(doubling_orig)
plt.plot(doubling_smooth)

In [None]:
# Draw both curves on one explicitly created axes. Without ax=, each
# DataFrame.plot call opens its own figure, so the curves never overlay.
ax = plt.figure().add_subplot(111)

# Plot the 'doubling' column as a Series: Series.plot honours label=, whereas
# DataFrame.plot labels each line with its column name instead.
doubling_orig['doubling'].plot(ax=ax,
                   title='Coronavirus Doubling Time',
                   c='k',
                   linestyle=':',
                   alpha=.5,
                   label='Actual',
                   legend=True)

doubling_smooth['doubling'].plot(ax=ax,
                   label='Smoothed',
                   legend=True)

ax.get_figure().set_facecolor('w')

In [None]:
# Date of the first row whose 'deathIncrease' is non-zero. idxmax on the
# boolean mask returns the first True; if every value is zero it returns the
# first date instead.
# Uses `df`, the frame loaded above; the name `data` is never defined.
firstDeathDate = df['deathIncrease'].ne(0).idxmax()
# Manual override, kept for reference:
#firstDeathDate = Timestamp('2020-03-15')
firstDeathDate

In [None]:
# Whole days between the first death date and the last date in the frame.
# Uses `df`, the frame loaded above; the name `data` is never defined.
daysLeft = (df.index[-1] - firstDeathDate).days
daysLeft

In [None]:
# 'deathIncrease' value on the first death date.
# Uses `df`, the frame loaded above; the name `data` is never defined.
firstDeathCount = df.loc[firstDeathDate, 'deathIncrease']
firstDeathCount

In [None]:
# Step back ONSET_TO_DEATH days from the first death date to date the
# estimated first cases.
firstTrueCaseDate = firstDeathDate - timedelta(days=ONSET_TO_DEATH)
firstTrueCaseDate

In [None]:
# Estimate the case count behind the first deaths by dividing the death
# count by the assumed mortality rate.
firstTrueCaseCount = firstDeathCount / MORTALITY_RATE
firstTrueCaseCount

In [None]:
# Exponential projection from the first estimated case count: daysLeft points
# whose base-2 exponents run evenly from 1/DOUBLING_TIME to
# daysLeft/DOUBLING_TIME.
trueCaseCounts = np.logspace(
    start=1/DOUBLING_TIME,
    stop=daysLeft/DOUBLING_TIME,
    num=daysLeft,
    base=2,
) * firstTrueCaseCount

In [None]:
# Projected case counts aligned with the last `daysLeft` dates of the frame.
# Uses `df`, the frame loaded above; the name `data` is never defined.
# The slice start is computed explicitly because index[-0:] would select the
# whole index when daysLeft is 0.
new = pd.DataFrame(data=trueCaseCounts,
                   index=df.index[len(df.index) - daysLeft:])

In [None]:
# Plot every column of the loaded frame against its date index.
# Uses `df`, the frame loaded above; the name `data` is never defined.
# NOTE(review): the projection in `new` may have been the intended series
# here - confirm.
plt.plot(df)