In [1]:
import git
import pandas as pd
import numpy as np
import scipy.integrate


import bokeh.io
import bokeh.application
import bokeh.application.handlers
import bokeh.models


import holoviews as hv

import matplotlib.pyplot as plt

In [2]:
# Find the enclosing git repository (searching parent directories from the
# current directory) so data paths are built from the repository's working
# directory rather than from wherever the notebook happens to be run.
repo = git.Repo("./", search_parent_directories=True)
homedir = repo.working_dir
# Root folder of the US data files. The trailing slash matters: later cells
# build file paths by plain string concatenation onto `datadir`.
datadir = f"{homedir}/data/us/"

In [50]:
# County-level deaths table: four identifying columns (countyFIPS, County Name,
# State, stateFIPS) followed by one column per date, as the listing below shows.
data = pd.read_csv(datadir + "covid/deaths.csv")
data.columns

Index(['countyFIPS', 'County Name', 'State', 'stateFIPS', '1/22/20', '1/23/20',
       '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20',
       '1/30/20', '1/31/20', '2/1/20', '2/2/20', '2/3/20', '2/4/20', '2/5/20',
       '2/6/20', '2/7/20', '2/8/20', '2/9/20', '2/10/20', '2/11/20', '2/12/20',
       '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20',
       '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20',
       '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20', '3/1/20',
       '3/2/20', '3/3/20', '3/4/20', '3/5/20', '3/6/20', '3/7/20', '3/8/20',
       '3/9/20', '3/10/20', '3/11/20', '3/12/20', '3/13/20', '3/14/20',
       '3/15/20', '3/16/20', '3/17/20', '3/18/20', '3/19/20', '3/20/20',
       '3/21/20', '3/22/20', '3/23/20', '3/24/20', '3/25/20', '3/26/20',
       '3/27/20', '3/28/20', '3/29/20', '3/30/20', '3/31/20', '4/1/20',
       '4/2/20', '4/3/20', '4/4/20', '4/5/20', '4/6/20', '4/7/20', '4/8/20',
       '4/9/20'],
      dtype='object')

In [5]:
# County population table, keyed by FIPS code (columns listed below).
# Built from `datadir` so the data root is spelled out in one place only.
demo = pd.read_csv(datadir + "demographics/county_populations.csv")
demo.columns

Index(['FIPS', 'total_pop', '60plus'], dtype='object')

In [15]:
data.head()

Unnamed: 0,countyFIPS,County Name,State,stateFIPS,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20
0,0,Statewide Unallocated,AL,1,0,0,0,0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1001,Autauga County,AL,1,0,0,0,0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
2,1003,Baldwin County,AL,1,0,0,0,0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1005,Barbour County,AL,1,0,0,0,0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1007,Bibb County,AL,1,0,0,0,0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Rows such as "Statewide Unallocated" carry countyFIPS == 0 in every state, so
# countyFIPS on its own is not a usable key for them. Regular rows follow the
# layout countyFIPS = stateFIPS * 1000 + county code (e.g. 1001 for state 1,
# 36057 for state 36), so each zero is replaced by stateFIPS * 1000, giving the
# unallocated row of every state its own identifier.
#
# Using the same multiplier for every state matters: scaling two-digit states
# by 100 instead would map e.g. stateFIPS 2 and stateFIPS 20 to the same 2000.
# NOTE(review): assumes at most one countyFIPS == 0 row per state - confirm.
unallocated = data['countyFIPS'] == 0
data.loc[unallocated, 'countyFIPS'] = data.loc[unallocated, 'stateFIPS'] * 1000

In [17]:
data.shape

(3196, 83)

# SIR Model

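Simple SIR model with compartments $S$ (susceptible), $I$ (infected) and $R$ (recovered) in a population of size $N$:

$$\frac{dS}{dt} = -\frac{\beta}{N} S I, \qquad \frac{dI}{dt} = \frac{\beta}{N} S I - \delta I, \qquad \frac{dR}{dt} = \delta I$$

Parameters:

- `params[0]`: $\beta$, the transmission rate (divided by $N$ inside `sir`)
- `params[1]`: $\delta$, the rate at which infected individuals move to the recovered compartment
- `params[-2:]`: initial infected and recovered fractions of the population (used by `model_z`)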


In [20]:
def sir(dat, t, params, N):
    """Right-hand side of the SIR equations, shaped as an ODE-solver callback.

    Parameters
    ----------
    dat : sequence
        Current state; entries 0, 1 and 2 are the susceptible, infected and
        recovered counts.
    t : float
        Current time. Not used by the equations, but part of the callback
        signature.
    params : sequence
        ``params[0]`` is the transmission rate before division by ``N``;
        ``params[1]`` is the removal rate ``delta``.
    N : number
        Total population, used to normalise the transmission rate.

    Returns
    -------
    list
        ``[dS/dt, dI/dt, dR/dt]``.
    """
    susceptible, infected, _recovered = dat[0], dat[1], dat[2]

    beta = params[0] / N
    delta = params[1]

    # Flow S -> I and flow I -> R; every derivative is built from these two.
    new_infections = beta * susceptible * infected
    removals = delta * infected

    return [-new_infections, new_infections - removals, removals]

In [25]:
from sklearn.metrics import mean_squared_error

def mse(A, B):
    """Mean squared error between ``A`` and ``B`` with non-finite entries zeroed.

    NaN, +inf and -inf in either input are replaced by 0 before the error is
    computed. The replacement is done by ``np.nan_to_num`` itself, so it also
    applies when the inputs are plain lists rather than ndarrays (an
    ``A == np.inf`` mask on a list is a single ``False`` and selects nothing).

    Parameters
    ----------
    A, B : array-like
        Values to compare; passed on to ``mean_squared_error`` after cleaning.

    Returns
    -------
    The value returned by ``mean_squared_error`` for the cleaned inputs.
    """
    Ap = np.nan_to_num(A, nan=0.0, posinf=0.0, neginf=0.0)
    Bp = np.nan_to_num(B, nan=0.0, posinf=0.0, neginf=0.0)
    return mean_squared_error(Ap, Bp)

def model_z(params, data, pop, tmax=-1):
    """Integrate the SIR system and return the simulated trajectory.

    Parameters
    ----------
    params : sequence
        Model parameters. The last two entries are the initial infected and
        recovered fractions of the population; the whole sequence is also
        forwarded to ``sir``.
    data : object with ``.shape``
        Its first dimension gives the number of time points when ``tmax`` is
        not positive.
    pop : number
        Total population ``N``.
    tmax : int, optional
        When greater than 0, the number of time points to simulate instead of
        ``data.shape[0]``.

    Returns
    -------
    numpy.ndarray
        One row per time point (0, 1, 2, ...) with columns S, I, R. If the
        integrator raises ``RuntimeError`` an all-zero array of the same shape
        is returned instead.
    """
    N = pop

    # Seeds are given as population fractions; scale them to head counts.
    seeds = N * np.array(params[-2:])
    infected0, recovered0 = seeds[0], seeds[1]
    susceptible0 = N - np.sum(seeds)
    y0 = np.array([susceptible0, infected0, recovered0])

    horizon = data.shape[0]
    n_steps = tmax if tmax > 0 else horizon

    try:
        return scipy.integrate.odeint(sir, y0, np.arange(0, n_steps), args=(params, N))
    except RuntimeError:
        # Best-effort fallback so a failed integration does not abort a fit.
        return np.zeros((n_steps, len(y0)))

def fit_leastsq_z(params, data, pop=None):
    """Residuals between the SIR trajectory and observed data, for least squares.

    Parameters
    ----------
    params : sequence
        Parameter vector, passed unchanged to ``model_z``.
    data : mapping of columns
        Must provide ``'Deaths'`` and ``'TotalCurrentlyPositive'`` entries with
        a ``.values`` attribute, and a ``.shape`` whose first dimension is the
        number of observations.
    pop : number
        Total population of the region. ``model_z`` cannot run without it, so
        it must be supplied (e.g. through the optimiser's ``args``).

    Returns
    -------
    numpy.ndarray
        Concatenation of the deaths residuals and the infected residuals.

    Raises
    ------
    ValueError
        If ``pop`` is not given.
    """
    if pop is None:
        raise ValueError("fit_leastsq_z requires the total population `pop`")

    Ddata = data['Deaths'].values
    Idata = data['TotalCurrentlyPositive'].values
    s = model_z(params, data, pop)

    I = s[:, 1]
    R = s[:, 2]

    # NOTE(review): the SIR model has no separate death compartment. The
    # removed compartment R is compared against observed deaths and I against
    # currently-positive cases - this mapping is an assumption; confirm it is
    # the intended one before relying on fitted parameters.
    error = np.concatenate((R - Ddata, I - Idata))
    return error

In [26]:
# return data ever since first min_cases cases
def select_region(df, region, min_deaths=50):
    """Return a region's series from the first day it exceeds ``min_deaths``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``countyFIPS`` column and four leading identifier columns
        followed by the per-date value columns.
    region : int
        ``countyFIPS`` value to select.
    min_deaths : number, optional
        Threshold; the series starts at the first value strictly greater than
        this.

    Returns
    -------
    tuple
        ``(values, offset)`` where ``values`` are the entries from the first
        above-threshold column onward and ``offset`` is the number of columns
        between the first non-zero value and that column. ``(empty array, 0)``
        when the threshold is never exceeded.
    """
    region_rows = df.loc[df['countyFIPS'] == region]
    counts = region_rows.iloc[:, 4:].to_numpy()

    # np.where on the 2-D block yields (row indices, column indices); only the
    # column positions are needed.
    _, above_cols = np.where(counts > min_deaths)
    _, nonzero_cols = np.where(counts > 0)

    if above_cols.size == 0:
        return (np.array([]), 0)

    first_above = above_cols[0]
    return (counts[0][first_above:], first_above - nonzero_cols[0])

In [27]:
select_region(data, 6037)

(array([ 54.,  65.,  78.,  89., 117., 132., 147., 169., 198., 223.]), 20)

In [31]:
%matplotlib notebook
%matplotlib inline

plt.figure()
d, offset = select_region(data, 36047)
# parameters: beta, delta, shift
params = [1.8, 0.015, 0.5]
# conditions: E, IA, IS, R
initial_conditions = [4e-6, 0.0009, 0.0005, 0.0002]
s = model_z(params + initial_conditions, d, demo.loc[demo['FIPS'] == 36047]['total_pop'].values[0], offset)
#plt.scatter(d['date_processed'], d['Deaths'])
#plt.plot(d['date_processed'], s[:, 2] )
#plt.show()

<Figure size 432x288 with 0 Axes>

In [32]:
demo.loc[demo['FIPS'] == 6037]['total_pop'].values[0]

10105722

In [33]:
offset

11

In [38]:
d[0]

64.0

In [39]:
s

array([[2.63327642e+06, 1.31756050e+03, 5.27024200e+02],
       [2.62670954e+06, 7.82960152e+03, 5.81854861e+02],
       [2.58825915e+06, 4.59561718e+04, 9.05676792e+02],
       [2.38073131e+06, 2.51648697e+05, 2.74099010e+03],
       [1.61234280e+06, 1.01147923e+06, 1.12989723e+04],
       [5.55206014e+05, 2.04520507e+06, 3.47099206e+04],
       [1.15608623e+05, 2.45034542e+06, 6.91669546e+04],
       [2.10780978e+04, 2.50750167e+06, 1.06541228e+05],
       [3.82183503e+03, 2.48726228e+06, 1.44036887e+05],
       [7.06850974e+02, 2.45331722e+06, 1.81096924e+05],
       [1.33923114e+02, 2.41735960e+06, 2.17627476e+05]])

In [36]:
np.where(data['State'].values == "NY")

(array([1861, 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870, 1871,
        1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, 1881, 1882,
        1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893,
        1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904,
        1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915,
        1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924]),)

In [43]:
data[1891:1900]

Unnamed: 0,countyFIPS,County Name,State,stateFIPS,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20
1891,36057,Montgomery County,NY,36,0,0,0,0,0,0,...,0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0
1892,36059,Nassau County,NY,36,0,0,0,0,0,0,...,63,76.0,95.0,143.0,149.0,381.0,433.0,500.0,566.0,788.0
1893,36061,New York County,NY,36,0,0,0,0,0,0,...,129,165.0,178.0,215.0,264.0,390.0,436.0,513.0,604.0,674.0
1894,36063,Niagara County,NY,36,0,0,0,0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
1895,36065,Oneida County,NY,36,0,0,0,0,0,0,...,1,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
1896,36067,Onondaga County,NY,36,0,0,0,0,0,0,...,1,1.0,2.0,4.0,4.0,4.0,5.0,5.0,6.0,10.0
1897,36069,Ontario County,NY,36,0,0,0,0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1898,36071,Orange County,NY,36,0,0,0,0,0,0,...,12,25.0,30.0,40.0,51.0,51.0,51.0,53.0,63.0,79.0
1899,36073,Orleans County,NY,36,0,0,0,0,0,0,...,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [41]:
d, offset = select_region(data, 36047)

In [42]:
import itertools

def plot_qd(res, p0_params, p0_initial_conditions, df, region, extrapolate=1, boundary=None, plot_infectious=False):
    """Plot a fitted SEIR-QD trajectory against the observed data with Bokeh.

    Parameters
    ----------
    res : object
        Fit result; ``res.x`` is passed to ``model_qd`` as the parameter vector.
    p0_params, p0_initial_conditions
        Accepted for interface compatibility; not used by this function.
    df : pandas.DataFrame
        Source table handed to ``select_region``.
    region
        Region identifier handed to ``select_region``; also shown in the title.
    extrapolate : number, optional
        Multiplier on the observed length giving the simulated length.
    boundary : number, optional
        If given, x position of a vertical marker line.
    plot_infectious : bool, optional
        If true, also draw the modelled infected curve.

    NOTE(review): ``model_qd`` is not defined in the visible part of this
    notebook, and ``select_region`` as defined here returns a
    ``(values, offset)`` tuple, whereas this function indexes the result by
    ``'Deaths'`` / ``'TotalCurrentlyPositive'``. It presumably targets a
    DataFrame-returning variant from another notebook - confirm before use.
    """
    # bokeh.plotting is not among the notebook's top-level imports.
    import bokeh.plotting

    data = select_region(df, region)
    n_observed = len(data)
    n_simulated = n_observed * extrapolate

    s = model_qd(res.x, data, n_simulated)
    I = s[:, 2]
    D = s[:, 5]

    t = np.arange(0, n_observed)
    tp = np.arange(0, n_simulated)

    # f-string so that integer region codes (e.g. FIPS) work as well as names.
    p = bokeh.plotting.figure(plot_width=600,
                              plot_height=400,
                              title=f"{region} SEIR-QD Model",
                              x_axis_label='t (days)',
                              y_axis_label='# people')

    if plot_infectious:
        p.line(tp, I, color='red', line_width=1, legend_label='All infected')
    p.line(tp, D, color='black', line_width=1, legend_label='Deceased')

    # death
    p.circle(t, data['Deaths'], color='black')

    # quarantined
    p.circle(t, data['TotalCurrentlyPositive'], color='purple', legend_label='Tested infected')

    if boundary is not None:
        # Span lives in bokeh.models; the bare name was never imported.
        vline = bokeh.models.Span(location=boundary, dimension='height', line_color='black', line_width=3)
        p.renderers.extend([vline])

    p.legend.location = 'top_left'
    bokeh.io.show(p)

SyntaxError: invalid syntax (<ipython-input-42-80c3d41cc28c>, line 2)