In [22]:
%%html
<style id=hide>div.input{display:none;}</style>
<button type="button" 
onclick="var myStyle = document.getElementById('hide').sheet;myStyle.insertRule('div.input{display:inherit !important;}', 0);">
Show inputs</button>

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
from scipy import stats
import arviz as az
import seaborn as sns
import pandas as pd
from theano import shared
import theano.tensor as tt


### Relevant literature:
    
### https://www.nytimes.com/2020/04/23/nyregion/coronavirus-antibodies-test-ny.html

### https://www.cidrap.umn.edu/covid-19/chinese-study-antibodies-covid-19-patients-fade-quickly

## EDA - Post-omicron

In [7]:
post_omi = pd.read_csv('data/post_omi.csv')
post_omi.sample(20)

Unnamed: 0,Sample_Date,WRRF_Name,Gene Copies (N1/L),Per Capita Gene Copies,"Population Served, estimated",School-Aged Population,Season,Holiday
1160,2022-09-25,26th Ward,8477.0,7950000.0,290608,72091.0,Fall 2022,1
606,2022-04-24,North River,17742.58,9080000.0,658596,72242.0,Spring 2022,0
94,2021-12-05,Red Hook,2924.0,1580834.52,224029,33636.0,Fall 2021,1
907,2022-07-17,Wards Island,12625.0,8430000.0,1201485,240219.0,Summer 2022,0
1556,2023-02-05,Red Hook,8838.0,3580000.0,224029,33636.0,Winter 2022,0
968,2022-08-02,Jamaica Bay,28404.0,10100000.0,748737,131881.0,Summer 2022,0
490,2022-03-27,Bowery Bay,2951.74,1080000.0,924695,122994.0,Spring 2022,0
1307,2022-11-13,Oakwood Beach,16230.0,6010000.0,258731,44374.0,Fall 2022,0
1701,2023-03-21,Bowery Bay,6022.0,2100000.0,924695,122994.0,Spring 2023,0
1022,2022-08-16,Coney Island,9349.0,3680000.0,682342,117865.0,Summer 2022,0


In [8]:
# Double-checking data 
post_omi.isnull().sum()

Sample_Date                     0
WRRF_Name                       0
Gene Copies (N1/L)              0
Per Capita Gene Copies          0
Population Served, estimated    0
School-Aged Population          0
Season                          0
Holiday                         0
dtype: int64

In [22]:
import theano.tensor as tt
pop = post_omi['Population Served, estimated']
per_cap = post_omi['Per Capita Gene Copies']

[]
for i in per_cap:
    

0      -2861781.80
1        218799.00
2        688278.62
3        396992.23
4        332013.40
           ...    
1779    -623833.00
1780    -855971.00
1781     214539.00
1782     239907.00
1783    1858485.00
Length: 1784, dtype: float64


In [23]:

# Set initial infected population
I_initial = # This is what I need to get, either from case data or by some other extrapolation of wastewater data.

# Set intitial susceptible population
S_initial = np.full_like(I_initial, fill_value=6500000) # full_like returns the same shape of I_initial

# approximate total population minus R_initial 

# Set initial recovered population
R_initial = np.full_like(I_initial, fill_value=2000000)  

# By April 2020, approximately 20-25% of New Yorkers had Covid antibodies.
# We assume that the total number by the time our data starts was well in excess of 3 million, of whom some
# would still have antibody protection, given early estimated antibody duration of about 3 months. We would
# need to input different values here to better understand the the actual number of recovered/susceptible.

# We need to make sure all initial states are of the same length
assert len(S_initial) == len(I_initial) == len(R_initial)

# Number of time steps
n = len(I_initial) 
# if we are using "per capita" feature, this makes sense, because those are daily, and we ultimately
# want daily time steps. if we create I differently, then we need to adjust this code.

# Set up model
with pm.Model() as model:
    # Priors on parameters
    beta = pm.HalfNormal('beta', sd=1)  # Infection rate
    sigma = pm.HalfNormal('sigma', sd=1)  # Rate of loss of immunity
    
    # Recovery rate (fixed at 1/90 per day)
    gamma = 1/90
    
    # Initialize S, I, and R with initial values
    S = tt.zeros(n)
    I = tt.zeros(n)
    R = tt.zeros(n)
    
    S = tt.set_subtensor(S[0], S_initial[0])
    I = tt.set_subtensor(I[0], I_initial[0])
    R = tt.set_subtensor(R[0], R_initial[0])
    
    # SIRS equations
    for t in range(1, n):
        dS = -beta * S[t-1] * I[t-1]
        dI = beta * S[t-1] * I[t-1] - gamma * I[t-1]
        dR = gamma * I[t-1] - sigma * R[t-1]
        
        S = tt.set_subtensor(S[t], S[t-1] + dS)
        I = tt.set_subtensor(I[t], I[t-1] + dI)
        R = tt.set_subtensor(R[t], R[t-1] + dR)
    
    # Observations
    I_obs = pm.Normal('I_obs', mu=I, sd=0.1, observed=post_omi['Per Capita Gene Copies'])
    
    # Sample
    trace = pm.sample(2000)

AssertionError: 

In [None]:
az.plot_trace(trace)

let's re-do this based on the following assumptions, backed by data:  1) the total population at the start of data collection is between 7.7 and 8.7 million. 2) by the time our data starts, 1 million people have been infected and half lack antibodies by this point. 1/5 of the 1 million infected are no longer living in the jurisdictions (having left for other cities).  we estimate infection numbers by taking each  "per capita" value and dividing it by its respective "population served" 