In [1]:
import pandas as pd
import numpy as np
from scipy.integrate import odeint
from Source.Classes import Model

In [2]:
# Define Generalized Logistic Growth Model

def GLM(t, r, p, K, C_0=1):
    """Integrate the generalized logistic growth model over the times ``t``.

    Solves ``dC/dt = r * C**p * (1 - C/K)`` numerically with ``odeint``,
    starting from ``C = C_0`` at the first entry of ``t``.

    Parameters
    ----------
    t : sequence of float
        Time points at which the solution is wanted; ``t[0]`` is the
        initial time.
    r : float
        Multiplicative rate constant of the growth term.
    p : float
        Exponent applied to ``C`` in the growth term.
    K : float
        Value of ``C`` at which the ``(1 - C/K)`` factor, and therefore
        the derivative, becomes zero.
    C_0 : float, optional
        Initial value of ``C`` (default 1).

    Returns
    -------
    numpy.ndarray
        One-dimensional array with the solution at every time in ``t``.
    """
    def growth_rate(cases, _time):
        # The right-hand side does not depend explicitly on time.
        saturation = 1 - cases/K
        return r * cases**p * saturation

    trajectory = odeint(growth_rate, C_0, t)
    # odeint returns shape (len(t), 1) for a scalar state; flatten to 1-D.
    return trajectory[:, 0]

# Build a Model instance from the label "GLM" and the GLM solver function.
# NOTE(review): Model is imported from Source.Classes and is not visible here;
# the cells below call its load / fit_bootstrap / visualise / assess methods.
GLM_Model = Model("GLM",GLM)

In [3]:
# Load data

source = "https://raw.githubusercontent.com/tomwhite/covid-19-uk-data/master/data/covid-19-cases-uk.csv"
# Raw Upper Tier Local Authority (UTLA) case counts, read from the URL above
# each time this cell runs.
# NOTE(review): the original trailing comment ended with
# `utla_raw.to_csv("dailycases.csv")`. As written that call is part of the
# comment and never executes — confirm whether a local snapshot was intended.
utla_raw = pd.read_csv(source)

In [4]:
# Clean data

def str2num(s):

    '''
    float <- str

    Convert a case-count string to a number.

    Accounts for irregular phrases such as '1 to 4' in the counts data
    by taking the mean of all integers occurring in the phrase
    (e.g. '1 to 4' -> 2.5, '12' -> 12.0).

    Returns NaN when the phrase contains no whitespace-separated
    integer token. Raises AssertionError if ``s`` is not a str.
    '''

    assert type(s) is str, "Case counts is not a string"
    # Test each token, not the whole phrase: '1 to 4'.isdigit() is False,
    # so filtering on the phrase would discard every range-style entry.
    counts = [int(n) for n in s.split() if n.isdigit()]
    if counts:
        return np.mean(counts)
    return np.nan
        
# Parse every raw count string with str2num (it returns NaN when no number is found).
utla_raw["TotalCases"] = list(map(str2num, utla_raw["TotalCases"]))
# Discard the rows whose count could not be parsed.
utla_raw.dropna(subset=["TotalCases"], inplace=True)
utla_raw["Date"] = pd.to_datetime(utla_raw["Date"])
# remove today's data as it seems to be underreported
utla_raw = utla_raw[
    utla_raw["Date"] < pd.to_datetime("today").strftime("%m/%d/%Y")
]


In [5]:
# Filter to city of interest

target = "Sheffield"
# NOTE(review): presumably an approximate population for the target area — confirm the figure.
pop = 10**6 / 2

# Keep only the target area's rows, and only the two columns handed to the model.
data = utla_raw[utla_raw["Area"] == target][["Date", "TotalCases"]]
GLM_Model.load(data, pop=pop, city=target)

In [6]:
# Visualise cases so far

# No fit has been run at this point; prediction=False presumably restricts the
# plot to the loaded observations — confirm against Model.visualise.
GLM_Model.visualise(prediction=False)

In [7]:
# Fit model

# Starting values and lower/upper bounds for the GLM parameters r, p and K.
# K is supplied as a string expression in `pop`; presumably Model evaluates it
# against the population passed to load() — TODO confirm in Source.Classes.
par0 = {"r": 0.8, "p": 1, "K":"0.2*pop"}
parlower = {"r": 0, "p": 0, "K":"0.1*pop"}
parupper = {"r": 10, "p": 1, "K":"0.8*pop"}

# S=50 is presumably the number of bootstrap resamples — confirm.
# NOTE(review): no random seed is set in the visible cells, so if the bootstrap
# is stochastic the fitted values may differ from run to run.
GLM_Model.fit_bootstrap(par0,parlower,parupper,S=50)

In [8]:
# Visualise 7 days

# lookahead=7 presumably extends the plot 7 days past the data and
# intervals=True presumably draws uncertainty bands from the bootstrap fit —
# confirm both against Model.visualise.
GLM_Model.visualise(lookahead=7, intervals=True)

In [9]:
# Assess model by measuring RMSE of forecasts

# NOTE(review): assess() is defined in Source.Classes; the RMSE description
# above comes from the original comment and cannot be verified from this file.
GLM_Model.assess()