# Covid 19 Data modelling in R

https://docs.idmod.org/projects/emod-hiv/en/latest/model-overview.html

In [1]:
# Clean Environment
# Removes all objects listed by ls() from the current (global) environment and
# then runs the garbage collector; gc() also returns the memory-usage table
# that is displayed as this cell's output.
# NOTE(review): this silently discards anything defined before the notebook is
# run; restarting the R session is the safer way to get a clean state.
rm(list = ls())
gc()

Unnamed: 0,used,(Mb),gc trigger,(Mb).1,max used,(Mb).2
Ncells,567695,30.4,1229605,65.7,713196,38.1
Vcells,1064924,8.2,8388608,64.0,1820463,13.9


In [2]:
#install.packages("covid19.analytics")
#install.packages("dygraphs")
#install.packages("writexl")
#install.packages("xts")
#install.packages("deSolve")
#install.packages("reshape2")

In [3]:
# Import needed libraries
library(covid19.analytics)
library(dygraphs)
library(writexl)
library(xts)
library(deSolve)
library(reshape2)

Loading required package: zoo


Attaching package: ‘zoo’


The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric




In [4]:
# Fit and evaluate a dense feed-forward Keras network on (x, y).
#
# The rows are split in their given order (no shuffling): the first `cutoff`
# share is used for training, the remaining rows for testing.
#
# Arguments:
#   x, y                  Predictors and response; combined as cbind(y, x).
#                         Both are required.
#   cutoff                Share of rows (from the top) used for training.
#   validation_split      Share of the training rows passed to fit() as
#                         validation data (defaults to 1 - cutoff).
#   loss                  Loss name; also used as the reported metric.
#   optimizer             Keras optimizer object.
#   batch_size, epochs    Passed on to fit().
#   activation            Activation of the hidden layers.
#   finalactivation       Activation of the single output unit.
#   numberOfHiddenLayers  Number of hidden layers, 0 to 5. Any other value
#                         triggers a warning and falls back to 1.
#   useBias               use_bias of the output layer when there are no
#                         hidden layers, and of hidden layers 2..5 otherwise
#                         (hidden layer 1 and the output layer of deeper
#                         models keep the Keras default).
#   l1.units .. l5.units  Units of hidden layers 1..5.
#   dropoutRate           Dropout rate applied after every hidden layer.
#   forceClassifier       TRUE: threshold the predictions at their mean and
#                         report a confusion matrix, accuracy and AUC.
#                         FALSE: report the test MSE.
#
# Returns a list with the fitted model and its evaluate() scores, the
# train/test splits, the test predictions, an evaluation matrix, the training
# history plot and the result summary.
KerasNNRegressor <- function(
  x,
  y,
  cutoff = .9,
  validation_split = 1 - cutoff,
  loss = 'mae',
  optimizer = optimizer_rmsprop(),
  batch_size = 128,
  activation = 'relu',
  finalactivation = 'sigmoid',
  numberOfHiddenLayers = 1,
  useBias = FALSE,
  l1.units = 20,
  l2.units = 10,
  l3.units = 5,
  l4.units = 4,
  l5.units = 2,
  dropoutRate = 0.2,
  epochs = 10,
  forceClassifier = FALSE
) {

  # Package
  library(keras)

  # Data: response in the first column, predictors in the remaining ones
  all <- data.frame(cbind(y, x))

  # Setup: seq_len() instead of 1:n so an empty split cannot turn into c(1, 0)
  train_idx <- seq_len(round(cutoff * nrow(all), 0))
  if (length(train_idx) == 0) {
    stop("`cutoff` leaves no rows for training.", call. = FALSE)
  }
  x_train <- as.matrix(all[train_idx, -1])
  y_train <- as.matrix(all[train_idx, 1])
  x_test <- as.matrix(all[-train_idx, -1])
  y_test <- as.matrix(all[-train_idx, 1])

  # Number of distinct response values in the training data (used to decide
  # whether an AUC can be computed)
  num_classes <- nrow(plyr::count(y_train))

  # Defining the Model
  # Unsupported layer counts really fall back to one hidden layer (previously
  # only a message was printed and `model` was left undefined).
  if (!(length(numberOfHiddenLayers) == 1 && numberOfHiddenLayers %in% 0:5)) {
    warning(
      "Input value for [numberOfHiddenLayers] must be 0, 1, 2, 3, 4, or 5. ",
      "Since none of the values above are entered, the default is set to 1.",
      call. = FALSE
    )
    numberOfHiddenLayers <- 1
  }
  hidden_units <- c(l1.units, l2.units, l3.units, l4.units, l5.units)

  model <- keras_model_sequential()
  if (numberOfHiddenLayers == 0) {
    # No hidden layer: the output unit is connected directly to the inputs
    model <- model %>%
      layer_dense(
        units = 1,
        input_shape = c(ncol(x_train)),
        activation = finalactivation,
        use_bias = useBias)
  } else {
    # First hidden layer carries the input shape
    model <- model %>%
      layer_dense(
        units = hidden_units[1],
        activation = activation,
        input_shape = c(ncol(x_train))) %>%
      layer_dropout(dropoutRate)

    # Hidden layers 2..numberOfHiddenLayers
    for (units in hidden_units[seq_len(numberOfHiddenLayers)[-1]]) {
      model <- model %>%
        layer_dense(units = units, activation = activation, use_bias = useBias) %>%
        layer_dropout(dropoutRate)
    }

    # Output layer
    model <- model %>%
      layer_dense(units = 1, activation = finalactivation)
  }
  summary(model)

  # Next, compile the model with appropriate loss function, optimizer, and metrics:
  model %>% compile(
    loss = loss,
    optimizer = optimizer,
    metrics = c(loss))

  # Training and Evaluation
  history <- model %>% fit(
    x_train, y_train,
    epochs = epochs,
    batch_size = batch_size,
    validation_split = validation_split
  )
  training_plot <- plot(history)

  # Evaluate the model's performance on the test data:
  scores <- model %>% evaluate(x_test, y_test)

  # Generate predictions on new data.
  # predict() replaces predict_proba(), which is no longer available in
  # current Keras releases.
  y_test_hat <- model %>% predict(x_test)

  if (isTRUE(forceClassifier)) {
    # Binarise the predictions around their own mean
    y_test_binary <- ifelse(y_test_hat > mean(y_test_hat), 1, 0)
    confusion.matrix <- table(Y_Hat = y_test_binary, Y = y_test)
    test.acc <- sum(diag(confusion.matrix))/sum(confusion.matrix)
    all.error <- plyr::count(y_test - cbind(y_test_binary))
    y_test_eval_matrix <- cbind(
      y_test=y_test,
      y_test_hat=y_test_binary,
      y_test_hat_raw=y_test_hat )

    # AUC/ROC
    if ((num_classes == 2) && (nrow(plyr::count(y_test_hat)) > 1)) {
      AUC_test <- pROC::roc(c(y_test), c(y_test_hat))
    } else {
      AUC_test <- c("Estimate do not have enough levels.")
    }

    # Output
    result <- list(
      Confusion.Matrix = confusion.matrix,
      Confusion.Matrix.Pretty = knitr::kable(confusion.matrix),
      Testing.Accuracy = test.acc,
      All.Types.of.Error = all.error,
      Test_AUC = AUC_test
    )
  } else {
    MSE_test <- mean((y_test - y_test_hat)^2)
    y_test_eval_matrix <- cbind(
      y_test=y_test,
      y_test_hat_raw=y_test_hat )

    # Output
    result <- list(
      MSE_test = MSE_test
    )
  }

  # Return
  list(
    Model = list(model = model, scores = scores),
    x_train = x_train,
    y_train = y_train,
    x_test = x_test,
    y_test = y_test,
    y_test_hat = y_test_hat,
    y_test_eval_matrix = y_test_eval_matrix,
    Training.Plot = training_plot,
    Result = result
  )
}

## Pull data

In [5]:
# Download the global JHU CSSE time series for confirmed cases, deaths and
# recoveries (one row per country/province, one column per day).
all_confirmed_cases <- covid19.data(case = "ts-confirmed")
all_confirmed_deaths <- covid19.data(case = "ts-deaths")
all_confirmed_recoveries <- covid19.data(case = "ts-recovered")

Data being read from JHU/CCSE repository



~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 


Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv

Data retrieved on 2021-08-30 16:54:13 || Range of dates on data: 2020-01-22--2021-08-29 | Nbr of records: 279



-------------------------------------------------------------------------------- 


Data being read from JHU/CCSE repository



~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 


Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv

Data retrieved on 2021-08-30 16:54:14 || Range of dates on data: 2020-01-22--2021-08-29 | Nbr of records: 279



-------------------------------------------------------------------------------- 


Data being read from JHU/CCSE repository



~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 


Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv

Data retrieved on 2021-08-30 16:54:17 || Range of dates on data: 2020-01-22--2021-08-29 | Nbr of records: 264



-------------------------------------------------------------------------------- 


In [6]:
# all_confirmed_cases_info <- all_confirmed_cases[1:]
# all_confirmed_deaths_info <- 
# all_confirmed_recoveries_info <-

# Keep the first 204 columns of every table: the four metadata columns plus
# the first 200 daily columns (2020-01-22 up to 2020-08-08 in this data).
all_data_time_offset <- 204

all_confirmed_cases <- all_confirmed_cases[, seq_len(all_data_time_offset)]
all_confirmed_deaths <- all_confirmed_deaths[, seq_len(all_data_time_offset)]
all_confirmed_recoveries <- all_confirmed_recoveries[, seq_len(all_data_time_offset)]

In [7]:
# Look at how the data is structured
View(all_confirmed_cases)

Province.State,Country.Region,Lat,Long,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,⋯,2020-07-30,2020-07-31,2020-08-01,2020-08-02,2020-08-03,2020-08-04,2020-08-05,2020-08-06,2020-08-07,2020-08-08
<chr>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,⋯,36532,36665,36700,36701,36737,36773,36820,36928,37006,37046
,Albania,41.15330,20.168300,0,0,0,0,0,0,⋯,5197,5276,5396,5519,5620,5750,5889,6016,6151,6275
,Algeria,28.03390,1.659600,0,0,0,0,0,0,⋯,29831,30394,30950,31465,31972,32504,33055,33626,34155,34693
,Andorra,42.50630,1.521800,0,0,0,0,0,0,⋯,922,925,925,925,937,939,939,944,955,955
,Angola,-11.20270,17.873900,0,0,0,0,0,0,⋯,1109,1148,1164,1199,1280,1344,1395,1483,1538,1572
,Antigua and Barbuda,17.06080,-61.796400,0,0,0,0,0,0,⋯,91,91,91,91,92,92,92,92,92,92
,Argentina,-38.41610,-63.616700,0,0,0,0,0,0,⋯,185373,191302,196543,201919,206743,213535,220682,228195,235677,241811
,Armenia,40.06910,45.038200,0,0,0,0,0,0,⋯,38196,38550,38841,39050,39102,39298,39586,39819,39985,40185
Australian Capital Territory,Australia,-35.47350,149.012400,0,0,0,0,0,0,⋯,113,113,113,113,113,113,113,113,113,113
New South Wales,Australia,-33.86880,151.209300,0,0,0,0,3,4,⋯,3756,3773,3784,3797,3809,3820,3832,3842,3851,3861


In [8]:
# Lookup table: row name -> country for the confirmed-cases data.
# Built from whole columns instead of growing two vectors inside a loop.
indexList <- rownames(all_confirmed_cases)
countryList <- all_confirmed_cases[, 2]

country_index_list <- data.frame(indexList = indexList, countryList = countryList)

# The analysed country is Germany (row 135 in the data snapshot used here).
# It is looked up by name because the row number depends on the snapshot.
country_index <- match("Germany", countryList)
if (is.na(country_index)) {
    stop("Country 'Germany' not found in the confirmed-cases data.", call. = FALSE)
}

country_index_list[country_index, ]

Unnamed: 0_level_0,indexList,countryList
Unnamed: 0_level_1,<chr>,<chr>
135,135,Germany


In [9]:
# The recovered data frame has different dimensions (fewer rows) than the cases/deaths data frames, so the same country sits at a different row index. Thanks, CSSE at Johns Hopkins.

# Lookup table: row name -> country for the recoveries data.
# Built from whole columns instead of growing two vectors inside a loop.
recovery_indexList <- rownames(all_confirmed_recoveries)
recovery_countryList <- all_confirmed_recoveries[, 2]

recovery_country_index_list <- data.frame(
    recovery_indexList = recovery_indexList,
    recovery_countryList = recovery_countryList
)

# View(recovery_country_index_list[(130:200), ])

# The analysed country is Germany (row 120 of the recoveries table in the data
# snapshot used here). It is looked up by name because the row number depends
# on the snapshot.
recovery_country_index <- match("Germany", recovery_countryList)
if (is.na(recovery_country_index)) {
    stop("Country 'Germany' not found in the recoveries data.", call. = FALSE)
}

recovery_country_index_list[recovery_country_index, ]

Unnamed: 0_level_0,recovery_indexList,recovery_countryList
Unnamed: 0_level_1,<chr>,<chr>
120,120,Germany


In [10]:
# Extract the single row of the analysed country from each table.
# (The recoveries table uses its own row index.)
it_confirmed_cases <- all_confirmed_cases[country_index, ]
it_confirmed_deaths <- all_confirmed_deaths[country_index, ]
it_confirmed_recoveries <- all_confirmed_recoveries[recovery_country_index, ]

print("Cases:")
View(it_confirmed_cases)
print("Deaths:")
View(it_confirmed_deaths)
print("Recoveries:")
View(it_confirmed_recoveries)

# Date of the first confirmed case, taken from the data itself: the name of
# the first daily column (columns 5 onwards) whose count is at least 1.
# The previously hard-coded "2020-01-31" did not match the selected row, whose
# first case is reported on 2020-01-27.
firstCaseDate <- local({
    daily_cases <- unlist(it_confirmed_cases[, -(1:4)])
    first_day <- which(daily_cases >= 1)[1]
    if (is.na(first_day)) {
        stop("No confirmed case found for the selected country.", call. = FALSE)
    }
    names(daily_cases)[first_day]
})

[1] "Cases:"


Unnamed: 0_level_0,Province.State,Country.Region,Lat,Long,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,⋯,2020-07-30,2020-07-31,2020-08-01,2020-08-02,2020-08-03,2020-08-04,2020-08-05,2020-08-06,2020-08-07,2020-08-08
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
135,,Germany,51.16569,10.45153,0,0,0,0,0,1,⋯,209535,210399,211005,211220,212111,212828,214113,215039,216196,216903


[1] "Deaths:"


Unnamed: 0_level_0,Province.State,Country.Region,Lat,Long,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,⋯,2020-07-30,2020-07-31,2020-08-01,2020-08-02,2020-08-03,2020-08-04,2020-08-05,2020-08-06,2020-08-07,2020-08-08
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
135,,Germany,51.16569,10.45153,0,0,0,0,0,0,⋯,9144,9147,9154,9154,9154,9163,9179,9181,9195,9201


[1] "Recoveries:"


Unnamed: 0_level_0,Province.State,Country.Region,Lat,Long,2020-01-22,2020-01-23,2020-01-24,2020-01-25,2020-01-26,2020-01-27,⋯,2020-07-30,2020-07-31,2020-08-01,2020-08-02,2020-08-03,2020-08-04,2020-08-05,2020-08-06,2020-08-07,2020-08-08
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>
120,,Germany,51.16569,10.45153,0,0,0,0,0,0,⋯,191551,191992,192636,192908,193594,194173,194568,195281,195935,196550


In [11]:
# Find the first daily column with at least one confirmed case and store the
# number of cases reported on that day in `firstInfection` (0 if there is
# none). The printed index is the column position in the full row, i.e. it
# includes the four metadata columns (Province.State, Country.Region, Lat,
# Long), which are skipped explicitly rather than by testing the column class.
firstInfection <- local({
    meta_columns <- 4
    daily_counts <- unlist(it_confirmed_cases[, -seq_len(meta_columns)])
    first_hit <- which(daily_counts >= 1)[1]

    if (is.na(first_hit)) {
        0
    } else {
        print(paste0(
            "Index of the first infection is: ", first_hit + meta_columns,
            ", Number of infections is: ", daily_counts[[first_hit]]
        ))
        daily_counts[[first_hit]]
    }
})

[1] "Index of the first infection is: 10, Number of infections is: 1"


In [12]:
# Drop the four metadata columns, turn the remaining single row into a
# one-column matrix whose row names are the dates, and convert it to xts.
it_confirmed_cases <- it_confirmed_cases[, 5:ncol(it_confirmed_cases)]
it_confirmed_cases <- t(it_confirmed_cases)
colnames(it_confirmed_cases) <- "Cases"
it_confirmed_cases <- as.xts(it_confirmed_cases)

In [13]:
# Drop the four metadata columns, turn the remaining single row into a
# one-column matrix whose row names are the dates, and convert it to xts.
it_confirmed_deaths <- it_confirmed_deaths[, 5:ncol(it_confirmed_deaths)]
it_confirmed_deaths <- t(it_confirmed_deaths)
colnames(it_confirmed_deaths) <- "Deaths"
it_confirmed_deaths <- as.xts(it_confirmed_deaths)

In [14]:
# Drop the four metadata columns, turn the remaining single row into a
# one-column matrix whose row names are the dates, and convert it to xts.
it_confirmed_recoveries <- it_confirmed_recoveries[, 5:ncol(it_confirmed_recoveries)]
it_confirmed_recoveries <- t(it_confirmed_recoveries)
colnames(it_confirmed_recoveries) <- "Recoveries"
it_confirmed_recoveries <- as.xts(it_confirmed_recoveries)

In [15]:
# Cleanup

rm("all_confirmed_cases", "all_confirmed_deaths", "all_confirmed_recoveries")

In [16]:
# Bind the three xts series column-wise into one object and plot it.
data_total <- cbind(it_confirmed_cases, it_confirmed_deaths, it_confirmed_recoveries)
data_total <- as.xts(data_total)
colnames(data_total) <- c("Cases", "Deaths", "Recoveries")

dygraph(data_total)

In [17]:
# I don't know why I put estimated recoveries as cases - deaths + real recoveries, since a case can either go in death or in recovery, so it should only be cases - deaths.
# This might not be right either, since on the first day, we have 1 case, 0 deaths, and so ... 1 recovery? Not really.

# I did it because in my model you can't die: all you can be is "recovered", which technically covers both dead and recovered.

# Since the recoveries in our data are skewed, this function computes an
# estimate of the recoveries.
#
# The estimate is `cases - deaths + data_recoveries`, passed through lag()
# with `recovery_time` as the lag, after which the first `recovery_time`
# entries are set to 0.
#
# Arguments:
#   cases, deaths, data_recoveries  Series of equal length (the notebook
#                                   passes one-column xts objects).
#   recovery_time                   Lag, in observations (default 14).
#
# Returns the lagged estimate, with its leading `recovery_time` entries
# replaced by 0.
compute_recoveries <- function(
    cases,
    deaths,
    data_recoveries,
    recovery_time = 14
) {
    estimated_recoveries <- cases - deaths + data_recoveries

    return_dataframe <- lag(estimated_recoveries, recovery_time)

    # seq_len() instead of 1:recovery_time: with recovery_time = 0 the old
    # index c(1, 0) wrongly zeroed the first entry. The count is also capped
    # at the series length so the assignment never indexes past the end.
    leading <- seq_len(min(recovery_time, NROW(return_dataframe)))
    if (length(leading) > 0) {
        return_dataframe[leading] <- 0
    }

    return(return_dataframe)
}

#Since the recoveries in our data are skewed, I'll write a function which computes a pretty good estimate of the recoveries
# compute_recoveries <- function(
#     cases,
#     deaths,
#     recovery_time = 14
# ) {
#     estimated_recoveries <- cases - deaths
    
#     return_dataframe <- lag(estimated_recoveries, recovery_time)
#     return_dataframe[1:recovery_time] <- 0
    
#     return(return_dataframe)
# }

In [18]:
# Compute the first differences of every column of `dataframe` and return
# them bound together column-wise.
#
# A one-column, all-NA placeholder data frame with one row per input row is
# created first so that cbind() has something to append to; that placeholder
# column is removed again in the return statement.
# NOTE(review): the callers in this notebook pass xts objects. Presumably this
# relies on diff() returning a series as long as its input (with a leading NA)
# so that the columns line up in cbind() — confirm before passing other types.
compute_deltas <- function(
    dataframe
) {
    new_dataframe <- data.frame(matrix(NA, nrow = nrow(dataframe)))
    
    # Append the differenced version of each column in turn
    for (i in 1:ncol(dataframe)) {
        new_dataframe <- cbind(new_dataframe, diff(dataframe[, i]))
    }
    
    # Drop the NA placeholder column
    return(new_dataframe[, -1])
}

In [19]:
dim(it_confirmed_cases)

In [20]:
# This makes everything crash

#real_data_total <- as.xts(cbind(it_confirmed_cases, it_confirmed_deaths, compute_recoveries(it_confirmed_cases, it_confirmed_deaths, it_confirmed_recoveries)))
real_data_total <- data_total
colnames(real_data_total) <- c("Cases", "Deaths", "Recoveries")

In [21]:
rm("data_total")

In [22]:
dygraph(real_data_total)

Covid recoveries are so low because "In order to be considered recovered by the Centers for Disease Control and Prevention, a person must be free of a fever without the help of medication, show improvement in respiratory conditions and receive negative results from two separate tests performed at least 24 hours apart."

In [23]:
real_data_daily <- compute_deltas(real_data_total)

dygraph(real_data_daily)

In [24]:
# Rescale `x` linearly so that its minimum maps to 0 and its maximum to 1.
#
# Arguments:
#   x      Numeric vector (or numeric series such as xts).
#   na.rm  If TRUE (default), missing values are ignored when computing the
#          minimum and maximum; they stay NA in the result. If FALSE, any NA
#          in `x` makes the whole result NA.
#
# Returns an object of the same shape as `x`. If all non-missing values are
# equal, the range is 0 and the result is NaN.
minmax_normalize <- function(x, na.rm = TRUE) {
    # na.rm was previously accepted but never passed on to min()/max()
    lower <- min(x, na.rm = na.rm)
    upper <- max(x, na.rm = na.rm)

    return((x - lower) / (upper - lower))
}

## Testing different models

### Model Agnostic Variables

In [25]:
# Number of days covered by the simulations
analysis_days <- 365

# One calendar date per simulated day, starting at the first-case date
dates <- seq(
    from = as.Date(firstCaseDate),
    by = "days",
    length.out = analysis_days
)

In [26]:
firstCaseDate

### SIR Model

$$
\begin{eqnarray}
    \frac{dS}{dt} & = & - \beta S I \\
    \frac{dI}{dt} & = & \beta S I - \gamma I \\
    \frac{dR}{dt} & = & \gamma I \\
\end{eqnarray}
$$

If $\beta \cdot S_0 - \gamma > 0$, the number of infected people initially grows ($\frac{dI}{dt} > 0$) and we have an epidemic; otherwise the outbreak dies out.

In our case, with Italy, if $\beta$ is $1.7$, $\gamma$ is $1$ and $S_0$ is $60000000$, then our $R_0$ is: 

$$R_0 = \frac{\beta S_0}{\gamma}$$

By plugging in our numbers we get:

$$R_0 = \frac{1.7 \cdot 60000000}{1} = 102000000$$

Which means that our $R_0$ is far greater than 1, the threshold above which an epidemic occurs.

In [27]:
# Model inputs

# Compartments
#   S: Susceptible (initially the whole population)
#   I: Infected
#   R: Removed (dead OR recovered)

# NOTE(review): 83e6 is presumably Germany's population (the rows selected
# earlier in this notebook are Germany's). The source quoted in the original
# comment is Statista's page for Italy (60.2 million as of January 2020):
# https://www.statista.com/statistics/786485/population-by-gender-in-italy/
# — replace it with a matching source.
susceptible <- 83e+06
infected <- firstInfection
recovered <- 0

initial_state_values <- c(S = susceptible, I = infected, R = recovered)

# Parameters
#   beta:  the effective transmission rate
#   gamma: the effective recovery rate
#
# sir_model() (defined below) scales the force of infection by I / N, so the
# infected compartment grows while beta * S / N > gamma. With S close to N
# this gives R0 = beta / gamma, and an epidemic takes off when R0 > 1.
#
# Published R0 estimates for COVID-19 range from 1.5 to 6.68, with a median of
# 2.79. Source: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7751056/
# beta = 0.27 with gamma = 0.1 would give R0 = 2.7; the values active below
# give R0 = 0.4 / 0.3, roughly 1.33.

parameters <- c(gamma = 0.3, beta = 0.4)
# parameters = c(gamma = 1, beta = 1.7)
#parameters = c(gamma = 2, beta = 2.4)

# Time points: one per simulated day

time <- seq(from = 1, to = analysis_days, by = 1)

# SIR model function
#
# Derivative function in the form expected by deSolve::ode().
#   time        Current time point (not used in the equations).
#   state       Named vector with elements S, I and R.
#   parameters  Named vector with elements beta and gamma.
# Returns a one-element list holding c(dS, dI, dR).
sir_model <- function(time, state, parameters) {
  values <- as.list(c(state, parameters))

  derivatives <- with(values, {
    population <- S + I + R
    force_of_infection <- beta * (I / population)

    new_infections <- force_of_infection * S
    removals <- gamma * I

    c(-new_infections, new_infections - removals, removals)
  })

  list(derivatives)
}


# Solve the differential equations at the requested time points
output <- ode(
  y = initial_state_values,
  times = time,
  func = sir_model,
  parms = parameters
)
output <- as.data.frame(output)

# Long format: one row per (time, compartment) pair
out_long <- melt(output, id.vars = "time")

names(out_long) <- c("Time", "Variable", "Value")

# dim(out_long)

#### SIR Model Graph

In [28]:
# Pull the three SIR trajectories out of the long-format solver output.
# Rows are selected by compartment name instead of by hard-coded row offsets,
# so the result does not depend on the order or length of the melted data.
Susceptible <- out_long[out_long$Variable == "S", "Value"]
Infected <- out_long[out_long$Variable == "I", "Value"]
Recovered <- out_long[out_long$Variable == "R", "Value"]

# One row per simulated day, labelled with its calendar date so the table can
# be converted to xts. (The first column was previously mislabelled
# "Susceptbile".)
plot_data <- data.frame(
    Susceptible = Susceptible,
    Infected = Infected,
    Recovered = Recovered,
    row.names = as.character(dates)
)

plot_data <- as.xts(plot_data)

# head(plot_data)

# head(plot_data)

In [29]:
dygraph(plot_data) %>%
    dyAxis("y", label = "People") %>%
    dyAxis("x", label = "Date")

In [30]:
plot_data_daily <- compute_deltas(plot_data)

dygraph(plot_data_daily) %>%
    dyAxis("y", label = "People (Millions)") %>%
    dyAxis("x", label = "Date")

In [31]:
# All cases
# Sum of the Infected column of plot_data over all simulated days.
# NOTE(review): this counts every person once per day spent in the infected
# compartment, so it is presumably not the number of distinct cases — confirm
# the intended meaning.

all_sir_cases <- sum(plot_data$Infected)

print(all_sir_cases)

[1] 125715945


#### SIR Model Comparison

In [32]:
compare_data_sir <- minmax_normalize(plot_data$Recovered)
compare_data_real <- minmax_normalize(real_data_total$Recoveries)

# View(as.xts(cbind(compare_data_real, compare_data_sir)))

dygraph(as.xts(cbind(compare_data_real, compare_data_sir)))

### SEIR Model

In [33]:
# State values:

# 1: Susceptibles
# 2: Exposed, this means infected, but still not infectious
# 3: Infected
# 4: Recovered or Dead

# Parameters:
# Beta: Same as before
# Gamma: Same as before
# Delta: 1/latent period

contact_rate <- 2                     # number of contacts per day
transmission_probability <- 0.27      # transmission probability
infectious_period <- 15               # infectious period
latent_period <- 7                    # latent period

beta_value <- contact_rate * transmission_probability
gamma_value <- 1 / infectious_period
delta_value <- 1 / latent_period

# Basic reproduction number implied by the values above
Ro <- beta_value / gamma_value

parameter_list <- c(beta = beta_value, gamma = gamma_value, delta = delta_value)

# Initial compartment sizes (people)
# NOTE(review): 83e6 is presumably Germany's population; the source quoted in
# the original comment is Statista's page for Italy
# (https://www.statista.com/statistics/786485/population-by-gender-in-italy/)
# — replace it with a matching source.
susceptibles <- 83e+06
infected <- firstInfection
recovered <- 0
exposed <- 0

total_pop <- susceptibles + infected + recovered + exposed

# Initial state as population fractions. Each compartment now receives its own
# count: previously E was given the infected count, I the recovered count and
# R the exposed count, so the simulation started without any infectious person.
initial_values <- c(
  S = susceptibles / total_pop,
  E = exposed / total_pop,
  I = infected / total_pop,
  R = recovered / total_pop
)

# Time points 0..analysis_days (one more point than there are entries in `dates`)
timepoints <- seq(0, analysis_days, by = 1)

# SEIR derivative function in the form expected by deSolve's solvers.
#   current_timepoint  Current time (not used in the equations).
#   state_values       Vector whose elements 1..4 are S, E, I and R.
#   parameters         Named vector/list with elements beta, gamma and delta.
# Returns a one-element list holding c(dS, dE, dI, dR).
SEIR <- function (current_timepoint, state_values, parameters) {
  # state variables, taken by position
  S <- state_values[1]        # susceptibles
  E <- state_values[2]        # exposed
  I <- state_values[3]        # infectious
  R <- state_values[4]        # recovered

  # rate parameters, taken by name
  beta <- parameters[["beta"]]
  gamma <- parameters[["gamma"]]
  delta <- parameters[["delta"]]

  # flows between compartments
  new_exposures <- beta * S * I
  new_infectious <- delta * E
  new_removals <- gamma * I

  # derivatives, in the same order as the state variables
  dS <- -new_exposures
  dE <- new_exposures - new_infectious
  dI <- new_infectious - new_removals
  dR <- new_removals

  list(c(dS, dE, dI, dR))
}

output = lsoda(initial_values, timepoints, SEIR, parameter_list)

# head(output)

#### SEIR Model Graph

In [34]:
# Drop the first solver row (time point 0) so that the remaining rows match
# the entries of `dates` one to one.
output <- output[-1, ]

# Columns 2..5 of the solver output hold the four compartments
# (column 1 is the time).
plot_data <- as.data.frame(output[, 2:5])
colnames(plot_data) <- c("Susceptible", "Exposed", "Infected", "Recovered")
rownames(plot_data) <- dates

plot_data <- as.xts(plot_data)

seir_graph <- dygraph(plot_data)
seir_graph <- dyAxis(seir_graph, "y", label = "People (Percentage)")
dyAxis(seir_graph, "x", label = "Date (Days)")

#### SEIR Model Daily Graph

In [35]:
plot_data_daily <- compute_deltas(plot_data)

dygraph(plot_data_daily) %>%
    dyAxis("y", label = "People (Millions)") %>%
    dyAxis("x", label = "Date")

#### SEIR Model Comparison

In [36]:
compare_data_sir <- minmax_normalize(plot_data$Recovered)
compare_data_real <- minmax_normalize(real_data_total$Recoveries)

# View(as.xts(cbind(compare_data_real, compare_data_sir)))

dygraph(as.xts(cbind(compare_data_real, compare_data_sir)))

In [37]:
compare_data_sir <- minmax_normalize(plot_data$Infected)
compare_data_real <- minmax_normalize(real_data_total$Cases)

# View(as.xts(cbind(compare_data_real, compare_data_sir)))

dygraph(as.xts(cbind(compare_data_real, compare_data_sir)))

Here the prediction should contain cumulative data, else it looks broken

### SIDARTHE Model

#### States:

- **S**usceptible
- **I**nfected
- **D**iagnosed
- **A**iling
- **R**ecognized
- **T**hreatened
- **H**ealing
- **E**xtinct

There are three subsystems in this model: **S**, the susceptible individuals; **I, D, A, R and T**, the infected individuals; and **H and E**, the healed and the deceased (extinct) individuals.

Only when **I+D+A+R+T = 0**, then the remaining variables **S, H and E** are at equilibrium.

#### Ordinary differential equations

$$
\begin{eqnarray}
    \dot{S}(t) & = & -S(t)(\alpha I(t) + \beta D(t) + \gamma A(t) + \delta R(t)) \\
    \dot{I}(t) & = & S(t)(\alpha I(t) + \beta D(t) + \gamma A(t) + \delta R(t)) - (\epsilon + \zeta + \lambda)I(t) \\
    \dot{D}(t) & = & \epsilon I(t) - (\eta + \rho)D(t) \\
    \dot{A}(t) & = & \zeta I(t) - (\theta + \mu + \kappa)A(t) \\
    \dot{R}(t) & = & \eta D(t) + \theta A(t) - (\nu + \xi)R(t) \\
    \dot{T}(t) & = & \mu A(t) + \nu R(t) - (\sigma + \tau)T(t) \\
    \dot{H}(t) & = & \lambda I(t) + \rho D(t) + \kappa A(t) + \xi R(t) + \sigma T(t) \\
    \dot{E}(t) & = & \tau T(t) \\
\end{eqnarray}
$$

- All the uppercase Latin letters (S, I, D, ...) represent the fraction of the population in a given state, so that together they sum to 1 (the total population).
- The parameters, denoted by lowercase greek letters, are positive numbers.
    - $\alpha, \beta, \gamma \text{ and } \delta$ respectively denote the transmission rate (the probability of disease transmission in a single contact multiplied by the average number of contacts per person) due to contacts between a susceptible subject and an infected, a diagnosed, an ailing or a recognized subject.
    - $\epsilon \text{ and } \theta$ capture the probability rate of detection, relative to asymptomatic and symptomatic cases, respectively. These parameters, also modifiable, reflect the level of attention on the disease and the number of tests performed over the population: they can be increased by enforcing a massive contact tracing and testing campaign
    - $\zeta$ and $\eta$ denote the probability rate at which an infected subject, respectively not aware and aware of being infected, develops clinically relevant symptoms, and are comparable in the absence of specific treatment.
    - $\mu$ and $\nu$ respectively denote the rate at which undetected and detected infected subjects develop life-threatening symptoms; they are comparable if there is no known specific treatment that is effective against the disease, otherwise µ may be larger. Conversely, ν may be larger because infected individuals with more acute symptoms, who have a higher risk of worsening, are more likely to have been diagnosed.
    - $\tau$ denotes the mortality rate (for infected subjects with life-threatening symptoms).
    - $\lambda, \kappa, \xi, \rho \text{ and } \sigma$ denote the rate of recovery for the five classes of infected subjects; they may differ significantly if an appropriate treatment for the disease is known and adopted for diagnosed patients, but are probably comparable otherwise.
    