# Inference for flu data

## Flu case incidences

In [None]:
import numpy as np
import pandas as pd

import seirmo as se

# Load the French flu dataset shipped with seirmo and keep only the rows
# whose 'year' column equals 2010.
flu_data = (
    se.DatasetLibrary()
    .french_flu()
    .loc[lambda frame: frame['year'] == 2010]
)

In [64]:
import numpy as np
import os
import pandas as pd

import seirmo as se
from seirmo import plots

# Location of the flu incidence CSV. The default is the original,
# machine-specific path; set the FRENCH_FLU_DATA_PATH environment variable
# to run this notebook on another machine without editing the cell.
flu_data_path = os.environ.get(
    'FRENCH_FLU_DATA_PATH', '/home/sabsr3/Desktop/french_flu_data.csv')
flu_data = pd.read_csv(flu_data_path)

# Restrict the inference to the start of the series. `.loc` slicing is
# label-based and includes the end label, so this keeps index labels 0..30.
flu_data = flu_data.loc[:30, :]

# Time points at which the model is later evaluated and compared to the data.
times = flu_data['time_index']

In [65]:
# Show the observed incidences over time.
# The keyword arguments name the time and incidence columns of `flu_data`.
data_columns = dict(time_key='time_index', inc_key='inc')
plot = plots.IncidenceNumberPlot()
plot.add_data(flu_data, **data_columns)
plot.show()

## Model parameters inference

In [66]:
import pints

class SEIRModel(pints.ForwardModel):
    """PINTS forward-model wrapper around ``se.SEIRModel``.

    Only the incidence number is exposed as model output, in line with the
    flu data. Parameters are received on a log scale and exponentiated
    before being handed to the wrapped seirmo model.
    """

    # Column of the seirmo output (simulated with return_incidence=True)
    # that is returned as the single model output.
    _INCIDENCE_COLUMN = 4

    def __init__(self):
        super().__init__()
        self._model = se.SEIRModel()

    def n_outputs(self):
        """Return the number of model outputs (the incidence number only)."""
        return 1

    def n_parameters(self):
        """Return the number of parameters: 4 initial conditions and 3 parameters."""
        return 7

    def simulate(self, log_parameters, times):
        """Simulate the wrapped model and return its incidence column.

        ``log_parameters`` is passed through ``np.exp`` before the call to
        the wrapped model; ``times`` is forwarded unchanged.
        """
        simulation = self._model.simulate(
            parameters=np.exp(log_parameters),
            times=times,
            return_incidence=True,
        )
        return simulation[:, self._INCIDENCE_COLUMN]

In [73]:
# Starting point handed to the optimiser, assembled from labelled groups.
# NOTE(review): SEIRModel.simulate applies np.exp to the values it receives,
# so these numbers are interpreted on a log scale -- confirm this is intended.
initial_condition_guess = [0.7, 0, 0.3, 0]  # S, E, I, R
rate_guess = [0.5, 0.5, 0.1]  # infection, incubation, recovery
# Eighth entry: presumably the noise parameter that GaussianLogLikelihood
# adds on top of the 7 model parameters -- TODO confirm.
extra_guess = [0.05]
# Previously tried starting point: [5, 4, 4, 0, 0.5, 0.5, 0.1, 0.05]
parameter_guess = np.array(
    initial_condition_guess + rate_guess + extra_guess)

# Use of pints' SingleOutputProblem and GaussianLogLikelihood

In [74]:
# Tie the forward model to the observed data: incidences at the data's times.
pints_model = SEIRModel()
observed_incidences = flu_data['inc']
problem = pints.SingleOutputProblem(pints_model, times, observed_incidences)

# Gaussian log-likelihood on the problem.
# Alternative with a fixed noise level:
# log_likelihood = pints.GaussianKnownSigmaLogLikelihood(problem, sigma=0.05)
log_likelihood = pints.GaussianLogLikelihood(problem)

# Set up the CMA-ES optimisation of the log-likelihood, started from
# `parameter_guess`. The run itself is triggered separately.
optimiser_settings = {
    'function': log_likelihood,
    'x0': parameter_guess,
    'method': pints.CMAES,
}
optimiser = pints.OptimisationController(**optimiser_settings)


In [None]:
# Cap the run at 1000 iterations and log every iteration.
optimiser.set_max_iterations(1000)
optimiser.set_log_interval(1)

log_estimates, _ = optimiser.run()

# The optimised vector can be longer than the forward model's parameter
# vector: pints.GaussianLogLikelihood appends its noise parameter(s) after
# the model parameters. Only the model parameters are on a log scale (the
# forward model applies np.exp to them), so exponentiate just those and keep
# any trailing noise parameter(s) untransformed. If the likelihood adds no
# extra parameters, the slices below leave everything as before.
n_model_parameters = pints_model.n_parameters()
estimates = np.exp(log_estimates[:n_model_parameters])
noise_estimates = log_estimates[n_model_parameters:]

Maximising LogPDF
Using Covariance Matrix Adaptation Evolution Strategy (CMA-ES)
Running in sequential mode.
Population size: 10
Iter. Eval. Best      Time m:s
0     10    -1.07e+09  18:57.1


In [70]:
# Print the parameter labels followed by the inferred values, one item per
# line.
parameter_names = [
    'Initial S',
    'Initial E',
    'Initial I',
    'Initial R',
    'Infection Rate',
    'Incubation Rate',
    'Recovery Rate',
]
print(
    'Parameter names:',
    parameter_names,
    'Parameter estimates:',
    estimates,
    sep='\n',
)

Parameter names:
['Initial S', 'Initial E', 'Initial I', 'Initial R', 'Infection Rate', 'Incubation Rate', 'Recovery Rate']
Parameter estimates:
[2.28015024 1.08513333 1.41945182 1.02657282 1.62861438 1.58669156
 1.11330857]


## Compare inferred model to data

In [71]:
# Simulate the SEIR model at the data's time points with the inferred
# parameters, requesting the incidence output as well.
model = se.SEIRModel()

# Convert the time points to a plain list. Going through np.asarray accepts
# both the original pandas Series and the list this cell rebinds `times` to,
# so the cell can be re-run without an AttributeError.
times = np.asarray(times).tolist()

inferred_result = model.simulate(
    parameters=estimates, times=times, return_incidence=True)

In [72]:
# Collect the simulation output in a labelled table: the first four columns
# of `inferred_result` are taken as the compartments (in the order below) and
# the last column as the incidence number.
compartments = ('Susceptible', 'Exposed', 'Infectious', 'Recovered')
inferred_data = pd.DataFrame({
    'Time': times,
    'Incidence Number': inferred_result[:, -1],
    **{
        name: inferred_result[:, position]
        for position, name in enumerate(compartments)
    },
})

# Overlay the observed flu data and the inferred simulation in one figure.
data_columns = dict(time_key='time_index', inc_key='inc')
plot = plots.SubplotFigure()
plot.add_data(flu_data, **data_columns)
plot.add_simulation(inferred_data)
plot.get_subplots()
plot.show()