# Inference for flu data

## Flu case incidences

In [None]:
import numpy as np
import pandas as pd

import seirmo as se

flu_data = se.DatasetLibrary().french_flu()
flu_data = flu_data.loc[flu_data['year'] == 2010]

In [22]:
import numpy as np
import os
import pandas as pd

import seirmo as se
from seirmo import plots

flu_data = pd.read_csv('/mnt/c/Users/user/Desktop/french_flu_data.csv')

# playing around with data to investigate if pints run quickly
flu_data = flu_data.loc[:30,:]
# flu_data['inc'] = flu_data['inc']/650000
times = flu_data['time_index']

In [23]:
# Plot incidences data
plot = plots.IncidenceNumberPlot()
plot.add_data(flu_data, time_key='time_index', inc_key='inc')
plot.show()

## Model parameters inference

In [36]:
import pints

class SEIRModel(pints.ForwardModel):
    def __init__(self):
        super(SEIRModel, self).__init__()

        self._model = se.SEIRModel()
    
    def n_outputs(self):
        # Returns number of model outputs.
        # Returns only incidence number, in line with the flu data
        return 1

    def n_parameters(self):
        # Returns number of parameters, i.e. 4 initial condition and 3 parameters.
        return 7
    
    def simulate(self, log_parameters, times):
        parameters = np.exp(log_parameters)
        total = np.sum(parameters[:4])
        parameters[:4] = parameters[:4] / total
        # print(parameters)
        output = self._model.simulate(parameters=parameters, times=times, return_incidence=True)
        output = output * total
        n_incidence = output[:,4]

        return n_incidence

In [37]:
# Guess on starting points of samplers
# parameter_guess= np.array([5, 4, 4, 0, 0.5, 0.5, 0.1, 0.01])
parameter_guess = np.array([0.7, 0, 0.3, 0, 0.5, 0.5, 0.1, 0.05])

# User of pints' SingleOutputProblem and GaussianLogLikelihood

In [38]:
# Create log-likelihood
pints_model = SEIRModel()
problem = pints.SingleOutputProblem(pints_model, times, flu_data['inc'])
log_likelihood = pints.GaussianLogLikelihood(problem)
# log_likelihood = pints.GaussianKnownSigmaLogLikelihood(problem, sigma=0.05)

# Run sampling routines
optimiser = pints.OptimisationController(
    function=log_likelihood,
    x0=parameter_guess,
    method=pints.CMAES
)


In [39]:
optimiser.set_max_iterations(1000)
optimiser.set_log_interval(1)

log_estimates, _ = optimiser.run()
estimates = np.exp(log_estimates)

Maximising LogPDF
Using Covariance Matrix Adaptation Evolution Strategy (CMA-ES)
Running in sequential mode.
Population size: 10
Iter. Eval. Best      Time m:s
0     10    -2.71e+14   0:00.1
1     20    -1.95e+14   0:00.3
2     30    -1.43e+14   0:00.5
3     40    -1.21e+14   0:00.6
4     50    -8.6e+13    0:00.8
5     60    -4.53e+13   0:01.0
6     70    -3.46e+13   0:01.1
7     80    -2.88e+13   0:01.3
8     90    -1.99e+13   0:01.5
9     100   -8.82e+12   0:01.7
10    110   -8.33e+12   0:02.1
11    120   -5.33e+12   0:02.4
12    130   -4.72e+12   0:02.9
13    140   -3.19e+12   0:03.3
14    150   -2.36e+12   0:03.6
15    160   -1.46e+12   0:03.9
16    170   -1.06e+12   0:04.1
17    180   -7.96e+11   0:04.3
18    190   -3.88e+11   0:04.6
19    200   -2.75e+11   0:04.8
20    210   -1.83e+11   0:05.1
21    220   -1.2e+11    0:05.4
22    230   -7.87e+10   0:05.9
23    240   -4.38e+10   0:06.8
24    250   -2.6e+10    0:09.5
25    260   -1.36e+10   0:19.2
26    270   -7.41e+09   1:33.9
27 

KeyboardInterrupt: 

In [43]:
print('Parameter names:')
print(['Initial S', 'Initial E', 'Initial I', 'Initial R', 'Infection Rate', 'Incubation Rate', 'Recovery Rate'])
print('Parameter estimates:')
print(estimates)

Parameter names:
['Initial S', 'Initial E', 'Initial I', 'Initial R', 'Infection Rate', 'Incubation Rate', 'Recovery Rate']
Parameter estimates:
[7.73977117e+00 1.81493348e-04 3.05243647e-05 9.99331063e+05
 6.94876341e-01 1.91645669e-01 6.74562774e-09 1.09345323e+00]


## Compare inferred model to data

In [48]:
model = se.SEIRModel()
times = times.values.tolist()
estimates = estimates[:7]
inferred_result = model.simulate(parameters=estimates, times=times, return_incidence=True)

In [49]:
inferred_data = pd.DataFrame({
            'Time': times,
            'Incidence Number': inferred_result[:, -1],
            'Susceptible': inferred_result[:, 0],
            'Exposed': inferred_result[:, 1],
            'Infectious': inferred_result[:, 2],
            'Recovered': inferred_result[:, 3],
        })

# Plot flu data vs inferred incidences data
plot = plots.SubplotFigure()
plot.add_data(flu_data, time_key='time_index', inc_key='inc')
plot.add_simulation(inferred_data)
plot.get_subplots()
plot.show()

In [50]:
print(inferred_data)

    Time  Incidence Number   Susceptible   Exposed  Infectious      Recovered
0      1          0.000000  7.739771e+00  0.000181    0.000031  999331.062824
1      2          0.000053  7.739484e+00  0.000415    0.000084  999331.062824
2      3          0.000130  7.738734e+00  0.001035    0.000214  999331.062824
3      4          0.000322  7.736863e+00  0.002584    0.000536  999331.062824
4      5          0.000816  7.732116e+00  0.006516    0.001352  999331.062824
5      6          0.002050  7.720193e+00  0.016389    0.003402  999331.062824
6      7          0.005134  7.690392e+00  0.041055    0.008536  999331.062824
7      8          0.012902  7.615749e+00  0.102797    0.021438  999331.062824
8      9          0.032036  7.432581e+00  0.253928    0.053474  999331.062824
9     10          0.078140  6.996608e+00  0.611761    0.131614  999331.062824
10    11          0.182922  6.042500e+00  1.382947    0.314537  999331.062824
11    12          0.385477  4.310961e+00  2.729009    0.700013  