# Inference for flu data

## Flu case incidences

In [None]:
import numpy as np
import pandas as pd

import seirmo as se

flu_data = se.DatasetLibrary().french_flu()
flu_data = flu_data.loc[flu_data['year'] == 2010]

In [37]:
import numpy as np
import os
import pandas as pd

import seirmo as se
from seirmo import plots

flu_data = pd.read_csv('/mnt/c/Users/user/Desktop/french_flu_data.csv')
# flu_data = flu_data.loc[flu_data['time_index'] == 2010]
# print(flu_data)

# playing around with data to investigate if pints run quickly
flu_data = flu_data.loc[:30,:]
flu_data['inc'] = flu_data['inc']/650000
times = flu_data['time_index']

0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
20    21
21    22
22    23
23    24
24    25
25    26
26    27
27    28
28    29
29    30
30    31
Name: time_index, dtype: int64


In [38]:
# Plot incidences data
plot = plots.IncidenceNumberPlot()
plot.add_data(flu_data, time_key='time_index', inc_key='inc')
plot.show()

## Model parameters inference

In [39]:
import pints

class SEIRModel(pints.ForwardModel):
    def __init__(self):
        super(SEIRModel, self).__init__()

        self._model = se.SEIRModel()
    
    def n_outputs(self):
        # Returns number of model outputs.
        # Returns only incidence number, in line with the flu data
        return 1

    def n_parameters(self):
        # Returns number of parameters, i.e. 4 initial condition and 3 parameters.
        return 7
    
    def simulate(self, log_parameters, times):
        output = self._model.simulate(parameters=np.exp(log_parameters), times=times, return_incidence=True)
        n_incidence = output[:,4]

        return n_incidence

In [40]:
# Guess on starting points of samplers
# parameter_guess = np.array([5, 4, 4, 0, 0.5, 0.5, 0.1, 0.1])
parameter_guess = np.array([0.7, 0, 0.3, 0, 0.5, 0.5, 0.1, 0.05])

# User of pints' SingleOutputProblem and GaussianLogLikelihood

In [41]:
# Create log-likelihood
pints_model = SEIRModel()
problem = pints.SingleOutputProblem(pints_model, times, flu_data['inc'])
log_likelihood = pints.GaussianLogLikelihood(problem)
# log_likelihood = pints.GaussianKnownSigmaLogLikelihood(problem, sigma=0.05)

# Run sampling routines
optimiser = pints.OptimisationController(
    function=log_likelihood,
    x0=parameter_guess,
    method=pints.CMAES
)


In [42]:
optimiser.set_max_iterations(1000)
optimiser.set_log_interval(1)

log_estimates, _ = optimiser.run()
estimates = np.exp(log_estimates)

557   1:15.3
358   3590   30.88572   1:15.5
359   3600   30.88576   1:15.7
360   3610   30.88577   1:15.9
361   3620   30.88588   1:16.2
362   3630   30.88588   1:16.4
363   3640   30.88588   1:16.6
364   3650   30.88602   1:16.9
365   3660   30.88602   1:17.1
366   3670   30.88602   1:17.3
367   3680   30.88602   1:17.5
368   3690   30.88614   1:17.7
369   3700   30.88615   1:18.0
370   3710   30.88619   1:18.2
371   3720   30.8862    1:18.5
372   3730   30.88627   1:18.7
373   3740   30.88627   1:18.9
374   3750   30.8863    1:19.2
375   3760   30.8864    1:19.4
376   3770   30.88643   1:19.6
377   3780   30.88643   1:19.8
378   3790   30.88643   1:20.1
379   3800   30.88653   1:20.3
380   3810   30.88659   1:20.6
381   3820   30.88659   1:20.8
382   3830   30.88659   1:21.0
383   3840   30.88659   1:21.2
384   3850   30.88659   1:21.5
385   3860   30.88659   1:21.7
386   3870   30.88665   1:21.9
387   3880   30.88669   1:22.1
388   3890   30.88669   1:22.3
389   3900   30.88674   1:

In [43]:
print('Parameter names:')
print(['Initial S', 'Initial E', 'Initial I', 'Initial R', 'Infection Rate', 'Incubation Rate', 'Recovery Rate'])
print('Parameter estimates:')
print(estimates)

Parameter names:
['Initial S', 'Initial E', 'Initial I', 'Initial R', 'Infection Rate', 'Incubation Rate', 'Recovery Rate']
Parameter estimates:
[7.73977117e+00 1.81493348e-04 3.05243647e-05 9.99331063e+05
 6.94876341e-01 1.91645669e-01 6.74562774e-09 1.09345323e+00]


## Compare inferred model to data

In [48]:
model = se.SEIRModel()
times = times.values.tolist()
estimates = estimates[:7]
inferred_result = model.simulate(parameters=estimates, times=times, return_incidence=True)

In [49]:
inferred_data = pd.DataFrame({
            'Time': times,
            'Incidence Number': inferred_result[:, -1],
            'Susceptible': inferred_result[:, 0],
            'Exposed': inferred_result[:, 1],
            'Infectious': inferred_result[:, 2],
            'Recovered': inferred_result[:, 3],
        })

# Plot flu data vs inferred incidences data
plot = plots.SubplotFigure()
plot.add_data(flu_data, time_key='time_index', inc_key='inc')
plot.add_simulation(inferred_data)
plot.get_subplots()
plot.show()

In [50]:
print(inferred_data)

    Time  Incidence Number   Susceptible   Exposed  Infectious      Recovered
0      1          0.000000  7.739771e+00  0.000181    0.000031  999331.062824
1      2          0.000053  7.739484e+00  0.000415    0.000084  999331.062824
2      3          0.000130  7.738734e+00  0.001035    0.000214  999331.062824
3      4          0.000322  7.736863e+00  0.002584    0.000536  999331.062824
4      5          0.000816  7.732116e+00  0.006516    0.001352  999331.062824
5      6          0.002050  7.720193e+00  0.016389    0.003402  999331.062824
6      7          0.005134  7.690392e+00  0.041055    0.008536  999331.062824
7      8          0.012902  7.615749e+00  0.102797    0.021438  999331.062824
8      9          0.032036  7.432581e+00  0.253928    0.053474  999331.062824
9     10          0.078140  6.996608e+00  0.611761    0.131614  999331.062824
10    11          0.182922  6.042500e+00  1.382947    0.314537  999331.062824
11    12          0.385477  4.310961e+00  2.729009    0.700013  