In [1]:
import numpy as np
import pandas as pd
import os, sys

In [2]:
import os, sys
# add to path
sys.path.append("../..")

import attr
import datetime

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import seaborn

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_probability as tfp

tfb = tfp.bijectors

import edward2 as ed

import pandas as pd

tf.config.set_visible_devices([], 'GPU')


In [3]:
from filterflow.smc import SMC
from filterflow.base import State, StateWithMemory, StateSeries

from filterflow.observation.base import ObservationModelBase, ObservationSampler
from filterflow.observation.linear import LinearObservationSampler
from filterflow.transition.random_walk import RandomWalkModel
from filterflow.proposal import BootstrapProposalModel
from filterflow.transition.base import TransitionModelBase

from filterflow.resampling.criterion import NeffCriterion, AlwaysResample, NeverResample
from filterflow.resampling.standard import SystematicResampler, MultinomialResampler
from filterflow.resampling.differentiable import RegularisedTransform, CorrectedRegularizedTransform
from filterflow.resampling.differentiable.ricatti.solver import RicattiSolver

from filterflow.resampling.base import NoResampling

from filterflow.state_space_model import StateSpaceModel

In [4]:
data_dir = "../../data/covid"
os.listdir(data_dir)

['covid.csv']

In [5]:
df = pd.read_csv(os.path.join(data_dir, 'covid.csv'))

In [6]:
df.loc[df.geoId =='UK', 'deaths'].sum()

31241

Infections model (Latent state evolution):
\begin{align}
    S_t &= S_{t-1}-\beta S_{t-1}I_{t-1}/N \\
    E_t &= (1-\alpha)E_{t-1}+\beta S_{t-1} I_{t-1}/N \\
    I_t &= (1-\gamma+\epsilon_t) I_{t-1}+\alpha E_{t-1}\\
    R_t &= R_{t-1} + \gamma I_{t-1}  ~\mathbf{\text{not needed}}\\
    \epsilon_t &\sim \mathcal{N}(0,1)\\
\end{align}

Death Model (Observation Model):
\begin{align}
d_t &= \delta_t I_t \\
    D_t - D_{t-1} &\sim \text{NegBinom}(d_t, d_t + \frac{d_t^2}{\Psi}) \\
    \Psi &\sim \mathcal{N}^+(0,5)
\end{align}

In [7]:
def split_sei(x):
    S, E, I = tf.split(x, 3, axis=-1)
    return S,E,I

def join_sei(S,E,I):
    x = tf.concat([S,E, I], axis=-1)
    return x

def zero_state(batch_size, num_particles, dimension):
    uniform_dist = tfp.distributions.Uniform()
    x = uniform_dist.sample([batch_size, n_particles, 3*dimension])
    return split_sei(x)

    


In [51]:
alpha = 0.25
beta = 0.5
gamma = 0.5
delta = 0.1
population_size = 66488991.0

In [52]:
class SEIRTransitionModel(TransitionModelBase):
    def __init__(self, alpha, beta, gamma, population_size, name='SEIRTransitionModel'):
        super(SEIRTransitionModel, self).__init__(name=name)
        self.normal_dist = tfp.distributions.Normal(loc=tf.constant(0.), scale=tf.constant(1.))
        self.beta = beta
        self.alpha = alpha
        self.gamma = gamma
        self.pop = population_size
        
    def _loglikelihood(self, prior_state: State, proposed_state: State):
        St_1, Et_1, It_1 = split_sei(prior_state.particles)
        St, Et, It = split_sei(proposed_state.particles)
        
        eps = (It - (1.-self.gamma)*It_1 - self.alpha*Et_1) / It_1
        log_prob = tf.reduce_sum(self.normal_dist.log_prob(eps), axis=-1)
        return log_prob
    
    def loglikelihood(self, prior_state: State, proposed_state: State, inputs: tf.Tensor):
        return self._loglikelihood(prior_state, proposed_state)

    def sample(self, state: State, inputs: tf.Tensor):
        St_1, Et_1, It_1 = split_sei(state.particles)
        
        eps = self.normal_dist.sample()
        
        St = St_1 - self.beta*St_1*It_1/self.pop
        Et = (1-self.alpha) * Et_1 + self.beta*St_1*It_1/self.pop

        It = (1-self.gamma+eps)*It_1 + self.alpha*Et_1
        
        x = join_sei(St, Et, It)
        
        return State(particles=x, 
                     log_weights = state.log_weights,
                     weights=state.weights, 
                     log_likelihoods=state.log_likelihoods)


In [53]:
class SEIRProposalModel(SEIRTransitionModel):
    def __init__(self, alpha, beta, gamma, population_size, name ='SEIRProposalModel'):
        super(SEIRProposalModel, self).__init__(alpha, beta, gamma, population_size, name=name)

    def loglikelihood(self, proposed_state: State, state: State, inputs: tf.Tensor, observation: tf.Tensor):
        return self._loglikelihood(state, proposed_state)
    
    def propose(self, state: State, inputs: tf.Tensor, observation: tf.Tensor):
        return self.sample(state, None)

In [67]:
class SEIRObservationModel(ObservationSampler):
    
    def __init__(self, delta, batch_shape=50, name='SEIRObservationModel'):
        super(SEIRObservationModel, self).__init__(name=name)
        self.half_norm_dist = tfp.distributions.HalfNormal(scale = tf.constant(5.))
        self.delta = delta
        self.batch_shape = batch_shape
            
    def loglikelihood(self, state: State, observation: tf.Tensor):
        S, E, I = split_sei(state.particles)
        b = state.batch_size
        dt = tf.expand_dims(self.delta*I, -1)
        psi = self.half_norm_dist.sample([b, 1, 1, self.batch_shape])
        nb_dist = tfp.distributions.NegativeBinomial(total_count=psi, probs=dt/(psi+dt))
        tf.print(nb_dist.prob(observation))
        prob = tf.reduce_mean(nb_dist.prob(observation), axis=-1)
        return tf.reduce_sum(tf.math.log(prob), -1)

    def sample(self, state: State):
        S, E, I = split_sei(state.particles)
        b = state.batch_size
        dt = tf.expand_dims(self.delta*I, -1)
        psi = self.half_norm_dist.sample([b, 1, 1, self.batch_shape])
        nb_dist = tfp.distributions.NegativeBinomial(total_count=psi, probs=dt/(psi+dt))
        dt1 = tf.reduce_mean(nb_dist.sample(), -1)
        return dt1

In [68]:
batch_size = 4
n_particles = 4
dimension = 1


In [69]:
learnable_alpha = tf.Variable(alpha, trainable=True)
learnable_beta = tf.Variable(beta, trainable=True)
learnable_gamma = tf.Variable(gamma, trainable=True)
transition_model = SEIRTransitionModel(learnable_alpha, learnable_beta, learnable_gamma, population_size)
proposal_model = SEIRProposalModel(learnable_alpha, learnable_beta, learnable_gamma, population_size)
observation_model = SEIRObservationModel(delta)

In [70]:
trainable_variables = [learnable_alpha, learnable_beta, learnable_gamma]

In [71]:
uniform_dist = tfp.distributions.Uniform()
initial_particles = uniform_dist.sample([batch_size, n_particles, 3*dimension])

initial_particles = tf.cast(initial_particles, dtype=float)

initial_weights = tf.ones((batch_size, n_particles), dtype=float) / tf.cast(n_particles, float)
log_likelihoods = tf.zeros(batch_size, dtype=float)
initial_state = State(particles=initial_particles, 
                      log_weights = tf.math.log(initial_weights),
                      weights=initial_weights, 
                      log_likelihoods=log_likelihoods)

In [72]:
observations = df[df.geoId == 'UK']['deaths'].iloc[::-1].values.astype(np.float32)
obs_data = tf.data.Dataset.from_tensor_slices(observations.reshape(-1, 1, 1, 1))

In [73]:
T = observations.shape[0]

In [74]:
# resampling
resampling_criterion = NeffCriterion(tf.constant(0.5), tf.constant(True))
systematic = SystematicResampler()
multinomial = MultinomialResampler()


epsilon = tf.constant(0.25)
scaling = tf.constant(0.75)

regularized = RegularisedTransform(epsilon, scaling=scaling, max_iter=1000, convergence_threshold=1e-4)

In [75]:
regularized_filter = SMC(observation_model, transition_model, proposal_model, resampling_criterion, regularized)
particle_filter = SMC(observation_model, transition_model, proposal_model, resampling_criterion, multinomial)

In [76]:
particle_filter(initial_state, obs_data, T, return_final=False).particles

[[[[0.97900629 0.979039907 0.983047068 ... 0.979058862 0.979239225 0.979022205]]

  [[0.950171769 0.9503631 0.967356503 ... 0.950467408 0.951458514 0.950261354]]

  [[0.965075493 0.965169311 0.974726915 ... 0.96522063 0.965713859 0.965118468]]

  [[0.956896245 0.957039177 0.970521748 ... 0.9571172 0.957863331 0.956962347]]]


 [[[0.951389492 0.951166868 0.952006459 ... 0.951718152 0.951202273 0.951190531]]

  [[0.953142226 0.952936053 0.953716 ... 0.9534477 0.95296818 0.952956915]]

  [[0.93729651 0.936926603 0.938316226 ... 0.9378407 0.936984241 0.936965525]]

  [[0.994299352 0.994296253 0.994307876 ... 0.994303942 0.994296908 0.994296312]]]


 [[[0.976287842 0.977544308 0.976594806 ... 0.976448417 0.976565421 0.97654134]]

  [[0.975692749 0.977011 0.976015508 ... 0.975861847 0.975984693 0.975959361]]

  [[0.987096906 0.987481177 0.98718828 ... 0.98714447 0.987179458 0.987172186]]

  [[0.981614113 0.982381701 0.981799364 ... 0.981710851 0.981781542 0.981766939]]]


 [[[0.959624529 0.9

<tf.Tensor: shape=(131, 4, 4, 3), dtype=float32, numpy=
array([[[[4.45953608e-01, 5.63569665e-02, 2.12330177e-01],
         [7.03737378e-01, 3.49563181e-01, 5.12023389e-01],
         [4.96753216e-01, 6.01953901e-02, 3.55930328e-01],
         [8.03465843e-01, 1.95525795e-01, 4.41275299e-01]],

        [[4.35812831e-01, 5.90015411e-01, 5.02408504e-01],
         [6.18468523e-02, 5.15924454e-01, 4.83706117e-01],
         [1.05289817e-01, 7.37821937e-01, 6.54477358e-01],
         [8.81749749e-01, 6.31327629e-02, 5.72237670e-02]],

        [[1.69450521e-01, 4.57379848e-01, 2.40412638e-01],
         [7.86816478e-01, 5.91158390e-01, 2.46528804e-01],
         [8.76721978e-01, 1.51617248e-02, 1.30000770e-01],
         [7.62564659e-01, 1.52164042e-01, 1.85830742e-01]],

        [[2.27915764e-01, 5.71551383e-01, 4.18753505e-01],
         [7.89886594e-01, 4.05986845e-01, 4.76272851e-01],
         [2.68729925e-02, 3.28156829e-01, 1.68686032e-01],
         [4.43344116e-02, 3.17615300e-01, 2.10219577e

In [21]:
# Create an optimizer.
optimizer = tf.optimizers.Adam(learning_rate=0.01, epsilon=1e-8)


@tf.function
def smc_routine(smc, state, use_correction_term=False):
    final_state = smc(initial_state, 
                      observation_series=obs_data, 
                      n_observations=T, 
                      return_final=True)

    res = tf.reduce_mean(final_state.log_likelihoods)
    if use_correction_term:
        return res, tf.reduce_mean(final_state.resampling_correction)
    return res, tf.constant(0.)
    

@tf.function
def run_one_step(smc, use_correction_term, init_state):
    with tf.GradientTape() as tape:
        tape.watch(trainable_variables)
        real_ll, correction = smc_routine(smc, init_state, use_correction_term)
        loss = -(real_ll + correction)
    grads_loss = tape.gradient(loss, trainable_variables)
    return real_ll, grads_loss


@tf.function
def train_one_step(smc, use_correction_term):
    real_ll, grads_loss = run_one_step(smc, use_correction_term, initial_state)
    clip = lambda x: tf.clip_by_value(x, -500., 500.)
    grads_loss = [clip(grad) for grad in grads_loss]
    optimizer.apply_gradients(zip(grads_loss, trainable_variables))
    return -real_ll, grads_loss


@tf.function
def train_niter(smc, num_steps=100, use_correction_term=False, reset=True):
    reset_trainable_variables = [learnable_alpha.assign(alpha), 
                                 learnable_beta.assign(beta), 
                                 learnable_gamma.assign(gamma)]
    if reset:
        reset_operations = reset_trainable_variables
    else:
        reset_operations = []

    loss_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False, element_shape=[])
    grad_tensor_array = tf.TensorArray(dtype=tf.float32, size=num_steps, dynamic_size=False, element_shape=[])
    time_tensor_array = tf.TensorArray(dtype=tf.float64, size=num_steps, dynamic_size=False, element_shape=[])
    with tf.control_dependencies(reset_operations):
        tic = tf.timestamp()
        for step in tf.range(1, num_steps+1):
            loss, grads = train_one_step(smc, use_correction_term)

            toc = tf.timestamp()
            max_grad = tf.reduce_max(tf.abs(grads))
            if True:
                tf.print('Step', step, '/', num_steps, ': ms per step= ', 1000. * (toc - tic) / tf.cast(step, tf.float64), ': total time (s)= ', (toc - tic), ', loss = ', loss, ', max abs grads = ', max_grad, end='\r')
            loss_tensor_array = loss_tensor_array.write(step-1, loss)
            grad_tensor_array = grad_tensor_array.write(step-1, max_grad)
            time_tensor_array = time_tensor_array.write(step-1, toc-tic)
    return loss_tensor_array.stack(), grad_tensor_array.stack(), time_tensor_array.stack()
            
@tf.function
def train_total_time(smc, total_time, use_correction_term=False, reset=True):
    reset_trainable_variables = [learnable_alpha.assign(alpha), 
                                 learnable_beta.assign(beta), 
                                 learnable_gamma.assign(gamma)]
    if reset:
        reset_operations = reset_trainable_variables
    else:
        reset_operations = []
        
    loss_tensor_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True, element_shape=[])
    grad_tensor_array = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True, element_shape=[])
    time_tensor_array = tf.TensorArray(dtype=tf.float64, size=0, dynamic_size=True, element_shape=[])
    with tf.control_dependencies(reset_operations):
        tic = tf.timestamp()
        toc = tic
        step = tf.constant(1)
        while toc - tic < total_time:
            loss, grads = train_one_step(smc, use_correction_term)
            max_grad = tf.reduce_max(tf.abs(grads))
            loss_tensor_array = loss_tensor_array.write(step-1, loss)
            grad_tensor_array = grad_tensor_array.write(step-1, max_grad)
            time_tensor_array = time_tensor_array.write(step-1, toc-tic)
            step = step + 1

            toc = tf.timestamp()

            tf.print('Time elapsed (s): ', toc-tic, ', n_steps: ', step, ': ms per step= ', 1000. * (toc - tic) / tf.cast(step, tf.float64), end='\r')
    return loss_tensor_array.stack(), grad_tensor_array.stack(), time_tensor_array.stack()


@tf.function
def run_several(smc, n_times, use_correction_term=False):
    loss_array = tf.TensorArray(dtype=tf.float32, size=n_times, dynamic_size=False, element_shape=[])
    grad_array = tf.TensorArray(dtype=tf.float32, size=n_times, dynamic_size=False)
    for i in tf.range(n_times):
        real_ll, grads_loss = run_one_step(smc, use_correction_term, init_state)
        loss_array = loss_array.write(i, real_ll)        
        grad_array = grad_array.write(i, grads_loss)
        tf.print('Step: ', i+1, '/', n_times, end='\r')
    return loss_array.stack(), grad_array.stack()

In [None]:
train_niter(particle_filter, tf.constant(500), False)

Step 65 / 500 : ms per step=  158.76639806307278 : total time (s)=  10.319815874099731 , loss =  -nan , max abs grads =  -inf