# Filtering a fully specified model

Before running this notebook, ensure that the steps outlined in `sv_setup_instructions.txt` are complete.

In [None]:
import pydpf
import numpy as np
import torch
import pathlib
from tqdm import tqdm
from sv_model import make_SSM
import pandas as pd
from time import time

## Settings

In [None]:
# Run on the first GPU when one is visible to torch, otherwise on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Filter variants to evaluate; each name is resolved by get_DPF.
experiments = [
    'DPF',
    'Soft',
    'Stop-Gradient',
    'Marginal Stop-Gradient',
    'Optimal Transport',
    'Kernel',
]

# Model parameters; they also select which data file is read.
alpha = 0.91
beta = 0.5
sigma = 1.0
batch_size = 128

# Both paths are resolved relative to the current working directory.
data_path = pathlib.Path.cwd() / 'data' / f'alpha={alpha}-beta={beta}-sigma={sigma}.csv'
result_path = pathlib.Path.cwd() / 'results' / 'fully_specified_results.csv'


In [None]:
def get_DPF(DPF_type):
    """Construct the filter variant named by ``DPF_type``.

    The notebook-level names ``SSM``, ``experiment_cuda_rng`` and ``device``
    are read when the function is called, so they must be defined first.

    Parameters
    ----------
    DPF_type : str
        One of 'DPF', 'Soft', 'Stop-Gradient', 'Marginal Stop-Gradient',
        'Optimal Transport' or 'Kernel'.

    Returns
    -------
    The pydpf filter object built for the requested variant.

    Raises
    ------
    ValueError
        If ``DPF_type`` matches none of the supported names.
    """
    def build_kernel_dpf():
        # The mixture component and the mixture share the experiment generator.
        component = pydpf.MultivariateGaussian(
            torch.zeros(1, device=device),
            torch.eye(1, device=device)*0.1,
            generator=experiment_cuda_rng,
        )
        mixture = pydpf.KernelMixture(component, generator=experiment_cuda_rng)
        return pydpf.KernelDPF(SSM=SSM, kernel=mixture)

    # Builders are lazy so only the requested filter is ever constructed.
    builders = (
        ('DPF', lambda: pydpf.DPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Soft', lambda: pydpf.SoftDPF(SSM=SSM, resampling_generator=experiment_cuda_rng, softness=0.7)),
        ('Stop-Gradient', lambda: pydpf.StopGradientDPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Marginal Stop-Gradient', lambda: pydpf.MarginalStopGradientDPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Optimal Transport', lambda: pydpf.OptimalTransportDPF(SSM=SSM, regularisation=0.5, transport_gradient_clip=1.)),
        ('Kernel', build_kernel_dpf),
    )
    for name, build in builders:
        if DPF_type == name:
            return build()
    raise ValueError('DPF_type should be one of the allowed options')

In [None]:
def fractional_diff_exp(a, b):
    """Return ``|1 - exp(b - a)|`` element-wise.

    Algebraically this equals ``|exp(a) - exp(b)| / exp(a)``, i.e. the
    difference between the exponentials of ``a`` and ``b`` expressed as a
    fraction of ``exp(a)``, computed without exponentiating either input on
    its own.

    Parameters
    ----------
    a, b : torch.Tensor
        Tensors that can be subtracted from one another.

    Returns
    -------
    torch.Tensor
        The non-negative fractional difference, with the shape of ``b - a``.
    """
    # expm1 avoids the cancellation in ``1 - exp(x)`` when x is close to zero;
    # |exp(x) - 1| is the same quantity as |1 - exp(x)|.
    return torch.abs(torch.expm1(b - a))

In [None]:
# Model parameters as tensors on the compute device; alpha and sigma are
# wrapped as 1x1 tensors, beta as a length-1 tensor.
alpha_t, sigma_t = (torch.tensor([[value]], device=device) for value in (alpha, sigma))
beta_t = torch.tensor([beta], device=device)

# Load the trajectories from the CSV file selected in the settings.
dataset = pydpf.StateSpaceDataset(
    data_path=data_path,
    series_id_column='series_id',
    state_prefix='state',
    observation_prefix='observation',
    device=device,
)


In [None]:
# Only synchronise when running on a GPU: torch.cuda.synchronize() raises on
# machines without CUDA, and the settings explicitly allow a CPU fallback.
use_cuda = device.type == 'cuda'

for experiment in experiments:
    print(f"Testing {experiment}")
    # Re-seed both generators so every experiment starts from the same seed.
    experiment_cuda_rng = torch.Generator(device=device).manual_seed(0)
    experiment_cpu_rng = torch.Generator().manual_seed(0)
    size = 0
    pf_time = []
    MSE = []
    likelihood_error = []
    SSM = make_SSM(sigma_t, alpha_t, beta_t, device, generator=experiment_cuda_rng)
    dpf = get_DPF(experiment)
    # Reference filter: multinomial resampling, run below with 10000 particles.
    pf = pydpf.DPF(SSM=SSM, resampling_generator=experiment_cuda_rng, multinomial=True)
    aggregation_function = {'Likelihood': pydpf.LogLikelihoodFactors(), 'Filtering mean': pydpf.FilteringMean()}
    data_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, generator=experiment_cpu_rng, collate_fn=dataset.collate)
    for state, observation in tqdm(data_loader):
        with torch.inference_mode():
            # presumably dimension 1 of ``state`` is the batch dimension -- TODO confirm
            n_series = state.size(1)
            size += n_series
            true_outputs = pf(observation=observation, n_particles=10000, aggregation_function=aggregation_function, time_extent=1000)
            # Time only the filter under test; synchronising on either side
            # keeps pending GPU work out of the measured interval.
            if use_cuda:
                torch.cuda.synchronize()
            s_time = time()
            outputs = dpf(observation=observation, n_particles=100, aggregation_function=aggregation_function, time_extent=1000)
            if use_cuda:
                torch.cuda.synchronize()
            pf_time.append((time() - s_time))
            # Batch means are re-weighted by the batch size so that dividing
            # the totals by ``size`` gives a per-series average.
            MSE.append(torch.sum((true_outputs['Filtering mean'] - outputs['Filtering mean'])**2, dim=-1).mean().item()*n_series)
            likelihood_error.append(fractional_diff_exp(true_outputs['Likelihood'], outputs['Likelihood']).mean().item()*n_series)

    # The first and last batch timings are dropped (presumably warm-up and a
    # possibly smaller final batch). With two or fewer batches nothing would
    # remain, so fall back to all timings instead of dividing by zero.
    timed = pf_time[1:-1] or pf_time
    mean_time = sum(timed) / len(timed)

    # The results file must already exist; one row per experiment is
    # overwritten and the file is saved after each experiment.
    results = pd.read_csv(result_path, index_col=0)
    row = np.array([sum(MSE) / size, sum(likelihood_error) / size, mean_time])
    results.loc[experiment] = row
    print(results)
    results.to_csv(result_path)