In [41]:
import pydpf
import numpy as np
import torch
import pathlib
import model
from tqdm import tqdm
from model import make_SSM
import pandas as pd
from time import time

In [42]:
# Run on the first CUDA device when one is present; otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Name of the filter variant that get_DPF constructs; also used as the row label in the results table.
DPF_type = 'Kernel'

# Fixed-seed generators so repeated runs draw the same random numbers.
experiment_cpu_rng = torch.Generator()
experiment_cpu_rng.manual_seed(0)
experiment_cuda_rng = torch.Generator(device=device)
experiment_cuda_rng.manual_seed(0)

# The trajectory file is resolved against the current working directory.
data_path = pathlib.Path.cwd() / 'data.csv'


In [43]:
def get_DPF(SSM):
    """Construct the filter named by the module-level ``DPF_type`` around ``SSM``.

    Args:
        SSM: the state-space model object forwarded to the chosen pydpf
            filter constructor as its ``SSM`` keyword.

    Returns:
        The pydpf filter instance matching ``DPF_type``.

    Raises:
        ValueError: if ``DPF_type`` is not one of the names handled below.
    """

    def build_kernel_dpf():
        # Mixture kernel built from a 1-D MultivariateGaussian whose two positional
        # arguments are a zero vector and a 0.1-scaled 1x1 identity matrix.
        gaussian = pydpf.MultivariateGaussian(
            torch.zeros(1, device=device),
            torch.eye(1, device=device)*0.1,
            generator=experiment_cuda_rng,
        )
        mixture = pydpf.KernelMixture(gaussian, generator=experiment_cuda_rng)
        return pydpf.KernelDPF(SSM=SSM, kernel=mixture)

    # Builders are lazy so only the selected filter is ever instantiated.
    builders = {
        'DPF': lambda: pydpf.DPF(SSM=SSM, resampling_generator=experiment_cuda_rng),
        'Soft': lambda: pydpf.SoftDPF(SSM=SSM, resampling_generator=experiment_cuda_rng, softness=0.5),
        'Stop-Gradient': lambda: pydpf.StopGradientDPF(SSM=SSM, resampling_generator=experiment_cuda_rng),
        'Marginal Stop-Gradient': lambda: pydpf.MarginalStopGradientDPF(SSM=SSM, resampling_generator=experiment_cuda_rng),
        'Optimal Transport': lambda: pydpf.OptimalTransportDPF(SSM=SSM, regularisation=0.5, transport_gradient_clip=1.),
        'Kernel': build_kernel_dpf,
    }

    builder = builders.get(DPF_type)
    if builder is None:
        raise ValueError('DPF_type should be one of the allowed options')
    return builder()

In [44]:
def fractional_diff_exp(a, b):
    """Return ``|1 - exp(b - a)|`` element-wise.

    When ``a`` and ``b`` are logarithms of two positive quantities this equals
    ``|exp(a) - exp(b)| / exp(a)``, i.e. the relative error of ``exp(b)``
    measured against ``exp(a)``.

    Args:
        a: tensor of reference log-values.
        b: tensor of log-values to compare; must broadcast against ``a``.

    Returns:
        Tensor of non-negative fractional differences with the broadcast shape
        of ``a`` and ``b``.
    """
    # expm1 evaluates exp(x) - 1 without the cancellation that makes
    # 1 - exp(x) collapse to exactly 0 when |x| is below the float epsilon.
    return torch.abs(torch.expm1(b - a))

In [45]:
# Parameters of the fully specified model, each a small tensor placed on `device`.
sigma = torch.tensor([[1.]], device=device)
alpha = torch.tensor([[0.91]], device=device)
beta = torch.tensor([0.5], device=device)

# Dedicated seeded generator handed to the model at construction.
data_gen_generator = torch.Generator(device=device)
data_gen_generator.manual_seed(0)

SSM = make_SSM(sigma, alpha, beta, device, generator=data_gen_generator)

# Disabled one-off call; presumably what produced the file at data_path. Re-enable to regenerate it.
#pydpf.simulate_and_save(data_path, SSM=SSM, time_extent=1000, n_trajectories=500, batch_size=100, device=device)

In [46]:
# Quantities requested from every filter run, keyed by the name used to read them back.
aggregation_function = {
    'Likelihood': pydpf.LogLikelihoodFactors(),
    'Filtering mean': pydpf.FilteringMean(),
}

# Trajectories stored at data_path, loaded onto `device`.
dataset = pydpf.StateSpaceDataset(
    data_path=data_path,
    series_id_column='series_id',
    state_prefix='state',
    observation_prefix='observation',
    device=device,
)

# Filter under test, selected by DPF_type.
dpf = get_DPF(SSM)

# Multinomial-resampling DPF used by the evaluation loop as the reference filter.
pf = pydpf.DPF(
    SSM=SSM,
    resampling_generator=experiment_cuda_rng,
    multinomial=True,
)

# Fixed-order batches of 30; the dataset supplies its own collate function.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=30,
    shuffle=False,
    generator=experiment_cpu_rng,
    collate_fn=dataset.collate,
)

In [47]:

# Accumulators over all batches. Per-batch means are re-weighted by the batch's
# trajectory count so that dividing the sums by `size` gives a per-trajectory average.
size = 0
pf_time = []           # wall-clock seconds of each dpf forward pass
MSE = []
likelihood_error = []

# CUDA kernels are launched asynchronously, so the host clock only measures the
# filter if the device is drained immediately before and after the timed call.
sync_for_timing = device.type == 'cuda'

for state, observation in tqdm(data_loader):
    with torch.inference_mode():
        # assumes dim 1 of `state` indexes the trajectories in the batch (TODO confirm)
        n_in_batch = state.size(1)
        size += n_in_batch

        # Reference run: many-particle filter whose outputs are treated as ground truth.
        true_outputs = pf(observation=observation, n_particles=10000, aggregation_function=aggregation_function, time_extent=1000)

        if sync_for_timing:
            # Keep pending reference-filter work out of the timed window.
            torch.cuda.synchronize(device)
        s_time = time()
        outputs = dpf(observation=observation, n_particles=100, aggregation_function=aggregation_function, time_extent=1000)
        if sync_for_timing:
            # Wait for the filter's kernels to actually finish before stopping the clock.
            torch.cuda.synchronize(device)
        pf_time.append(time() - s_time)

        squared_error = torch.sum((true_outputs['Filtering mean'] - outputs['Filtering mean'])**2, dim=-1)
        MSE.append(squared_error.mean().item()*n_in_batch)
        likelihood_error.append(fractional_diff_exp(true_outputs['Likelihood'], outputs['Likelihood']).mean().item()*n_in_batch)

100%|██████████| 17/17 [00:25<00:00,  1.51s/it]


In [48]:
result_path = pathlib.Path('.').parent.absolute().joinpath('fully_specified_results.csv')

# Load the running results table, or start an empty one on the first run so a
# fresh checkout does not fail on the missing file.
if result_path.exists():
    results = pd.read_csv(result_path, index_col=0)
else:
    results = pd.DataFrame(columns=['e_x', 'e_l', 'time'], dtype=float).rename_axis('method')

# The first and last batches are dropped from the timing average. With fewer
# than three batches nothing would remain, so fall back to every timing rather
# than dividing by zero or by a negative count.
timed_batches = pf_time[1:-1] if len(pf_time) > 2 else pf_time
mean_time = sum(timed_batches)/len(timed_batches) if timed_batches else float('nan')

# Columns: per-trajectory filtering-mean squared error, per-trajectory
# fractional likelihood error, mean seconds per timed batch.
row = np.array([sum(MSE)/size, sum(likelihood_error)/size, mean_time])
results.loc[DPF_type] = row
print(results)
results.to_csv(result_path)

                             e_x       e_l       time
method                                               
DPF                     0.026536  0.063251   0.712928
Soft                    0.028045  0.068421   0.861655
Stop-Gradient           0.026536  0.063251   0.735490
Marginal Stop-Gradient  0.026536  0.063251   0.739505
Optimal Transport       0.048391  0.078096  54.498954
Kernel                  0.032957  0.072445   0.781434
