## Imports

In [8]:

from tqdm import tqdm
import torch
from pydpf import pydpf
import model
import pathlib
from time import time
import pandas as pd

## Set options

In [9]:
# Dimensions passed to the model constructors: dx for the latent state, dy for the observation.
dx = 25
dy = 1

# Request GPU execution. If CUDA is not usable on this machine, fall back to the CPU
# instead of crashing when the device-bound generator is created below. Later cells
# read `cuda` to decide which timing column of the results table to fill, so the flag
# must reflect the device that is really used.
cuda = True
if cuda and not torch.cuda.is_available():
    print('CUDA was requested but is not available; falling back to the CPU.')
    cuda = False

# Both files are resolved against the current working directory
# (`Path('.').parent` is still `Path('.')`, so no parent directory is involved).
base_dir = pathlib.Path('.').absolute()
data_path = base_dir.joinpath('data.csv')
result_path = base_dir.joinpath('Kalman_comparison_results.csv')

# Particle counts to benchmark. The benchmark loop also accepts None as an entry,
# in which case only the Kalman filter timing is recorded for that pass.
Ks = [25, 100, 1000, 10000]
generate_data = False

device = torch.device("cuda:0") if cuda else torch.device("cpu")

# Seeded generators for reproducibility. `cuda_gen` lives on `device`, so despite its
# name it is a CPU generator whenever `cuda` is False.
cuda_gen = torch.Generator(device=device).manual_seed(0)
cpu_gen = torch.Generator().manual_seed(0)

In [10]:
def make_bootstrap_componets(dx, dy, generator):
    """Build the prior, dynamic and observation models from the ``model`` module.

    Parameters
    ----------
    dx : forwarded to all three constructors.
    dy : forwarded to ``model.GaussianObservation`` only.
    generator : forwarded unchanged to all three constructors.

    Returns
    -------
    tuple
        ``(prior_model, dynamic_model, observation_model)``.
    """
    # The components are built in the order dynamic -> observation -> prior and only
    # reordered for the return value. All three receive the same generator, so the
    # construction order is kept in case the constructors draw from it.
    transition = model.GaussianDynamic(dx, generator)
    emission = model.GaussianObservation(dx, dy, generator)
    initial = model.GaussianPrior(dx, generator)
    return initial, transition, emission

## Generate Data

## Create Dataset

In [11]:
# Read the series stored at `data_path`; the column names/prefixes below tell the
# dataset which CSV columns hold the series id, the states and the observations.
dataset = pydpf.StateSpaceDataset(
    data_path=data_path,
    series_id_column='series_id',
    state_prefix='state',
    observation_prefix='observation',
    device=device,
)

# Batches of 100 series in file order (no shuffling), collated by the dataset's own
# `collate` method.
loader_options = dict(
    batch_size=100,
    shuffle=False,
    collate_fn=dataset.collate,
    generator=cpu_gen,
)
data_loader = torch.utils.data.DataLoader(dataset, **loader_options)

## Define a particle filter with multinomial resampling

In [12]:
# The particle filter and the Kalman filter are built from the very same three model
# components, so any difference in their outputs comes from the filtering method.
prior_model, dynamic_model, observation_model = make_bootstrap_componets(dx, dy, cuda_gen)
multinomial_resampler = pydpf.MultinomialResampler(cuda_gen)

shared_components = {
    'prior_model': prior_model,
    'dynamic_model': dynamic_model,
    'observation_model': observation_model,
}
SSM = pydpf.FilteringModel(**shared_components)
PF = pydpf.ParticleFilter(resampler=multinomial_resampler, SSM=SSM)
KalmanFilter = pydpf.KalmanFilter(**shared_components)

## Run the particle filter

In [13]:
def fractional_diff_exp(a, b):
    """Return ``|exp(b - a) - 1|`` element-wise.

    If ``a`` and ``b`` are logarithms of two positive quantities, this is the error of
    ``exp(b)`` relative to ``exp(a)``, i.e. ``|exp(b) - exp(a)| / exp(a)``, computed
    without exponentiating either argument on its own.

    Parameters
    ----------
    a, b : torch.Tensor
        Tensors that can be subtracted from one another (``b - a``).

    Returns
    -------
    torch.Tensor
        Non-negative tensor with the shape of ``b - a``.
    """
    # expm1 evaluates exp(x) - 1 directly. Forming exp(x) first and subtracting it
    # from 1 loses every significant digit when x is close to zero (in float32 a
    # difference of 1e-10 would be reported as exactly 0).
    return torch.abs(torch.expm1(b - a))
    

In [14]:
# Quantities the particle filter is asked to aggregate. The keys are reused below to
# index the dictionary returned by PF.
aggregation_function_dict = {'Means': pydpf.FilteringMean(), 'Likelihood_factors': pydpf.LogLikelihoodFactors()}

# Value passed as `time_extent` to both filters.
time_extent = 1000


def _synchronise():
    """Wait for the current CUDA stream to finish; does nothing when `cuda` is False."""
    if cuda:
        torch.cuda.current_stream().synchronize()


def _mean_interior_time(batch_times):
    """Average per-batch timings, leaving out the first and the last batch.

    The first batch is ignored because CUDA is often slower on the first pass, the
    last one in case it had a different size. Returns NaN when fewer than three
    batches were timed, instead of dividing by zero or by a negative count.
    """
    interior = batch_times[1:-1]
    if not interior:
        return float('nan')
    return sum(interior) / len(interior)


for K in Ks:
    size = 0
    state_error = []
    kalman_time = []
    pf_time = []
    likelihood_error = []
    # The Kalman filter is timed on every pass, but its timing is only written to the
    # results when K is None, i.e. when the particle filter is not run in the same
    # loop, as timing seems to be dependent on K.
    for state, observation in tqdm(data_loader):
        with torch.inference_mode():
            # presumably dimension 1 of `state` is the batch dimension - TODO confirm
            batch_size = state.size(1)
            size += batch_size

            # Synchronise on both sides of each timed call so that the measured
            # interval covers the GPU work and not just the kernel launches.
            _synchronise()
            s_time = time()
            kalman_state, kalman_cov, kalman_likelihood = KalmanFilter(observation=observation, time_extent=time_extent)
            _synchronise()
            kalman_time.append(time() - s_time)

            if K is None:
                continue

            _synchronise()
            s_time = time()
            outputs = PF(observation=observation, n_particles=K, aggregation_function=aggregation_function_dict, time_extent=time_extent)
            _synchronise()
            pf_time.append(time() - s_time)

            # Per-batch means are weighted by the batch size here and divided by the
            # total `size` later, so batches of different sizes are averaged correctly.
            state_sq_error = torch.sum((outputs['Means'] - kalman_state)**2, dim=-1).mean()
            state_error.append(state_sq_error.item() * batch_size)
            log_abs_likelihood_error = fractional_diff_exp(kalman_likelihood, outputs['Likelihood_factors'].squeeze()).mean()
            likelihood_error.append(log_abs_likelihood_error.item() * batch_size)

    # The results file is re-read and re-written for every K, so finished passes are
    # kept even if a later, longer pass is interrupted. Rows are edited by position:
    # 0 = CPU time, 1 = GPU time, 2 and 3 = the two error columns.
    results_df = pd.read_csv(result_path, index_col=0)
    kalman_row = list(results_df.loc['Kalman Filter'])
    time_column = 1 if cuda else 0
    if K is None:
        kalman_row[time_column] = _mean_interior_time(kalman_time)
    else:
        row_label = f'PF K = {K}'
        row = list(results_df.loc[row_label])
        row[time_column] = _mean_interior_time(pf_time)
        # NOTE(review): the error columns are only refreshed on CUDA runs; a CPU run
        # updates the timing alone. Kept as in the original - confirm this is intended.
        if cuda:
            row[2] = sum(state_error)/size
            row[3] = sum(likelihood_error)/size
        results_df.loc[row_label] = row
    results_df.loc['Kalman Filter'] = kalman_row
    results_df.to_csv(result_path)
print(results_df)
        
        
    
       

100%|██████████| 20/20 [00:58<00:00,  2.93s/it]
100%|██████████| 20/20 [01:10<00:00,  3.55s/it]
100%|██████████| 20/20 [00:33<00:00,  1.66s/it]
100%|██████████| 20/20 [01:37<00:00,  4.85s/it]

               Time CPU (s)   Time GPU (s)   epsilon x   epsilon y
Kalman Filter      1.220324       1.712297    0.000000    0.000000
PF K = 25          1.313197       1.200860    3.832700    0.139985
PF K = 100         3.263150       1.437558    1.066567    0.070517
PF K = 1000       21.502048       0.645984    0.114204    0.022389
PF K = 10000     196.095725       3.918857    0.011712    0.007090



