In [1]:
import pydpf
import numpy as np
import torch
import pathlib
import model
from tqdm import tqdm
from time import time
from training_loop import train
import pandas as pd

In [2]:
# Experiment configuration shared by the cells below.
# Number of independent training repeats run by the parameter-recovery cell.
n_repeats = 10
# Label of the filter variant to build; must match one of the labels
# compared against in get_DPF, and is also used as the row key in the results table.
DPF_type = 'Optimal Transport'
# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [3]:
def get_DPF(SSM):
    """Construct the particle filter named by the notebook-level ``DPF_type``.

    Reads the globals ``DPF_type``, ``device`` and ``experiment_cuda_rng`` at
    call time, so ``experiment_cuda_rng`` must have been assigned before this
    function is called.

    Args:
        SSM: the state-space model object forwarded to the chosen ``pydpf``
            filter constructor as its ``SSM`` keyword argument.

    Returns:
        The filter object returned by the matching ``pydpf`` constructor.

    Raises:
        ValueError: if ``DPF_type`` does not equal any of the supported labels.
    """

    def _build_kernel_filter():
        # Built step by step in the same order the arguments are evaluated
        # in a single nested expression: zero vector, learnable matrix,
        # Gaussian, mixture, filter.
        zero_vec = torch.zeros(1, device=device)
        learnable_matrix = torch.nn.Parameter(torch.eye(1, device=device)*0.1)
        gaussian = pydpf.MultivariateGaussian(zero_vec, learnable_matrix, generator=experiment_cuda_rng)
        mixture = pydpf.KernelMixture(gaussian, gradient_estimator='reparameterisation', generator=experiment_cuda_rng)
        return pydpf.KernelDPF(SSM=SSM, kernel=mixture)

    # Ordered (label, factory) pairs. Factories are lazy so that only the
    # selected filter is ever constructed.
    factories = (
        ('DPF', lambda: pydpf.DPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Soft', lambda: pydpf.SoftDPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Stop-Gradient', lambda: pydpf.StopGradientDPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Marginal Stop-Gradient', lambda: pydpf.MarginalStopGradientDPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Optimal Transport', lambda: pydpf.OptimalTransportDPF(SSM=SSM, regularisation=10., transport_gradient_clip=1.)),
        ('Kernel', _build_kernel_filter),
    )
    for label, factory in factories:
        if DPF_type == label:
            return factory()
    raise ValueError('DPF_type should be one of the allowed options')

In [4]:
# Benchmark cell: times the forward and backward pass of the selected filter
# and collects per-element gradients of the 'ELBO' output w.r.t. alpha_p.
experiment_cuda_rng = torch.Generator(device).manual_seed(0)
generation_rng = torch.Generator(device).manual_seed(0)
aggregation_function_dict = {'ELBO': pydpf.LogLikelihoodFactors()}
# Load onto the configured device (not a hard-coded 'cuda') so the dataset
# lives on the same device as the model tensors, including on CPU-only hosts.
test_dataset = pydpf.StateSpaceDataset(data_path=pathlib.Path('.').parent.absolute().joinpath('test_trajectory.csv'), state_prefix='state', device=device)
Gradients = []
size = 0
alpha_p = torch.nn.Parameter(torch.tensor([[0.93]], dtype = torch.float32, device=device))
SSM = model.make_SSM(torch.tensor([[1.]], device=device), alpha_p, torch.tensor([0.5], device=device), device)
DPF = get_DPF(SSM)
forward_time = []
backward_time = []
# Keep a single slice along dim 1 and replicate it 100 times along that dim.
# NOTE(review): dim 1 is presumably the batch dimension and 101 the number of
# time steps in test_trajectory.csv - confirm against the dataset.
state = test_dataset.state[:,0:1].expand((101, 100, 1)).contiguous()
observation = test_dataset.observation[:,0:1].expand((101, 100, 1)).contiguous()


def _synchronised_time():
    """Return the wall-clock time once queued CUDA work has completed.

    CUDA kernels are launched asynchronously, so reading the clock without
    synchronising can miss work that is still running on the GPU. On CPU
    this is just ``time()``.
    """
    if device.type == 'cuda':
        torch.cuda.synchronize(device)
    return time()


for i in tqdm(range(20)):
    DPF.update()
    size += state.size(1)
    start = _synchronised_time()
    outputs = DPF(observation=observation, n_particles=100, ground_truth=state, aggregation_function=aggregation_function_dict, time_extent=100)
    ls = torch.mean(outputs['ELBO'], dim=0)
    loss = ls.mean()
    forward_time.append(_synchronised_time() - start)
    start = _synchronised_time()
    # retain_graph=True: the same graph is differentiated again below.
    loss.backward(retain_graph=True)
    backward_time.append(_synchronised_time() - start)
    alpha_p.grad = None
    # One backward pass per element of ls, so each recorded gradient comes
    # from a single element. A distinct index name avoids shadowing the
    # outer loop variable.
    for j in range(len(ls)):
        ls[j].backward(retain_graph=True)
        Gradients.append(alpha_p.grad.item())
        alpha_p.grad = None
    # Final backward without retain_graph, which lets autograd free the
    # graph that was retained for the passes above.
    loss.backward()

100%|██████████| 20/20 [03:49<00:00, 11.49s/it]


In [5]:
# Parameter-recovery cell: for each repeat, simulate data from an SSM built
# with 0.91 as its second argument, train a freshly initialised alpha on it
# and record the learned value.
alphas = np.empty(n_repeats)
data_path = pathlib.Path('.').parent.absolute().joinpath('data.csv')
for n in range(n_repeats):
    # Re-seed every generator per repeat so each repeat is reproducible on its own.
    experiment_cuda_rng = torch.Generator(device).manual_seed(n*10)
    experiment_cpu_rng = torch.Generator().manual_seed(n*10)
    generation_rng = torch.Generator(device).manual_seed(n*10)
    true_SSM = model.make_SSM(torch.tensor([[1.]], device=device), torch.tensor([[0.91]], device=device), torch.tensor([0.5], device=device), device, generation_rng)
    # The simulated data is written to the same path on every repeat.
    pydpf.simulate_and_save(data_path, SSM=true_SSM, time_extent=1000, n_trajectories=500, batch_size=100, device=device, by_pass_ask=True)
    # Random initial value in [0, 1) for the parameter being learned.
    alpha = torch.nn.Parameter(torch.rand((1,1), device=device, generator=experiment_cuda_rng), requires_grad=True)
    SSM = model.make_SSM(torch.tensor([[1.]], device=device), alpha, torch.tensor([0.5], device=device), device, generation_rng)
    dpf = get_DPF(SSM)
    if DPF_type == 'Kernel':
        # The kernel filter has extra learnable parameters, optimised with a smaller learning rate.
        opt = torch.optim.SGD([{'params':[alpha], 'lr':0.05}, {'params':dpf.resampler.mixture.parameters(), 'lr':0.01}])
    else:
        opt = torch.optim.SGD([{'params':[alpha], 'lr':0.05}])
    opt_schedule = torch.optim.lr_scheduler.ExponentialLR(opt, 0.95)
    dataset = pydpf.StateSpaceDataset(data_path, state_prefix='state', device=device)
    _, ELBO = train(dpf, opt, dataset, 10, (100, 100, 100), (30, 100, 100), (0.5, 0.25, 0.25), 1., experiment_cpu_rng, target='ELBO', time_extent=100, lr_scheduler=opt_schedule)
    print(alpha)
    # Extract a plain Python float explicitly: alpha is a (1, 1) Parameter
    # that requires grad and may live on the GPU, so implicit conversion
    # into a NumPy slot is fragile.
    alphas[n] = alpha.item()


Done                  

epoch 1/10, train loss: 1.2539286589622498, validation MSE: 1.8500530242919921, validation ELBO: 115.39723510742188
epoch 2/10, train loss: 1.0382283973693847, validation MSE: 1.5686058282852173, validation ELBO: 111.18718109130859
epoch 3/10, train loss: 1.019157919883728, validation MSE: 1.5606566429138184, validation ELBO: 110.1022216796875
epoch 4/10, train loss: 1.0220255827903748, validation MSE: 1.5390944719314574, validation ELBO: 109.48365173339843
epoch 5/10, train loss: 1.0221030163764953, validation MSE: 1.5481766700744628, validation ELBO: 109.30546264648437
epoch 6/10, train loss: 1.0181823778152466, validation MSE: 1.5457442283630372, validation ELBO: 110.65148315429687
epoch 7/10, train loss: 1.0196914768218994, validation MSE: 1.5513216495513915, validation ELBO: 109.69273681640625
epoch 8/10, train loss: 1.0222209811210632, validation MSE: 1.593090581893921, validation ELBO: 110.94208068847657
epoch 9/10, train loss: 1.026155698299408, validati

In [6]:
# Results cell: summarise the timings, gradient spread and alpha error for the
# current DPF_type and store them as one row of the shared results table.
result_path = pathlib.Path('.').parent.absolute().joinpath('single_parameter_results.csv')
if result_path.exists():
    results = pd.read_csv(result_path, index_col=0)
else:
    # First run: start from an empty table so the cell does not fail when
    # the results file has not been created yet.
    results = pd.DataFrame(
        columns=['Forward Time (s)', 'Backward Time (s)', 'Gradient standard deviation', 'alpha error'],
        index=pd.Index([], name='method'),
        dtype=float,
    )
# Average the timings with the first and last samples dropped.
# NOTE(review): presumably this discards warm-up and tear-down outliers - confirm.
mean_forward_time = sum(forward_time[1:-1])/(len(forward_time)-2)
mean_backward_time = sum(backward_time[1:-1])/(len(backward_time)-2)
# Population standard deviation of the recorded gradients
# (same value as sqrt(var) with NumPy's default ddof=0).
gradient_std = np.std(Gradients)
# Mean absolute distance of the learned alphas from 0.91, the value passed
# to the data-generating SSM.
alpha_error = np.mean(np.abs(alphas - 0.91))
row = np.array([mean_forward_time, mean_backward_time, gradient_std, alpha_error])
# Insert or overwrite the row for the current method.
results.loc[DPF_type] = row
print(results)
results.to_csv(result_path)

                        Forward Time (s)  Backward Time (s)  \
method                                                        
DPF                             0.186978           0.053568   
Soft                            0.236972           0.112147   
Stop-Gradient                   0.175371           0.078364   
Marginal Stop-Gradient          0.131526           0.049114   
Optimal Transport               1.089317           0.107106   
Kernel                          0.114504           0.085748   

                        Gradient standard deviation  alpha error  
method                                                            
DPF                                        0.034826     0.003685  
Soft                                       0.375484     0.006728  
Stop-Gradient                              1.145910     0.005106  
Marginal Stop-Gradient                     0.477815     0.002993  
Optimal Transport                          0.116350     0.025273  
Kernel                    