In [1]:
import pydpf
import numpy as np
import torch
import pathlib
import model
from tqdm import tqdm
from time import time
from training_loop import train
import pandas as pd

In [2]:
# Experiment configuration read by the cells below.
# Name of the filter variant to benchmark; get_DPF raises ValueError for
# any value it does not recognise.
DPF_type = 'Optimal Transport'
# Number of independent training runs performed by the training cell.
n_repeats = 10
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [3]:
def get_DPF(SSM):
    """Build the particle filter selected by the module-level ``DPF_type``.

    Reads the globals ``DPF_type``, ``device`` and ``experiment_cuda_rng`` at
    call time, so those must be defined before this function is called.

    Parameters
    ----------
    SSM
        The state-space model handed to the chosen ``pydpf`` filter class.

    Returns
    -------
    The constructed ``pydpf`` filter instance.

    Raises
    ------
    ValueError
        If ``DPF_type`` matches none of the supported names.
    """

    def _build_kernel_filter():
        # Gaussian kernel with a zero mean vector and a trainable 1x1 matrix
        # parameter initialised to 0.1.
        centre = torch.zeros(1, device=device)
        spread = torch.nn.Parameter(torch.eye(1, device=device)*0.1)
        gaussian = pydpf.MultivariateGaussian(centre, spread, generator=experiment_cuda_rng)
        mixture = pydpf.KernelMixture(gaussian, generator=experiment_cuda_rng)
        return pydpf.KernelDPF(SSM=SSM, kernel=mixture)

    # Ordered (name, factory) pairs; factories are lazy so only the selected
    # filter is ever constructed.
    factories = (
        ('DPF', lambda: pydpf.DPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Soft', lambda: pydpf.SoftDPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Stop-Gradient', lambda: pydpf.StopGradientDPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Marginal Stop-Gradient', lambda: pydpf.MarginalStopGradientDPF(SSM=SSM, resampling_generator=experiment_cuda_rng)),
        ('Optimal Transport', lambda: pydpf.OptimalTransportDPF(SSM=SSM, regularisation=0.5, transport_gradient_clip=1.)),
        ('Kernel', _build_kernel_filter),
    )
    for option, build in factories:
        if DPF_type == option:
            return build()
    raise ValueError('DPF_type should be one of the allowed options')

In [4]:
# Benchmark cell: measures forward/backward wall-clock time and collects
# per-element gradients of the ELBO with respect to alpha_p for the filter
# selected by DPF_type.
experiment_cuda_rng = torch.Generator(device).manual_seed(0)
generation_rng = torch.Generator(device).manual_seed(0)
aggregation_function_dict = {'ELBO': pydpf.LogLikelihoodFactors()}
# Load onto the configured device. This was hard-coded to 'cuda', which
# disagreed with every other tensor in the cell when CUDA is unavailable.
test_dataset = pydpf.StateSpaceDataset(data_path=pathlib.Path('.').parent.absolute().joinpath('test_trajectory.csv'), state_prefix='state', device=device)
Gradients = []
size = 0
alpha_p = torch.nn.Parameter(torch.tensor([[0.93]], dtype = torch.float32, device=device))
SSM = model.make_SSM(torch.tensor([[1.]], device=device), alpha_p, torch.tensor([0.5], device=device), device)
DPF = get_DPF(SSM)
forward_time = []
backward_time = []
# Take the slice at index 0 of dimension 1 and broadcast it to 100 copies,
# giving tensors of shape (101, 100, 1).
# NOTE(review): presumably (time, batch, dim) with 101 = time_extent + 1 - confirm.
state = test_dataset.state[:,0:1].expand((101, 100, 1)).contiguous()
observation = test_dataset.observation[:,0:1].expand((101, 100, 1)).contiguous()


def _wait_for_device():
    """Block until queued CUDA work has finished; no-op on the CPU.

    CUDA kernels are launched asynchronously, so without this the
    wall-clock timings below could measure launch time rather than
    execution time.
    """
    if device.type == 'cuda':
        torch.cuda.synchronize(device)


for i in tqdm(range(20)):
    DPF.update()
    size += state.size(1)

    # Time the forward pass.
    _wait_for_device()
    start = time()
    outputs = DPF(observation=observation, n_particles=100, ground_truth=state, aggregation_function=aggregation_function_dict, time_extent=100)
    ls = torch.mean(outputs['ELBO'], dim=0)
    loss = ls.mean()
    _wait_for_device()
    forward_time.append((time() - start))

    # Time one backward pass through the mean loss.
    start = time()
    loss.backward(retain_graph=True)
    _wait_for_device()
    backward_time.append((time() - start))
    alpha_p.grad = None

    # Record the gradient of each element of ls separately; the graph is
    # retained so it can be back-propagated through repeatedly. A distinct
    # index name avoids shadowing the outer loop variable.
    for j in range(len(ls)):
        ls[j].backward(retain_graph=True)
        Gradients.append(alpha_p.grad.item())
        alpha_p.grad = None
    # Final backward without retain_graph - presumably to release the autograd
    # graph. Note it leaves a gradient in alpha_p.grad (TODO confirm intent).
    loss.backward()

  5%|▌         | 1/20 [00:20<06:24, 20.23s/it]


KeyboardInterrupt: 

In [5]:
# Training cell: for each repeat, simulate data from a model built with 0.91
# in the alpha position, fit alpha from a random initial value, and store
# the learned value in alphas.
alphas = np.empty(n_repeats)
data_path = pathlib.Path('.').parent.absolute().joinpath('data.csv')
for n in range(n_repeats):
    # Fresh, repeat-specific seeds for every generator used in this run.
    experiment_cuda_rng = torch.Generator(device).manual_seed(n*10)
    experiment_cpu_rng = torch.Generator().manual_seed(n*10)
    generation_rng = torch.Generator(device).manual_seed(n*10)

    # Simulate the training data and overwrite data_path with it.
    true_SSM = model.make_SSM(torch.tensor([[1.]], device=device), torch.tensor([[0.91]], device=device), torch.tensor([0.5], device=device), device, generation_rng)
    pydpf.simulate_and_save(data_path, SSM=true_SSM, time_extent=1000, n_trajectories=500, batch_size=100, device=device, bypass_ask=True)

    # Learnable parameter, initialised uniformly in [0, 1).
    alpha = torch.nn.Parameter(torch.rand((1,1), device=device, generator=experiment_cuda_rng), requires_grad=True)
    SSM = model.make_SSM(torch.tensor([[1.]], device=device), alpha, torch.tensor([0.5], device=device), device, generation_rng)
    dpf = get_DPF(SSM)

    # The Kernel filter has its own trainable mixture parameters, optimised
    # with a smaller learning rate than alpha.
    if DPF_type == 'Kernel':
        opt = torch.optim.SGD([{'params':[alpha], 'lr':0.05}, {'params':dpf.resampler.mixture.parameters(), 'lr':0.01}])
    else:
        opt = torch.optim.SGD([{'params':[alpha], 'lr':0.05}])
    opt_schedule = torch.optim.lr_scheduler.ExponentialLR(opt, 0.95)
    dataset = pydpf.StateSpaceDataset(data_path, state_prefix='state', device=device)
    _, ELBO = train(dpf, opt, dataset, 10, (100, 100, 100), (30, 100, 100), (0.5, 0.25, 0.25), 1., experiment_cpu_rng, target='ELBO', time_extent=100, lr_scheduler=opt_schedule)
    print(alpha)
    # Extract a plain Python float explicitly rather than relying on implicit
    # conversion of a (possibly CUDA, grad-requiring) Parameter by NumPy.
    alphas[n] = alpha.item()


Done                  

epoch 1/10, train loss: 1.3528409481048584, validation MSE: 2.424424648284912, validation ELBO: -130.94684143066405
epoch 2/10, train loss: 1.2742947626113892, validation MSE: 2.8925307750701905, validation ELBO: -145.71783752441405
epoch 3/10, train loss: 1.2814369821548461, validation MSE: 2.5621196746826174, validation ELBO: -134.63070220947264
epoch 4/10, train loss: 1.1772432327270508, validation MSE: 1.763780689239502, validation ELBO: -115.36298828125
epoch 5/10, train loss: 1.1188967895507813, validation MSE: 1.9161650657653808, validation ELBO: -117.99909210205078
epoch 6/10, train loss: 1.164355399608612, validation MSE: 2.470067548751831, validation ELBO: -132.15218505859374
epoch 7/10, train loss: 1.274304714202881, validation MSE: 2.5998330116271973, validation ELBO: -136.25420837402345
epoch 8/10, train loss: 1.247541527748108, validation MSE: 2.5000436782836912, validation ELBO: -133.1541534423828
epoch 9/10, train loss: 1.272823281288147, validat

KeyboardInterrupt: 

In [14]:
# Results cell: summarise the timing, gradient and training measurements for
# the current DPF_type and write them as one row of the shared results CSV.
def _steady_state_mean(samples):
    """Return the mean of ``samples`` with the first and last entries dropped.

    Raises
    ------
    ValueError
        If fewer than three samples are available, so nothing remains after
        trimming. Previously this case produced -0.0 or a ZeroDivisionError.
    """
    trimmed = samples[1:-1]
    if not trimmed:
        raise ValueError('need at least 3 timing samples to discard the first and last iterations')
    return sum(trimmed)/len(trimmed)


result_path = pathlib.Path('.').parent.absolute().joinpath('single_parameter_results.csv')
results = pd.read_csv(result_path, index_col=0)
# Row layout: mean forward time, mean backward time, standard deviation of the
# collected gradients, and mean absolute error of the learned alphas against
# 0.91 (the value used to build true_SSM in the training cell).
row = np.array([
    _steady_state_mean(forward_time),
    _steady_state_mean(backward_time),
    np.std(Gradients),
    np.mean(np.abs(alphas - 0.91)),
])
# Insert or overwrite the row for the method benchmarked in this session.
results.loc[DPF_type] = row
print(results)
results.to_csv(result_path)

                        Forward Time (s)  Backward Time (s)  \
method                                                        
DPF                             0.171809           0.052404   
Soft                            0.170429           0.078622   
Stop-Gradient                   0.201992           0.078292   
Marginal Stop-Gradient          0.130973           0.053427   
Optimal Transport               1.089317           0.107106   
Kernel                          0.140275           0.078705   

                        Gradient standard deviation  alpha error  
method                                                            
DPF                                        0.034926     0.002912  
Soft                                       0.375278     0.006917  
Stop-Gradient                              1.145910     0.011383  
Marginal Stop-Gradient                     0.477815     0.004123  
Optimal Transport                          0.116350     0.025273  
Kernel                    