# DeepMind Lab Maze experiments

Before this notebook can be run and the models evaluated, the data must be prepared. Ensure you have completed all the steps outlined in `dm_setup_instructions.txt`.

In [None]:
import math
import pathlib
import pydpf
import torch
import dm_model
import dm_neural_networks
import dm_training
import time
import pandas as pd
import datetime


# Use the first CUDA device when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Sizes handed to the observation encoder/decoder and to the state encoder.
observation_encoding_size = 128
state_encoding_size = 64
# Divisor used when rescaling the first two components of states and controls.
scaling = 1000.
# Filter variants to train; each name must be one that get_DPF recognises.
DPF_types = [
    'DPF',
    'Soft',
    'Stop-Gradient',
    'Marginal Stop-Gradient',
    'Optimal Transport',
    'Kernel',
]
# Passed to pydpf.utils.set_deterministic_mode and used to pick the results file.
deterministic = True
results_file = (
    'results/deep_mind_maze_results.csv'
    if deterministic
    else 'results/nondeterministic_deep_mind_maze_results.csv'
)

In [None]:
def flatten_gens(list_of_gens):
    """
    Concatenate the items of several iterables into a single list.

    Each element of ``list_of_gens`` is iterated once, in order, and its items
    are appended to the result in the order they are produced.
    """
    flattened = []
    for gen in list_of_gens:
        flattened.extend(gen)
    return flattened
    
def is_in_it(item, it):
    """
    Return True if ``item`` itself (the same object, not merely an equal one)
    occurs in the iterable ``it``.
    """
    for candidate in it:
        if candidate is item:
            return True
    return False
    

def get_SSM():
    """
    Assemble the maze state-space model from its components.

    Reads the notebook-level globals ``cuda_gen``, ``device``, ``scaling``,
    ``observation_encoding_size`` and ``state_encoding_size``.

    Returns a 4-tuple:
        * the ``pydpf.FilteringModel`` built from the prior, dynamic and
          observation models,
        * a list of the parameters of the observation encoder, state encoder
          and observation decoder (in that order),
        * a list of the parameters of the observation flow and the prior model,
        * a one-element list holding the non-trainable covariance parameter of
          the flow's base distribution.
    """
    # NOTE(review): every component receives the same generator, so the
    # construction order below is kept fixed -- presumably it influences the
    # random initialisation; confirm before reordering.
    encoder = dm_neural_networks.ObservationEncoder(
        observation_encoding_size, generator=cuda_gen, dropout_keep_ratio=0.3
    )
    decoder = dm_neural_networks.ObservationDecoder(
        observation_encoding_size, generator=cuda_gen, dropout_keep_ratio=0.3
    )
    state_encoder = dm_neural_networks.StateEncoder(
        state_encoding_size, generator=cuda_gen, dropout_keep_ratio=0.6
    )

    # Two identically configured conditional RealNVP layers, built one after the other.
    observation_partial_flows = [
        dm_neural_networks.RealNVP_cond(
            dim=observation_encoding_size,
            hidden_dim=observation_encoding_size,
            condition_on_dim=state_encoding_size,
            generator=cuda_gen,
            zero_i=True,
        )
        for _ in range(2)
    ]

    # Identity covariance for the base distribution; requires_grad=False keeps it fixed.
    flow_cov = torch.nn.Parameter(
        torch.eye(observation_encoding_size, device=device), requires_grad=False
    )
    base_distribution = pydpf.MultivariateGaussian(
        torch.zeros(observation_encoding_size, device=device),
        cholesky_covariance=flow_cov,
        diagonal_cov=True,
        generator=cuda_gen,
    )
    observation_flow = dm_neural_networks.NormalizingFlowModel_cond(
        base_distribution, observation_partial_flows, device
    )
    observation_model = dm_model.MazeObservation(
        observation_flow, encoder, decoder, state_encoder, device=device
    )

    dynamic_cov = torch.diag(
        torch.tensor([30 / scaling, 30 / scaling, 0.1], device=device)
    )
    dynamic_model = dm_model.MazeDynamic(cuda_gen, dynamic_cov)
    prior_model = dm_model.MazePrior(2 * 1000 / scaling, 1.3 * 1000 / scaling, cuda_gen)

    # Two parameter groups so the caller can give them different optimiser settings.
    encoder_parameters = [
        *encoder.parameters(),
        *state_encoder.parameters(),
        *decoder.parameters(),
    ]
    flow_parameters = [
        *observation_flow.parameters(),
        *prior_model.parameters(),
    ]

    SSM = pydpf.FilteringModel(
        dynamic_model=dynamic_model,
        prior_model=prior_model,
        observation_model=observation_model,
    )
    return SSM, encoder_parameters, flow_parameters, [flow_cov]
            

In [None]:
def transform_control(control, **data):
    """
    Rescale a control tensor: the first two components of the last axis are
    divided by the global ``scaling``, the third is left unchanged.

    Extra keyword arguments are accepted and ignored.
    """
    divisor = torch.tensor([[[scaling, scaling, 1.]]], device=device)
    return control / divisor
    

In [None]:
def normalise_obs(observation, **data):
    """
    Standardise ``observation`` using its global mean and standard deviation
    (both taken over every element of the tensor).

    Extra keyword arguments are accepted and ignored.
    """
    centre = observation.mean()
    spread = observation.std()
    return (observation - centre) / spread
    

In [None]:
def get_DPF(SSM):
    """
    Build the particle filter selected by the global ``DPF_type`` around ``SSM``.

    Reads the notebook-level globals ``DPF_type`` and ``cuda_gen``.

    Raises:
        ValueError: if ``DPF_type`` is not one of the recognised names.
    """
    # Variants that are constructed identically, from the model and a resampling generator.
    generator_resampled = (
        ('DPF', 'DPF'),
        ('Soft', 'SoftDPF'),
        ('Stop-Gradient', 'StopGradientDPF'),
        ('Marginal Stop-Gradient', 'MarginalStopGradientDPF'),
    )
    for label, class_name in generator_resampled:
        if DPF_type == label:
            filter_class = getattr(pydpf, class_name)
            return filter_class(SSM=SSM, resampling_generator=cuda_gen)

    if DPF_type == 'Optimal Transport':
        return pydpf.OptimalTransportDPF(SSM=SSM, regularisation=1.)

    if DPF_type == 'Kernel':
        # 3-dimensional Gaussian kernel with a fixed mean and a learnable covariance.
        Gaussian_kernel = pydpf.StandardGaussian(3, cuda_gen, learn_mean=False, learn_cov=True)
        kernel_mixture = pydpf.KernelMixture(kernel=Gaussian_kernel, generator=cuda_gen)
        return pydpf.KernelDPF(SSM=SSM, kernel=kernel_mixture)

    raise ValueError('DPF_type should be one of the allowed options')

In [None]:
# Train and evaluate every filter type, then record the average runtime and the
# RMSE in the results CSV (one row per filter type, overwritten on re-run).
#
# Number of independently seeded runs averaged per filter type. The original
# loop ran once while still dividing the totals by 5, which under-reported both
# the MSE and the runtime; the loop and the averages now share this constant.
# Lower it for a quick run -- the averages stay correct.
n_repeats = 5

with pydpf.utils.set_deterministic_mode(deterministic, True):
    # DPF_type and cuda_gen are deliberately module-level names: get_DPF and
    # get_SSM read them as globals.
    for DPF_type in DPF_types:
        total_MSE = 0
        total_time = 0
        for i in range(n_repeats):
            # Run-specific seed for model initialisation and filtering.
            cuda_gen = torch.Generator(device=device).manual_seed(i*10)
            SSM, encoder_params, flow_params, flow_cov = get_SSM()
            dpf = get_DPF(SSM)
            dpf.to(device)
            if DPF_type == 'Kernel':
                # The kernel filter gets a third parameter group (the resampling
                # mixture, without weight decay) and its own optimiser settings.
                opt = torch.optim.AdamW(
                    [
                        {'params': encoder_params, 'lr': 0.005},
                        {'params': flow_params, 'lr': 0.001},
                        {'params': dpf.resampler.mixture.parameters(), 'lr': 0.001, 'weight_decay': 0},
                    ],
                    weight_decay=1e-3,
                    betas=(0.7, 0.98),
                    eps=1e-9,
                )
            else:
                opt = torch.optim.AdamW(
                    [
                        {'params': encoder_params, 'lr': 0.005},
                        {'params': flow_params, 'lr': 0.001},
                    ],
                    weight_decay=1e-2,
                    betas=(0.8, 0.99),
                    eps=1e-9,
                )
            opt_scheduler = torch.optim.lr_scheduler.ExponentialLR(opt, gamma=0.99)

            # The dataset is reloaded for every run and then transformed in place.
            data = pydpf.StateSpaceDataset(data_path= pathlib.Path('.').parent.absolute().joinpath('data/maze_data.csv'), state_prefix='state', control_prefix='control', device = device)
            data.apply(normalise_obs,  'observation')
            scaling_tensor = torch.tensor([[[scaling, scaling, 1.]]], device=device)
            # Shift the states by a fixed offset, then rescale the first two components.
            data.apply(lambda state, **_unused: (state - torch.tensor([[[1000., 650., 0.]]], device=device))/scaling_tensor, 'state')
            data.apply(transform_control, 'control')
            print('Data Loaded')

            # Only the training/evaluation call is timed, not the data loading.
            start_time = time.time()
            test_mse, _ = dm_training.train(dpf, opt, data, 2, (100, 100, 100), (64, 64, 64), (0.45, 0.2, 0.35), (1., 1., 1.), torch.Generator().manual_seed(i*10), None, 'MSE', 99, pre_train_epochs=0, device=device, lr_scheduler=opt_scheduler, state_scaling = scaling)
            total_MSE += test_mse
            total_time += time.time() - start_time

        # Average over the runs actually performed.
        MSE = total_MSE / n_repeats
        runtime = total_time / n_repeats

        # The results file must already exist; its row for this filter type is replaced.
        result_path = pathlib.Path('.').parent.absolute().joinpath(results_file)
        results = pd.read_csv(result_path, index_col=0)
        row = [str(datetime.timedelta(seconds=runtime)), math.sqrt(MSE)]
        results.loc[DPF_type] = row
        print(results)
        results.to_csv(result_path)
