In [97]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import json
import gpytorch
import torch
import tqdm
import os
import threading
import concurrent
import logging

from torch.utils.tensorboard import SummaryWriter
from torch.optim.lr_scheduler import StepLR
from matplotlib import pyplot as plt
from time import sleep
from typing import List, Union
from math import ceil
from data import GaussianPreprocessor, SequenceDataset, SequencePredictionDataset, SequenceReconstructionDataset
from models import LSTMFeatureExtractor, GaussianProcessLayer, GPModel

%matplotlib inline
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [98]:
username = os.getenv("USERNAME")

task = 1876652

data_paths= [f"/home/{username}/workspace/dataset_downloader/{task}/processed"]

include_dirs=[
5798514803372,  5798515131052,  5798515442348,  5798515778220,  5798516081324,  5798516392620,
5798514811564,  5798515139244,  5798515450540,  5798515786412,  5798516089516,  5798516400812,
5798514827948,  5798515147436,  5798515458732,  5798515794604,  5798516097708,  5798516409004,
5798514836140,  5798515155628,  5798515466924,  5798515802796,  5798516105900,  5798516417196,
5798514844332,  5798515172012,  5798515475116,  5798515810988,  5798516114092,  5798516425388,
5798514852524,  5798515180204,  5798515483308,  5798515819180,  5798516122284,  5798516433580,
5798514860716,  5798515188396,  5798515491500,  5798515827372,  5798516130476,  5798516441772,
5798514877100,  5798515196588,  5798515499692,  5798515835564,  5798516138668,  5798516449964,
]

backup_dirs =[
5798514885292,  5798515204780,  5798515507884,  5798515843756,  5798516146860,  5798516458156,
5798514893484,  5798515212972,  5798515524268,  5798515851948,  5798516155052,  5798516466348,
5798514909868,  5798515221164,  5798515532460,  5798515860140,  5798516163244,  5798516474540,
5798514918060,  5798515229356,  5798515540652,  5798515868332,  5798516171436,  5798516482732,
5798514926252,  5798515237548,  5798515548844,  5798515876524,  5798516179628,  5798694355628,
5798514934444,  5798515245740,  5798515557036,  5798515884716,  5798516187820,  5798694363820,
5798514950828,  5798515253932,  5798515565228,  5798515892908,  5798516196012,  5798694478508,
5798514959020,  5798515262124,  5798515581612,  5798515901100,  5798516204204,  5798694486700,
5798514967212,  5798515270316,  5798515589804,  5798515909292,  5798516212396,  5798694494892,
5798514975404,  5798515278508,  5798515597996,  5798515917484,  5798516220588,  5798694503084,
5798514983596,  5798515286700,  5798515606188,  5798515925676,  5798516228780,  5798694511276,
5798514999980,  5798515294892,  5798515614380,  5798515933868,  5798516236972,  5798694519468,
5798515008172,  5798515303084,  5798515638956,  5798515942060,  5798516245164,  5798694527660,
5798515016364,  5798515311276,  5798515655340,  5798515950252,  5798516261548,  5798694535852,
5798515024556,  5798515319468,  5798515663532,  5798515958444,  5798516269740,  5798694544044,
5798515032748,  5798515327660,  5798515671724,  5798515966636,  5798516277932,  5798694552236,
5798515040940,  5798515335852,  5798515688108,  5798515974828,  5798516286124,  5798694560428,
5798515049132,  5798515344044,  5798515696300,  5798515983020,  5798516302508,  5798694568620,
5798515057324,  5798515352236,  5798515704492,  5798515991212,  5798516310700,  5798694576812,
5798515065516,  5798515360428,  5798515712684,  5798515999404,  5798516318892,  5798694585004,
5798515073708,  5798515368620,  5798515720876,  5798516007596,  5798516327084,  5798694617772,
5798515081900,  5798515385004,  5798515729068,  5798516015788,  5798516335276,  5798694625964,
5798515090092,  5798515393196,  5798515737260,  5798516023980,  5798516343468,  5798694634156,
5798515098284,  5798515401388,  5798515745452,  5798516032172,  5798516351660,  5798694642348,
5798515106476,  5798515409580,  5798515753644,  5798516048556,  5798516368044,
5798515114668,  5798515417772,  5798515761836,  5798516056748,  5798516376236,
5798515122860,  5798515434156,  5798515770028,  5798516064940,  5798516384428,  
]

include_dirs = set(include_dirs)

include_dirs = [str(x) for x in include_dirs]

In [99]:
# features = ['engineRpm', 'speedMps', 'driveMotorTorqueNm', 'throttle', 'brake', "linearVelocityVrf_y", "linearAccelerationVrf_y"]
# normalize_features = ['engineRpm', 'speedMps', 'driveMotorTorqueNm', 'throttle', 'brake', "linearVelocityVrf_y", "linearAccelerationVrf_y"]
# onehot_features = None
# lb = np.array([0.000000, 0.000000, -70.000000, 0.0, 0.0, -0.3, -10, -0.3, -10])
# ub = np.array([4300.0, 24.0, 255.000000, 100.0, 100.0, 22.5, 10, 22.5, 10])

features = {"env_learning_zip/chassis.txt": ['engineRpm', 'speedMps', 'driveMotorTorqueNm'],
            "env_learning_zip/localization.txt":  ["linearVelocityVrf_y", "linearAccelerationVrf_y"],
            "env_learning_zip/control.txt":  ["throttle", "brake"]
           }

normalize_features = ['engineRpm', 'speedMps', 'driveMotorTorqueNm', "linearVelocityVrf_y", "linearAccelerationVrf_y", "throttle"]
onehot_features = None
lb = np.array([0.000000, 0.000000, -70.000000, -0.3, -10.0, -100.0])
ub = np.array([4300.0, 24.0, 255.000000, 22.5, 5.0, 100.0, ])

In [100]:
class Args:
    def __init__(self, in_memory=False, workers=16, features=None):
        self.in_memory = in_memory
        self.workers = workers
        self.features = features

args = Args(in_memory=False,  workers=16, features=features)

In [101]:
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")

In [102]:
input_size = 5
hidden_size = 64
batch_size = 100
seq_length = 10
bidirectional = False

In [6]:
preprocessor = GaussianPreprocessor(data_paths, 
                                    features=features, 
                                    normalize_features=normalize_features,
                                    lb=lb,
                                    ub=ub
                                   )

dataset = SequenceReconstructionDataset(data_paths, 
                                        batch_size=batch_size,
                                        seq_length=seq_length,
                                        preprocessor=preprocessor, 
                                        include_dirs=include_dirs,
                                        args=args,
                                        files=["env_learning_zip/chassis.txt", 
                                               "env_learning_zip/localization.txt", 
                                               "env_learning_zip/control.txt"],
                                       )

100%|███████████████████████████████████████████████████████████████| 48/48 [00:56<00:00,  1.17s/it]


In [103]:
feature_extractor = LSTMFeatureExtractor(input_size, hidden_size, bidirectional=bidirectional)

In [104]:
print(dataset.sample())

tensor([[[ 0.1629,  0.8797, -0.3723,  0.9141,  0.3956,  0.1549],
         [ 0.1628,  0.8797, -0.3723,  0.9137,  0.4105,  0.1174],
         [ 0.1627,  0.8780, -0.3538,  0.9132,  0.4286,  0.1496],
         [ 0.1624,  0.8780, -0.3538,  0.9126,  0.4399,  0.1406],
         [ 0.1622,  0.8780, -0.3538,  0.9126,  0.4234,  0.1349],
         [ 0.1622,  0.8780, -0.3538,  0.9128,  0.4032,  0.1571],
         [ 0.1622,  0.8780, -0.3538,  0.9132,  0.3657,  0.1382],
         [ 0.1622,  0.8780, -0.3354,  0.9138,  0.3370,  0.1498],
         [ 0.1626,  0.8780, -0.3354,  0.9136,  0.3281,  0.1738],
         [ 0.1630,  0.8780, -0.3169,  0.9129,  0.3332,  0.1089]]],
       dtype=torch.float64)


In [108]:
class DKLModel(gpytorch.Module):
    def __init__(self, feature_extractor, feature_size, grid_bounds=(-1, 1), device=torch.device('cpu')):
        super(DKLModel, self).__init__()
        self.feature_extractor = feature_extractor
        self.inducing_points = torch.randn(1024, feature_size, dtype=torch.float).to(device)
        self.gp_layer = GPModel(inducing_points=self.inducing_points)
        self.grid_bounds = grid_bounds
        
        self.scale_to_bounds = gpytorch.utils.grid.ScaleToBounds(self.grid_bounds[0], self.grid_bounds[1])
        
    def forward(self, x):
        features = self.feature_extractor(x)[:, -1, :]
        features = self.scale_to_bounds(features)
        res = self.gp_layer(features)
        return res
    


In [109]:
model = DKLModel(feature_extractor, feature_size=hidden_size, device=device)
likelihood = gpytorch.likelihoods.GaussianLikelihood()


# If you run this example without CUDA, I hope you like waiting!
if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

In [None]:
smoke_test = ('CI' in os.environ)
num_epochs = 1 if smoke_test else 300
num_batches = 100


model.train()
likelihood.train()

optimizer = torch.optim.Adam([
    {'params': model.parameters()},
    {'params': likelihood.parameters()},
], lr=0.1)


scheduler = StepLR(optimizer, step_size=100, gamma=0.1)

# Our loss object. We're using the VariationalELBO, which essentially just computes the ELBO
# mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
mll = gpytorch.mlls.VariationalELBO(likelihood, model.gp_layer, num_data=batch_size)
# mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=1000)


writer = SummaryWriter()


# We use more CG iterations here because the preconditioner introduced in the NeurIPS paper seems to be less
# effective for VI.
epochs_iter = tqdm.tqdm(range(num_epochs), desc="Epoch")
for i in epochs_iter:
    # Within each iteration, we will go over each minibatch of data
    
    optimizer.zero_grad()
    
    start_index = 0
    
    losses = []
    
    batch_index = 0
    for batch in dataset.batches():
        if batch_index == num_batches:
            break
        batch_index += 1
        
        target = batch[:, -2, -1].to(device, dtype=torch.float)
        state = batch[:, :, :-1].to(device, dtype=torch.float)
        output = model(state)
        loss = -mll(output, target)
        loss.backward()
        losses.append(loss.item())
        writer.add_scalar('train MAE', torch.mean(torch.abs(output.mean - target)))
        
    total_loss = sum(losses) / num_batches
    epochs_iter.set_postfix(loss=total_loss)
    writer.add_scalar('Loss/train', total_loss, i)
    optimizer.step()
    scheduler.step()
    torch.cuda.empty_cache()
    
writer.close()

Epoch:  12%|████▍                               | 37/300 [8:07:22<50:56:23, 697.28s/it, loss=-.0572]