## Install the package dependencies before running this notebook

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create a `torch.utils.data.Dataset` class for the training dataset

In [2]:
from glob import glob
import pickle
import numpy as np

# Prefix prepended to "<split>/<city>_inputs" / "<split>/<city>_outputs" when loading pickles.
ROOT_PATH = "./"

# City names and split names that appear in the pickle file paths.
cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

def get_city_trajectories(city="palo-alto", split="train", normalized=False, root_path=None):
    """Load the pickled trajectories of one city for one split.

    Args:
        city: City name used as the file-name prefix (e.g. "palo-alto").
        split: Sub-directory to read from. Ground-truth outputs are only
            loaded when this is "train".
        normalized: Accepted for interface compatibility; currently unused.
        root_path: Directory that contains the split sub-directories.
            Falls back to the module-level ROOT_PATH when None.

    Returns:
        Tuple ``(inputs, outputs)`` of numpy arrays. ``outputs`` is None
        unless ``split == "train"``.
    """
    root = ROOT_PATH if root_path is None else root_path
    split_dir = os.path.join(root, split)

    # NOTE: pickle.load can execute arbitrary code -- only read trusted files.
    # The context manager closes the handle even if unpickling fails.
    with open(os.path.join(split_dir, city + "_inputs"), "rb") as fh:
        inputs = np.asarray(pickle.load(fh))

    outputs = None
    if split == "train":
        with open(os.path.join(split_dir, city + "_outputs"), "rb") as fh:
            outputs = np.asarray(pickle.load(fh))

    return inputs, outputs

def get_training_trajectories(normalized=False):
    """
    Load the training split of every city and stack the results.

    The city identity is dropped: inputs (and outputs) of all cities are
    concatenated along the first axis, in the order listed below.
    Each city name is printed before its files are loaded.
    """
    city_names = ("austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto")

    input_parts = []
    output_parts = []
    for name in city_names:
        print(name)
        city_in, city_out = get_city_trajectories(name, split='train', normalized=normalized)
        input_parts.append(city_in)
        output_parts.append(city_out)

    return np.concatenate(input_parts), np.concatenate(output_parts)


def get_test_coords(normalized=False):
    """ Load the test-split inputs of every city, stacked along the first axis (used to build the submission csv). """
    city_names = ("austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto")
    # Only the inputs (element 0) are kept; the loader returns no outputs for the test split.
    per_city_inputs = [
        get_city_trajectories(name, split='test', normalized=normalized)[0]
        for name in city_names
    ]
    return np.concatenate(per_city_inputs)


class ArgoverseDataset(Dataset):
    """Dataset of the trajectories of a single city.

    Each item is an ``(input, target)`` pair. get_city_trajectories only
    returns targets for the "train" split; for any other split the target
    is a zero-length slice of the input, so indexing does not fail on the
    missing ground truth.
    """
    def __init__(self, city: str, split:str, transform=None):
        """Load the trajectories of `city` / `split`; `transform` is applied to every item."""
        super(ArgoverseDataset, self).__init__()
        self.transform = transform

        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=False)

    def __len__(self):
        """Number of trajectories."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the (optionally transformed) ``(input, target)`` pair at `idx`."""
        sample_in = self.inputs[idx]
        if self.outputs is None:
            # No ground truth was loaded: use an empty placeholder with the same
            # trailing shape and dtype as the input instead of indexing None.
            target = sample_in[:0]
        else:
            target = self.outputs[idx]

        data = (sample_in, target)

        if self.transform:
            data = self.transform(data)

        return data

class ArgoverseFullDataset(Dataset):
    """ Argoverse dataset built from the training trajectories of all cities (ArgoverseDataset covers a single city). """
    def __init__(self, transform=None):
        """Load every city's training data; `transform` is applied to each item."""
        super().__init__()
        self.transform = transform

        all_inputs, all_outputs = get_training_trajectories()
        self.inputs = all_inputs
        self.outputs = all_outputs

    def __len__(self):
        """Number of trajectories (size of the first axis of the inputs)."""
        n_samples = self.inputs.shape[0]
        return n_samples

    def __getitem__(self, idx):
        """Return the (optionally transformed) ``(input, output)`` pair at `idx`."""
        pair = self.inputs[idx], self.outputs[idx]
        return self.transform(pair) if self.transform else pair
    
    
# Initialize a single-city training dataset.
# NOTE(review): `train_dataset` is rebound to an ArgoverseFullDataset in the next cell,
# so this single-city load looks redundant -- confirm before removing.
city = 'palo-alto' 
split = 'train'
train_dataset  = ArgoverseDataset(city = city, split = split)

In [3]:
# Load the training trajectories of all cities (prints each city name while loading).
train_dataset = ArgoverseFullDataset()

austin
miami
pittsburgh
dearborn
washington-dc
palo-alto


## Create a DataLoader class for training

In [4]:
# Use 90% of the samples (rounded down) for training.
train_set_size = int(np.floor(len(train_dataset) * .90))
train_set_size

183434

In [5]:
# Remaining samples; despite the name, this is the size of the validation split passed to random_split.
test_set_size = len(train_dataset) - train_set_size
test_set_size

20382

In [6]:
# Randomly split the full dataset into training and validation subsets.
# NOTE(review): no generator or seed is set in the visible cells, so the split presumably differs between runs.
train_data, validation_data = torch.utils.data.random_split(train_dataset, [train_set_size, test_set_size])

In [None]:
batch_sz = 5000  # training batch size
# NOTE(review): shuffle is left at its default (off), so batches are visited in the same order every epoch.
train_set = DataLoader(train_data, batch_size=batch_sz)
# Despite the name, `test_set` iterates over the validation split.
test_set = DataLoader(validation_data, batch_size=2500)

## Sample a batch of data and visualize 

In [8]:
import matplotlib.pyplot as plt
import random


def show_sample_batch(sample_batch):
    """Visualize the trajectories of a batch of samples, one subplot per sample.

    Args:
        sample_batch: ``(inp, out)`` pair of tensors indexed as
            ``[sample, step, feature]``; the first two features are plotted
            as (x, y) positions.
    """
    inp, out = sample_batch
    batch_sz = inp.size(0)

    # squeeze=False always yields a 2-D array of Axes, so ravel() also works
    # for a batch of one (plt.subplots would otherwise return a bare Axes).
    fig, axs = plt.subplots(1, batch_sz, figsize=(15, 3), facecolor='w',
                            edgecolor='k', squeeze=False)
    fig.subplots_adjust(hspace = .5, wspace=.001)
    axs = axs.ravel()
    for i in range(batch_sz):
        # Axis ticks are intentionally left visible.
        # First two feature dimensions are (x, y) positions.
        axs[i].scatter(inp[i,:,0], inp[i,:,1])
        axs[i].scatter(out[i,:,0], out[i,:,1])
    print('show')
    plt.show()
# NOTE: the figure becomes hard to read for batch sizes larger than about 10.
'''
ex_inp = 0       
for i_batch, sample_batch in enumerate(train_set):
    inp, out = sample_batch
    ex_inp = inp
    """
    TODO:
      implement your Deep learning model
      implement training routine
    """
    show_sample_batch(sample_batch)
    break
'''

'\nex_inp = 0       \nfor i_batch, sample_batch in enumerate(train_set):\n    inp, out = sample_batch\n    ex_inp = inp\n    """\n    TODO:\n      implement your Deep learning model\n      implement training routine\n    """\n    show_sample_batch(sample_batch)\n    break\n'





# MLP design

In this multilayer perceptron (MLP) model, I use the previous 50 steps to predict a single next step, and repeat this 60 times, feeding each prediction back in as input. This contrasts with an MLP that predicts all 60 future steps at once from the 50 observed steps.

In [9]:
def transformation(data):
    """
    Build one sliding-window input per output step.

    Takes an ``(inputs, outputs)`` pair -- in this notebook a 50x2 input array
    and a 60x2 output array -- and returns a tensor of windows plus the
    outputs as a tensor. Window ``i`` holds the ``len(inputs)`` coordinates
    that precede output step ``i``: the tail of the inputs followed by the
    outputs already "seen". For 50 inputs and 60 outputs the result is a
    60x50x2 tensor and a 60x2 tensor, so a model can be trained on one-step
    predictions only.
    """
    inputs, outputs = data[0], data[1]
    n_in = len(inputs)
    n_out = len(outputs)

    windows = []
    for i in range(n_out):
        # Drop the i oldest inputs and append the outputs before step i,
        # keeping at most the n_in most recent coordinates.
        window = np.concatenate([inputs[i:], outputs[max(0, i - n_in):i]])
        windows.append(window)

    # Stack once in numpy: torch.tensor() on a list of ndarrays is very slow.
    if windows:
        stacked = np.stack(windows)
    else:
        stacked = np.empty((0,) + np.shape(inputs))

    return (torch.tensor(stacked), torch.tensor(outputs))
    

In [10]:
import torch
from torch import nn
from torch.nn import functional as F

In [11]:
class RNNModel(nn.Module):
    """Recurrent layer followed by a linear read-out back to the input size."""
    def __init__(self, rnn_layer, **kwargs):
        """
        Args:
            rnn_layer: Recurrent module exposing `input_size`, `hidden_size`
                and `num_layers` (e.g. nn.LSTM or nn.GRU).
            **kwargs: Forwarded to nn.Module.__init__.
        """
        super(RNNModel, self).__init__(**kwargs)
        # RNN layer for processing input and giving hidden state
        self.rnn = rnn_layer
        self.input_size = self.rnn.input_size
        self.num_hiddens = self.rnn.hidden_size
        self.layers = self.rnn.num_layers
        # Needed by begin_state; a bidirectional layer doubles both the state
        # count and the feature size of the recurrent output.
        self.num_directions = 2 if getattr(self.rnn, "bidirectional", False) else 1
        self.linear = nn.Linear(self.num_hiddens * self.num_directions, self.input_size)

    def forward(self, inputs, state):
        """
        Forward pass of inputs through the recurrent layer and the read-out.

        Args:
            inputs: Input tensor; cast to float32 before use. With a
                batch_first recurrent layer the expected layout is
                (batch_size, sequence_length, input_size).
            state: Initial state for the recurrent layer, e.g. the value
                returned by begin_state.

        Returns:
            Tuple ``(output, state)``. ``output`` has the first dimensions of
            the recurrent output flattened: (batch_size * sequence_length,
            input_size).
        """
        X = inputs.to(torch.float32)
        Y, state = self.rnn(X, state)
        # Flatten every time step of every sample into one row before the
        # linear layer maps the hidden features back to `input_size`.
        output = self.linear(Y.reshape((-1, Y.shape[-1])))
        return output, state

    def begin_state(self, batch_size, device):
        """Return an all-zero initial state for `batch_size` sequences on `device`.

        A single tensor for non-LSTM layers; a two-element list (hidden state,
        cell state) for nn.LSTM. Each tensor has shape
        (num_directions * num_layers, batch_size, num_hiddens).
        """
        shape = (self.num_directions * self.rnn.num_layers, batch_size, self.num_hiddens)
        if not isinstance(self.rnn, nn.LSTM):
            # `nn.GRU` / `nn.RNN` take a single tensor as hidden state
            return torch.zeros(shape, device=device)
        # `nn.LSTM` takes a pair: hidden state and cell state. A list (not a
        # tuple) is returned because callers in this notebook assign to its items.
        return [torch.zeros(shape, device=device),
                torch.zeros(shape, device=device)]

    def get_input_size(self):
        """Return the feature size of a single input step."""
        return self.input_size


In [12]:
def get_loss(net, test_iter, loss_fn, device):
    """Average loss of an autoregressive rollout over all batches of `test_iter`.

    For every batch the state is re-initialised, the network is warmed up on
    all but the last input step, and then rolled out one step at a time
    (each prediction is fed back as the next input) for as many steps as the
    target contains.

    Args:
        net: Model exposing `input_size`, `begin_state(batch_size, device)`
            and `net(inputs, state) -> (flat_output, state)`.
        test_iter: Iterable of ``(X, y)`` batches indexed as
            ``[sample, step, feature]``.
        loss_fn: Callable invoked as ``loss_fn(y, predictions)``.
        device: Device the batches are moved to.

    Returns:
        Mean of the per-batch loss values.
    """
    input_size = net.input_size
    losses = []
    # Evaluation only: skip autograd bookkeeping for the whole rollout.
    with torch.no_grad():
        for X, y in test_iter:
            batch_size = X.shape[0]
            horizon = y.shape[1]
            # Re-initialize state for every batch
            state = net.begin_state(batch_size, device)

            last_X = X[:, -1:, :].to(device)
            history = X[:, :-1, :].float().to(device)
            # Cast the target like the training loop does so dtypes match.
            y = y.float().to(device)

            # Warm up the state on everything but the last observed step
            _, state = net(history, state)

            # First prediction comes from the last observed step
            step, state = net(last_X, state)
            step = step.reshape((batch_size, 1, input_size))
            steps = [step]
            for _ in range(horizon - 1):
                # Previous output becomes the next input
                step, state = net(step, state)
                step = step.reshape((batch_size, 1, input_size))
                steps.append(step)

            # Concatenate once instead of growing a tensor inside the loop
            predictions = torch.cat(steps, dim=1)
            losses.append(loss_fn(y, predictions).item())
    return np.mean(losses)

In [13]:
import math
def train_epoch_ch8(net, train_iter, loss, updater, device):
    """Train a net within one epoch (defined in Chapter 8).

    For every batch a fresh state is created, the network is warmed up on all
    but the last input step and then rolled out autoregressively for as many
    steps as the target contains; the loss of the whole rollout is
    back-propagated.

    Args:
        net: Model exposing `input_size`, `begin_state(batch_size, device)`
            and `net(inputs, state) -> (flat_output, state)`.
        train_iter: Iterable of ``(X, Y)`` batches indexed as
            ``[sample, step, feature]``.
        loss: Callable invoked as ``loss(predictions, targets)``.
        updater: A torch.optim.Optimizer, or a callable invoked as
            ``updater(batch_size=1)`` after the backward pass.
        device: Device the batches are moved to.

    Returns:
        Mean of the per-batch loss values.

    NOTE(review): gradient clipping is not applied; the recorded run in this
    notebook shows the loss diverging, so clipping may be worth evaluating.
    """
    batch_losses = []
    input_size = net.input_size
    for X, Y in train_iter:
        batch_size = X.shape[0]
        horizon = Y.shape[1]

        # A fresh state is used for every batch.
        state = net.begin_state(batch_size, device)
        if isinstance(state, list):
            # LSTM-style (hidden, cell) pair; tensor states are passed as-is.
            state = tuple(state)

        y = Y.float().to(device)
        last_X = X[:, -1:, :].to(device)
        history = X[:, :-1, :].float().to(device)

        # Warm up the state on everything but the last observed step
        _, state = net(history, state)

        # First prediction comes from the last observed step
        step, state = net(last_X, state)
        step = step.reshape((batch_size, 1, input_size))
        steps = [step]
        for _ in range(horizon - 1):
            # Output becomes input
            step, state = net(step, state)
            step = step.reshape((batch_size, 1, input_size))
            steps.append(step)

        # Concatenate once instead of growing a tensor inside the loop
        y_hat = torch.cat(steps, dim=1)

        l = loss(y_hat.float(), y)
        batch_losses.append(l.item())

        # Backpropagate the loss
        if isinstance(updater, torch.optim.Optimizer):
            updater.zero_grad()
            l.backward()
            updater.step()
        else:
            l.backward()
            # Since the `mean` function has been invoked
            updater(batch_size=1)
    return np.mean(batch_losses)

In [14]:
def predict(net, pred_iter, device, num_steps=60):
    """Roll the network out autoregressively for every batch of `pred_iter`.

    For each batch a fresh state is created, the network is warmed up on all
    but the last input step, and `num_steps` predictions are generated by
    feeding each prediction back in as the next input.

    Args:
        net: Model exposing `input_size`, `begin_state(batch_size, device)`
            and `net(inputs, state) -> (flat_output, state)`.
        pred_iter: Iterable of ``(X, y)`` batches; only ``X`` is used.
        device: Device the inputs are moved to.
        num_steps: Number of future steps to predict (at least 1).

    Returns:
        Tensor of shape (total_samples, num_steps, input_size), or None when
        `pred_iter` yields no batches.
    """
    input_size = net.input_size
    batch_predictions = []
    # Inference only: skip autograd bookkeeping for the whole rollout.
    with torch.no_grad():
        for X, _ in pred_iter:
            batch_size = X.shape[0]
            # Fresh state per batch: samples of different batches are unrelated,
            # so the state must not carry over.
            state = net.begin_state(batch_size=batch_size, device=device)

            last_X = X[:, -1:, :].to(device)
            history = X[:, :-1, :].float().to(device)

            # Warm up the state on everything but the last observed step
            _, state = net(history, state)

            # First prediction comes from the last observed step
            step, state = net(last_X, state)
            step = step.reshape((batch_size, 1, input_size))
            steps = [step]
            for _ in range(num_steps - 1):
                # Previous output becomes the next input
                step, state = net(step, state)
                step = step.reshape((batch_size, 1, input_size))
                steps.append(step)

            batch_predictions.append(torch.cat(steps, dim=1))

    if not batch_predictions:
        return None
    return torch.cat(batch_predictions)

In [15]:
def train_ch8(net, train_iter, test_iter, lr, num_epochs, device,
              use_random_iter=False):
    """Train a model (defined in Chapter 8).

    Runs `num_epochs` epochs of SGD with an MSE loss, evaluating on
    `test_iter` after every epoch and printing both errors.

    Args:
        net: Model to train; must provide `parameters()`.
        train_iter: Iterable of training batches.
        test_iter: Iterable of validation batches.
        lr: SGD learning rate.
        num_epochs: Number of passes over `train_iter`.
        device: Device passed on to the epoch and evaluation helpers.
        use_random_iter: Accepted for interface compatibility; currently unused.

    Returns:
        Tuple ``(epoch_train_err, epoch_val_err)`` with one entry per epoch.
    """
    loss = nn.MSELoss()
    # Optimizer for sgd
    updater = torch.optim.SGD(net.parameters(), lr)
    # Per-epoch error histories
    epoch_train_err = []
    epoch_val_err = []
    for epoch in range(num_epochs):
        train_err = train_epoch_ch8(
            net, train_iter, loss, updater, device)
        val_err = get_loss(net, test_iter, loss, device)
        print(f'(train) epoch {epoch + 1}, train err {train_err}, val err {val_err}')
        # Record both errors so callers can inspect or plot them afterwards
        epoch_train_err.append(train_err)
        epoch_val_err.append(val_err)
    return epoch_train_err, epoch_val_err

In [16]:
# Single-layer LSTM over 2-feature inputs; batch_first=True means tensors are (batch, seq, feature).
rnn_lstm = torch.nn.LSTM(
    input_size=2,
    hidden_size=512,
    num_layers=1,
    batch_first=True
)
# Wrap the LSTM with the linear read-out defined in RNNModel.
net = RNNModel(rnn_lstm)
# Move the model parameters to the GPU (requires a CUDA-capable device).
net.cuda()

RNNModel(
  (rnn): LSTM(2, 512, batch_first=True)
  (linear): Linear(in_features=512, out_features=2, bias=True)
)

In [17]:
# Training hyper-parameters: number of epochs and SGD learning rate.
num_epochs, lr = 100, .00001
# NOTE(review): loss_fn is not passed to train_ch8 in the visible cells (train_ch8 builds its own MSELoss).
loss_fn = nn.MSELoss()
# Optimizer for sgd
#updater = torch.optim.SGD(net.parameters(), lr)
# Device handed to the training and evaluation helpers; the model was moved to CUDA above.
device = torch.device('cuda')
# Train and predict
#loss = train_epoch_ch8(net, train_set, loss_fn, updater, device)
#print(f'(train) epoch, loss {loss}')

In [18]:
# Run the full training loop; prints the train and validation error after every epoch.
# NOTE(review): in the recorded output below the error jumps by several orders of magnitude
# at epoch 8 and does not recover -- the run was interrupted manually.
train_ch8(net, train_set, test_set, lr, num_epochs, device)

(train) epoch 1, train err 10060114.716216216, val err 8753210.556068666
(train) epoch 2, train err 7762870.304054054, val err 6955004.826928768
(train) epoch 3, train err 6509417.128378378, val err 6050359.434161817
(train) epoch 4, train err 5902137.878378378, val err 5653005.600592541
(train) epoch 5, train err 5609017.081081081, val err 5183673.302734093
(train) epoch 6, train err 5423720.277027027, val err 5170969.667208303
(train) epoch 7, train err 5336500.824324325, val err 4999650.346765022
(train) epoch 8, train err 554349773847.8851, val err 286878519165.1898
(train) epoch 9, train err 225684560204.1081, val err 328615712421.67377
(train) epoch 10, train err 439235973341.4054, val err 633874913456.038
(train) epoch 11, train err 989006122897.2972, val err 839728756890.4042
(train) epoch 12, train err 1127655580312.2163, val err 736524867730.312
(train) epoch 13, train err 10882649310512.432, val err 5604146997504.623
(train) epoch 14, train err 6237884503676.541, val err 721

KeyboardInterrupt: 

In [None]:
#get_loss(net, test_set, nn.MSELoss(), torch.device('cpu'))

In [None]:
#predict(net, test_set, device)