## Install the package dependencies before running this notebook

In [97]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
import pandas as pd
from glob import glob


"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create a Torch.Dataset class for the training dataset

In [3]:
from glob import glob
import pickle
import numpy as np

ROOT_PATH = "./"

cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
splits = ["train", "test"]

def get_city_trajectories(city="palo-alto", split="train", normalized=False):

    
    outputs = None
    
    if split=="train":
        f_in = ROOT_PATH + split + "/" + city + "_inputs"
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)[:int(n * 0.8)]
        
        f_out = ROOT_PATH + split + "/" + city + "_outputs"
        outputs = pickle.load(open(f_out, "rb"))
        outputs = np.asarray(outputs)[:int(n * 0.8)]
        
    elif split == 'val':
        f_in = ROOT_PATH + 'train' + "/" + city + "_inputs"
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)[int(n * 0.8):]
        
        f_out = ROOT_PATH + 'train' + "/" + city + "_outputs"
        outputs = pickle.load(open(f_out, "rb"))
        outputs = np.asarray(outputs)[int(n * 0.8):]
    
    else:
        f_in = ROOT_PATH + split + "/" + city + "_inputs"
        inputs = pickle.load(open(f_in, "rb"))
        n = len(inputs)
        inputs = np.asarray(inputs)

    return inputs, outputs

class ArgoverseDataset(Dataset):
    """Dataset class for Argoverse"""
    def __init__(self, city: str, split:str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.transform = transform

        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=False)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):

        data = (self.inputs[idx], self.outputs[idx])
            
        if self.transform:
            data = self.transform(data)

        return data

In [19]:
# intialize a dataset
def get_data_loader(city = 'austin', batch_size = 20):
    train_dataset  = ArgoverseDataset(city = city, split = 'train')
    val_dataset = ArgoverseDataset(city = city, split = 'val')

    train_loader = DataLoader(train_dataset,batch_size=batch_size)
    val_loader = DataLoader(val_dataset,batch_size=batch_size)
    return train_dataset, val_dataset, train_loader, val_loader

In [20]:
train_dataset, val_dataset, train_loader, val_loader = get_data_loader(city = 'austin', batch_size = 20)

In [21]:
len(val_dataset)

8609

In [22]:
len(train_dataset)

34432

## Create a DataLoader class for training

In [23]:
from torch import nn, optim

# model
class EncoderDecoder(nn.Module):

    def __init__(self):
        super().__init__()
        
        self.encoder = nn.Sequential(
            nn.Linear(100, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 32)
        )
        
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 120),
            nn.ReLU(),
            nn.Linear(120, 120)
        )
        
    def forward(self, x):
        x = x.reshape(-1, 100).float()
        x = self.encoder(x)
        x = self.decoder(x)
        x = x.reshape(-1, 60, 2)
        return x.float()

In [24]:
# model, optimizer, loss
model_encdoc = EncoderDecoder()
opt = optim.Adam(model_encdoc.parameters(), lr=1e-3)
loss_func = nn.MSELoss()

In [37]:
def train_epochs(model, train_loader, loss_func, opt, epochs = 10):
    for epoch in range(epochs):

        total_loss = 0
        for i_batch, sample_batch in enumerate(train_loader):
            inp, out = sample_batch
            out = out.float()
            preds = model(inp)
            loss = loss_func(preds, out) # MSE
            #loss = torch.sqrt(loss_func(preds, out)) # RMSE

            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss.item()

        print('epoch {} trian loss: {}'.format(epoch, total_loss / len(train_dataset)))

In [29]:
train_epochs(model_encdoc, train_loader, loss_func, opt, epochs = 10)

epoch 0 loss: 1577.4951438496105
epoch 1 loss: 104.47886275536065
epoch 2 loss: 94.63518537464638
epoch 3 loss: 85.23028913040586
epoch 4 loss: 79.36879324115343
epoch 5 loss: 70.28743940037866
epoch 6 loss: 61.36069848280414
epoch 7 loss: 51.35414551181864
epoch 8 loss: 46.51442320683632
epoch 9 loss: 44.449591387160204


In [31]:
def val_loss(model, val_loader, loss_func, opt):
    val_loss = 0
    for i_batch, sample_batch in enumerate(val_loader):
        inp, out = sample_batch
        out = out.float()
        preds = model(inp)
        loss = loss_func(preds, out) # MSE
        #loss = torch.sqrt(loss_func(preds, out)) # RMSE

        val_loss += loss.item()
    print('val loss: {}'.format(val_loss / len(val_dataset)))

In [32]:
val_loss(model_encdoc, val_loader, loss_func, opt)

val loss: 33.87206649824584


## Train One City

In [40]:
def train_city(city, batch_size, epochs):
    # data loader
    train_dataset, val_dataset, train_loader, val_loader = get_data_loader(city = city, batch_size = batch_size)
    
    # model, optimizer, loss
    model_encdoc = EncoderDecoder()
    opt = optim.Adam(model_encdoc.parameters(), lr=1e-3)
    loss_func = nn.MSELoss()
    
    # train
    train_epochs(model_encdoc, train_loader, loss_func, opt, epochs = epochs)
    
    # evaluate
    val_loss(model_encdoc, val_loader, loss_func, opt)
    return model_encdoc

In [47]:
model_austin = train_city('austin', batch_size = 20, epochs = 10)

epoch 0 trian loss: 1419.9582264272697
epoch 1 trian loss: 108.36184304708885
epoch 2 trian loss: 93.0241262593677
epoch 3 trian loss: 86.77068308738085
epoch 4 trian loss: 77.04915108999798
epoch 5 trian loss: 68.87862812542117
epoch 6 trian loss: 60.82795408312716
epoch 7 trian loss: 47.9808281908248
epoch 8 trian loss: 54.221560459597846
epoch 9 trian loss: 43.66346011950624
val loss: 81.09657288838352


In [102]:
model_miami = train_city('miami', batch_size = 20, epochs = 10)

epoch 0 trian loss: 8094.124991832613
epoch 1 trian loss: 320.8347391701099
epoch 2 trian loss: 308.84381753184095
epoch 3 trian loss: 367.1413703523161
epoch 4 trian loss: 293.7242112523118
epoch 5 trian loss: 227.21703706443532
epoch 6 trian loss: 233.7530184572071
epoch 7 trian loss: 226.3822229227612
epoch 8 trian loss: 204.6684912793255
epoch 9 trian loss: 185.38811065361844
val loss: 771.712787172218


In [43]:
model_pittsburgh = train_city('pittsburgh', batch_size = 20, epochs = 10)

epoch 0 trian loss: 5045.750116832194
epoch 1 trian loss: 88.43685108489707
epoch 2 trian loss: 108.13351118697553
epoch 3 trian loss: 91.35974981173264
epoch 4 trian loss: 85.48789503140077
epoch 5 trian loss: 81.22023427973893
epoch 6 trian loss: 76.83747576869554
epoch 7 trian loss: 71.98684428438378
epoch 8 trian loss: 68.27108403608258
epoch 9 trian loss: 68.16439541742261
val loss: 82.7789422627982


In [44]:
model_dearborn = train_city('dearborn', batch_size = 20, epochs = 10)

epoch 0 trian loss: 21632.358976619395
epoch 1 trian loss: 99.1427164769084
epoch 2 trian loss: 120.78400676755658
epoch 3 trian loss: 142.67611968738882
epoch 4 trian loss: 158.61833270686267
epoch 5 trian loss: 152.53529154766892
epoch 6 trian loss: 137.80558234431044
epoch 7 trian loss: 140.81699421857812
epoch 8 trian loss: 128.49511352524883
epoch 9 trian loss: 123.01828291690926
val loss: 97.97044548560191


In [45]:
model_washington_dc = train_city('washington-dc', batch_size = 20, epochs = 10)

epoch 0 trian loss: 6022.460507663209
epoch 1 trian loss: 62.3682036195989
epoch 2 trian loss: 67.8226058935144
epoch 3 trian loss: 77.34792097970899
epoch 4 trian loss: 71.77336365760037
epoch 5 trian loss: 71.1224920297644
epoch 6 trian loss: 69.99568596648461
epoch 7 trian loss: 65.09404799486181
epoch 8 trian loss: 60.329751778712506
epoch 9 trian loss: 67.02109944598826
val loss: 25.569555072782208


In [46]:
model_palo_alto = train_city('palo-alto', batch_size = 20, epochs = 10)

epoch 0 trian loss: 1706.648500871481
epoch 1 trian loss: 37.13787258247461
epoch 2 trian loss: 41.81229658374999
epoch 3 trian loss: 41.58599702310385
epoch 4 trian loss: 40.477038920590424
epoch 5 trian loss: 39.974332967212206
epoch 6 trian loss: 36.87423752674826
epoch 7 trian loss: 34.91997149797177
epoch 8 trian loss: 34.71355988988203
epoch 9 trian loss: 32.787065855189326
val loss: 35.806601844386144


## Make Prediction

In [103]:
cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
models = [model_austin, model_miami, model_pittsburgh, model_dearborn, model_washington_dc, model_palo_alto]

In [104]:
def predict_test(city, model):
    test_dataset = get_city_trajectories(city = city, split = 'test')[0]
    test_dataset = torch.from_numpy(test_dataset)
    pred = model(test_dataset).reshape(-1, 120)
    return pd.DataFrame(pred.detach().numpy())

In [107]:
columns =['ID'] + ['v'+str(i) for i in range(120)]

In [132]:
def predict_all(cities, models):
    out = predict_test(cities[0], models[0]).reset_index()
    out.columns = columns
    out['ID'] = out['ID'].astype(str) + '_' + cities[0]
    
    for city, model in zip(cities[1:], models[1:]):
        temp = predict_test(city, model).reset_index()
        temp.columns = columns
        temp['ID'] = temp['ID'].astype(str) + '_' + city
        out = pd.concat([out, temp])
    
    return out        

In [135]:
test_output = predict_all(cities, models)

In [138]:
test_output.to_csv('test_output.csv', index=False)

In [137]:
test_output

Unnamed: 0,ID,v0,v1,v2,v3,v4,v5,v6,v7,v8,...,v110,v111,v112,v113,v114,v115,v116,v117,v118,v119
0,0_austin,-37.092140,-558.233093,-36.212402,-559.129822,-39.184959,-559.299316,-37.873959,-558.724426,-36.783634,...,-36.389957,-562.540588,-36.678539,-562.922119,-36.588951,-562.212585,-37.313049,-563.563232,-36.154697,-562.256775
1,1_austin,-355.932068,1.240178,-355.515411,1.933314,-356.613861,0.808810,-358.056946,2.498858,-353.676208,...,-357.212036,-8.838811,-356.911804,-6.350550,-359.835480,-7.254091,-357.790405,-8.638774,-354.693695,-6.785054
2,2_austin,47.580032,-244.318924,47.089760,-244.017761,46.274807,-244.131592,47.400848,-244.522568,47.110691,...,47.509930,-245.574371,47.502926,-245.898819,47.491585,-245.613831,47.236183,-246.294342,47.658653,-245.597809
3,3_austin,-125.622185,1844.922119,-128.412018,1848.106079,-127.492073,1848.432373,-123.330025,1844.409546,-124.582161,...,-114.900162,1804.237305,-118.245995,1803.821533,-116.672668,1803.788086,-117.224403,1794.578979,-119.037430,1803.603394
4,4_austin,1220.163330,-631.054932,1220.815918,-632.809875,1222.928589,-634.556335,1221.447998,-629.966431,1222.253052,...,1210.961426,-635.399231,1213.137939,-630.322144,1211.410889,-631.666687,1211.640747,-634.069946,1208.859375,-631.380981
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1681,1681_palo-alto,-1356.685669,-499.824921,-1347.526245,-503.715088,-1337.406494,-491.999847,-1356.929810,-498.521606,-1347.829834,...,-1345.711060,-502.014801,-1341.127930,-485.718719,-1363.687134,-483.366730,-1342.359009,-491.394043,-1354.453247,-493.610565
1682,1682_palo-alto,132.719818,-33.590569,131.826401,-35.181828,132.523865,-35.435230,133.308319,-35.049389,132.180237,...,132.092789,-33.535801,130.835831,-35.273243,131.078506,-33.510445,131.654358,-33.532181,132.085007,-34.368294
1683,1683_palo-alto,-1440.119385,2197.866455,-1441.860352,2192.417969,-1429.445435,2198.081787,-1439.794312,2192.210449,-1432.607666,...,-1432.318726,2195.731201,-1431.524780,2199.824951,-1431.475708,2206.967773,-1429.082886,2208.507568,-1431.573608,2207.785645
1684,1684_palo-alto,1070.881104,1448.572876,1078.947754,1451.064697,1078.194458,1443.860596,1065.563965,1437.965332,1075.657715,...,1062.080444,1449.426636,1072.670776,1454.047363,1063.268433,1463.544312,1066.539307,1457.791870,1071.422974,1459.067383


## Sample a batch of data and visualize 

In [139]:
import matplotlib.pyplot as plt
import random


def show_sample_batch(sample_batch):
    """visualize the trajectory for a batch of samples"""
    inp, out = sample_batch
    batch_sz = inp.size(0)
    agent_sz = inp.size(1)
    
    fig, axs = plt.subplots(1,batch_sz, figsize=(15, 3), facecolor='w', edgecolor='k')
    fig.subplots_adjust(hspace = .5, wspace=.001)
    axs = axs.ravel()   
    for i in range(batch_sz):
        axs[i].xaxis.set_ticks([])
        axs[i].yaxis.set_ticks([])
        
        # first two feature dimensions are (x,y) positions
        axs[i].scatter(inp[i,:,0], inp[i,:,1])
        axs[i].scatter(out[i,:,0], out[i,:,1])

        
for i_batch, sample_batch in enumerate(train_loader):
    inp, out = sample_batch
    print(inp.shape, out.shape)
    break
    """
    TODO:
      implement your Deep learning model
      implement training routine
    """
    show_sample_batch(sample_batch)
    break

torch.Size([20, 50, 2]) torch.Size([20, 60, 2])
