In [1]:
#
# Author: Yu Cao
#
#
#
# 2-layered neural network for autonomous car trajectory predictions

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy as np
import pickle
from glob import glob

import itertools
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import pandas as pd

In [3]:
import pickle

class ArgoverseDataset(Dataset):
    """Map-style dataset that yields one unpickled scene per index.

    Every entry matched by ``<data_path>/*`` is treated as a pickle file;
    entries are ordered by their sorted path.

    Args:
        data_path: directory whose entries are the pickled scenes.
        transform: optional callable applied to each loaded scene.
    """

    def __init__(self, data_path: str, transform=None):
        super().__init__()
        self.data_path = data_path
        self.transform = transform
        # Sorted so that a given index always refers to the same file.
        self.pkl_list = sorted(glob(os.path.join(self.data_path, '*')))

    def __len__(self):
        """Return the number of files discovered under ``data_path``."""
        return len(self.pkl_list)

    def __getitem__(self, idx):
        """Load the ``idx``-th file and run it through ``transform`` if one is set."""
        with open(self.pkl_list[idx], 'rb') as handle:
            scene = pickle.load(handle)
        return self.transform(scene) if self.transform else scene


In [4]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# NOTE: despite the `val_` prefix, this dataset is read from the training
# directory and is what the training loop below iterates over.
new_path = "./new_train/new_train"
val_dataset = ArgoverseDataset(new_path)

In [5]:
# Number of scenes per mini-batch; used by both the training and the test DataLoader.
batch_sz = 300

In [6]:
def my_collate(batch):
    """Collate scenes into flat input/target tensors for each scene's target agent.

    For every scene, the row of the agent named by ``scene['agent_id']`` is
    located in ``scene['track_id']``; that agent's ``p_in`` and ``p_out``
    entries are flattened into 1-D vectors (x0, y0, x1, y1, ...).

    Args:
        batch: iterable of scene dicts providing the keys ``'agent_id'``,
            ``'track_id'``, ``'p_in'`` and ``'p_out'``.

    Returns:
        ``[inp, out]``: two float32 tensors with one row per scene. The rest
        of this notebook feeds them to a network with 38 inputs and 60
        outputs, i.e. it assumes 19 input and 30 output (x, y) steps.

    Raises:
        IndexError: if a scene's ``agent_id`` does not occur in its ``track_id``.
    """
    inp_rows, out_rows = [], []
    for scene in batch:
        # First axis-0 index at which the track id equals the target agent id.
        matches = np.where(scene['track_id'] == scene['agent_id'])[0]
        if len(matches) == 0:
            raise IndexError(
                "agent_id %r not found in track_id" % (scene['agent_id'],)
            )
        agent_idx = matches[0]
        # Look the agent up once and flatten both trajectories in the same pass.
        inp_rows.append(np.asarray(scene['p_in'][agent_idx], dtype=np.float32).reshape(-1))
        out_rows.append(np.asarray(scene['p_out'][agent_idx], dtype=np.float32).reshape(-1))

    inp = torch.tensor(np.array(inp_rows, dtype=np.float32))
    out = torch.tensor(np.array(out_rows, dtype=np.float32))
    return [inp, out]

# Batches are served in dataset order (no shuffling) and collated in the main process.
# NOTE(review): this loader also feeds the training loop; consider whether
# shuffling is wanted there.
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_sz,
    shuffle=False,
    collate_fn=my_collate,
    num_workers=0,
)

In [None]:
# Sanity check: fetch the first batch and display the shape of its input tensor.
train_in, train_label = next(iter(val_loader))
train_in.shape

In [7]:
class Net(torch.nn.Module):
    """Fully connected regressor: one ReLU hidden layer followed by a linear head.

    Args:
        n_feature: size of each input vector.
        n_hidden: number of units in the hidden layer.
        n_output: size of each output vector.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        # The attribute names are also the state-dict keys, so they are kept as-is.
        self.hid1 = torch.nn.Linear(n_feature, n_hidden)
        self.hid2 = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Map a batch of inputs to a batch of unbounded (linear) outputs."""
        hidden = torch.relu(self.hid1(x))
        return self.hid2(hidden)

In [8]:
# 38 flattened input features -> 100 hidden units -> 60 flattened outputs,
# matching the row widths noted in the collate function.
net = Net(38, 100, 60).to(device)
# print(net)  # network architecture

# Mean-squared error on the flattened coordinates, optimised with Adam.
loss_func = nn.MSELoss()
optimizer = optim.Adam(net.parameters(), lr=0.01)

In [25]:
# Two passes over `val_loader`; the mean loss of each run of 50 batches is printed.
# NOTE(review): the recorded output shows identical losses in epoch 1 and
# epoch 2 -- worth confirming that the parameters are actually being updated.
for epoch in range(2):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(val_loader):
        inputs, labels = inputs.to(device), labels.to(device)

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 50 == 0:  # report every 50 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 50))
            running_loss = 0.0

[1,    50] loss: 324.954
[1,   100] loss: 321.473
[1,   150] loss: 324.601
[1,   200] loss: 324.516
[1,   250] loss: 325.949
[1,   300] loss: 329.020
[1,   350] loss: 323.752
[1,   400] loss: 323.963
[1,   450] loss: 323.248
[1,   500] loss: 325.307
[1,   550] loss: 319.126
[1,   600] loss: 321.780
[1,   650] loss: 323.760
[2,    50] loss: 324.954
[2,   100] loss: 321.473
[2,   150] loss: 324.601
[2,   200] loss: 324.516
[2,   250] loss: 325.949
[2,   300] loss: 329.020
[2,   350] loss: 323.752
[2,   400] loss: 323.963
[2,   450] loss: 323.248
[2,   500] loss: 325.307
[2,   550] loss: 319.126
[2,   600] loss: 321.780
[2,   650] loss: 323.760


In [26]:
# Persist the trained network. The whole module object is serialised here
# (not just its state_dict), so loading it back needs the `Net` class to be
# defined.
MODEL_PATH = 'model.pth' # checkpoint file for the saved model
torch.save(net, MODEL_PATH)

In [27]:
# Reload the checkpoint onto the active device so that a model saved on a GPU
# can also be restored on a CPU-only machine.
# NOTE(review): the file holds a fully pickled module; recent PyTorch releases
# may additionally need `weights_only=False` here -- confirm against the
# installed version. Only load checkpoints from a trusted source.
net = torch.load(MODEL_PATH, map_location=device)

In [28]:
# Display the reloaded module to confirm its layer structure.
net

Net(
  (hid1): Linear(in_features=38, out_features=100, bias=True)
  (hid2): Linear(in_features=100, out_features=60, bias=True)
)

In [29]:
# get input tensors for test data
def test_collate(batch):
    inp = []
    for scene in batch:
        a = []
        agent_id = scene['agent_id']
        agent_idx = np.where(scene['track_id'] == agent_id)[0][0]
        p_input = scene['p_in'][agent_idx]
        inp.append([item for sublist in p_input for item in sublist])
    
    inp = torch.FloatTensor(inp)
    return inp

In [30]:
# Display the batch size that the test loader below will use.
batch_sz

300

In [31]:
# Test inputs: same batching as training, but collated without targets.
test_dataset = ArgoverseDataset("./new_val_in/new_val_in")
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_sz,
    shuffle=False,
    collate_fn=test_collate,
    num_workers=0,
)
# Pull the first batch eagerly; `test_in` is kept around for inspection.
test_in = next(iter(test_loader))

# Template whose ID column and column names are reused when writing the CSV outputs.
submission = pd.read_csv('sample_submission.csv')

In [36]:
# Run the network over every test batch. Gradients are not needed for
# inference, so tracking is disabled; otherwise every stored prediction would
# keep its autograd graph alive. Results are moved to the CPU before storing.
net.eval()
predictions = []
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        pred = net(data.to(device)).cpu()
        predictions.append(pred)

In [37]:
# convert output tensors to list of lists
# Flatten the per-batch tensors into one plain-Python row per scene.
predictions_lst = [scene.tolist() for batch in predictions for scene in batch]

# Prefix each row with the ID found at the same position in the sample submission.
scene_id = submission['ID'].tolist()
for i, row in enumerate(predictions_lst):
    row.insert(0, scene_id[i])

In [38]:
# save as csv
# Reuse the template's column names, then write the table without the index column.
first_submission = pd.DataFrame(data=predictions_lst, columns=submission.columns)
first_submission.to_csv('fc_submission2.csv', index=False)

In [39]:
# Scratch cell: evaluates a constant and has no effect on the rest of the notebook.
1+1

2

# Code for report Q3: predictions vs. ground truth on two randomly sampled batches of `val_loader`

In [56]:
# Draw two batch indices (with replacement, so they may coincide) from the
# batches `val_loader` yields. The upper bound comes from the loader itself
# rather than a hard-coded batch count, so it stays valid if the dataset or
# batch size changes.
sam = np.random.randint(0, len(val_loader), 2)

In [58]:
# Collect predictions and ground truth for the sampled batches only.
randpredictions = []
truth = []
last_wanted = int(np.max(sam))
with torch.no_grad():  # inference only: do not keep autograd graphs alive
    for i, (inputs, labels) in enumerate(val_loader, 0):
        if i > last_wanted:
            break  # every sampled batch has been seen; skip loading the rest
        if i not in sam:
            continue
        pred = net(inputs.to(device)).cpu()
        randpredictions.append(pred)
        truth.append(labels)

In [61]:
# convert output tensors to list of lists
# Flatten the sampled prediction batches into one plain-Python row per scene.
randpredictions_lst = [scene.tolist() for batch in randpredictions for scene in batch]

# NOTE(review): the IDs come from the test submission template, so they only
# fill the ID column here and do not identify these scenes.
scene_id = submission['ID'].tolist()
for i, row in enumerate(randpredictions_lst):
    row.insert(0, scene_id[i])

In [62]:
# save as csv
# Write the sampled predictions using the template's column names, without the index column.
rand_submission = pd.DataFrame(data=randpredictions_lst, columns=submission.columns)
rand_submission.to_csv('rand_submission.csv', index=False)

In [63]:
# convert output tensors to list of lists
# Flatten the sampled ground-truth batches into one plain-Python row per scene.
truth_lst = [scene.tolist() for batch in truth for scene in batch]

# NOTE(review): the IDs come from the test submission template, so they only
# fill the ID column here and do not identify these scenes.
scene_id = submission['ID'].tolist()
for i, row in enumerate(truth_lst):
    row.insert(0, scene_id[i])

In [65]:
# save as csv
# Write the sampled ground truth using the template's column names, without the index column.
truth_submission = pd.DataFrame(data=truth_lst, columns=submission.columns)
truth_submission.to_csv('truth_submission.csv', index=False)