In [None]:
# import libraries and packages
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import pickle
from glob import glob
import itertools
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from random import *
import pandas as pd
from sklearn.preprocessing import normalize
import matplotlib.pyplot as plt
import numpy as np
from google.colab import drive
import csv

In [None]:
# unzip the master dataset
from csv import reader
from collections import defaultdict

# Mount Google Drive at /content/gdrive so files under MyDrive are reachable.
drive.mount('/content/gdrive')
# Shell command (IPython `!` syntax): extract train.zip; with no `-d` target,
# unzip writes into the current working directory.
!unzip /content/gdrive/MyDrive/train.zip

# Baseline Model
- According to the Exploratory Data Analysis, there is only one `Agent` per csv file with a total of 50 timestamps
- Our goal is to predict the last 5 positions of the `Agent` using its preceding 45 positions as the model input
- The input features are the x and y values for the `Agent` 

## Data Loader
- Input features with size: batch size x 2 x 45 (row 0 holds the x values, row 1 the y values)
- Output features with size: batch size x 2 x 5

In [None]:
class ArgoverseDataset(Dataset):
    """Map-style dataset that yields one pandas DataFrame per file.

    Every entry found directly under ``data_path`` is treated as a CSV file.
    Entries are kept in lexicographic path order, so a given index always
    refers to the same file.

    Args:
        data_path: Directory whose entries are the per-scene CSV files.
        transform: Optional callable applied to each loaded DataFrame; its
            return value is what ``__getitem__`` hands back.
    """

    def __init__(self, data_path: str, transform=None):
        super().__init__()
        self.data_path = data_path
        self.transform = transform
        # Glob everything in the directory and fix the ordering up front.
        self.csv_list = sorted(glob(os.path.join(self.data_path, '*')))

    def __len__(self):
        """Return the number of files discovered under ``data_path``."""
        return len(self.csv_list)

    def __getitem__(self, idx):
        """Load the ``idx``-th file as a DataFrame, transformed if requested."""
        with open(self.csv_list[idx], 'rb') as handle:
            frame = pd.read_csv(handle)

        if not self.transform:
            return frame
        return self.transform(frame)

# Initialize the training dataset from the files under /content/train/data.
train_data  = ArgoverseDataset(data_path="/content/train/data")
# Second dataset over the new_val_in directory, currently disabled:
#test_data  = ArgoverseDataset(data_path="/content/new_val_in/new_val_in/")

In [None]:
batch_sz = 10


def my_collate(batch, input_len=45, output_len=5):
    """Collate scene DataFrames into (input, target) tensors for the AGENT track.

    For every scene the rows whose ``OBJECT_TYPE`` is ``'AGENT'`` are selected
    and their ``X``/``Y`` columns are split along time: the first
    ``input_len`` steps become the model input and the remaining
    ``output_len`` steps become the prediction target.

    Args:
        batch: Sequence of pandas DataFrames, each with ``OBJECT_TYPE``,
            ``X`` and ``Y`` columns.
        input_len: Number of leading timesteps used as input (default 45).
        output_len: Number of trailing timesteps used as target (default 5).

    Returns:
        ``[inp, out]`` where ``inp`` is a float32 tensor of shape
        ``(batch, 2, input_len)`` and ``out`` is a float32 tensor of shape
        ``(batch, 2, output_len)``. Along axis 1, index 0 holds the X
        coordinates and index 1 holds the Y coordinates.

    Raises:
        ValueError: If a scene's AGENT track does not contain exactly
            ``input_len + output_len`` rows.
    """
    total_len = input_len + output_len
    inputs, targets = [], []

    for scene in batch:
        agent_xy = scene.loc[scene['OBJECT_TYPE'] == 'AGENT', ['X', 'Y']]
        # (T, 2) -> (2, T) via a real transpose. A reshape of the stacked
        # (x, y) pairs would only re-chunk the interleaved x0, y0, x1, y1, ...
        # buffer and mix X and Y values within the same row.
        track = agent_xy.to_numpy(dtype=np.float32).T

        if track.shape[1] != total_len:
            raise ValueError(
                f"expected {total_len} AGENT rows, got {track.shape[1]}"
            )

        inputs.append(track[:, :input_len])
        targets.append(track[:, input_len:])

    # Build one ndarray first: creating a tensor straight from a list of
    # ndarrays is markedly slower.
    inp = torch.as_tensor(np.array(inputs), dtype=torch.float32)
    out = torch.as_tensor(np.array(targets), dtype=torch.float32)

    return [inp, out]

# Batches are drawn in file order (no shuffling) and collated in the main
# process (num_workers=0) with the custom AGENT-track collate function.
train_iter = DataLoader(
    train_data,
    batch_size=batch_sz,
    shuffle=False,
    collate_fn=my_collate,
    num_workers=0,
)

# Pull one batch as a smoke test and display the input tensor's size.
train_in, train_label = next(iter(train_iter))
train_in.size()