## Deep Learining project


*   Gianfranco Di Marco - 1962292
*   Giacomo Colizzi Coin - 1794538


\
**- Trajectory Prediction -**

Is the problem of predicting the short-term (1-3 seconds) and long-term (3-5 seconds) spatial coordinates of various road-agents such as cars, buses, pedestrians, rickshaws, and animals, etc. These road-agents have different dynamic behaviors that may correspond to aggressive or conservative driving styles.

**- nuScenes Dataset -**

Available at. https://www.nuscenes.org/nuscenes. The nuScenes
dataset is a large-scale autonomous driving dataset. The dataset has 3D bounding boxes for 1000 scenes collected in Boston and Singapore. Each scene is 20 seconds long and annotated at 2Hz. This results in a total of 28130 samples for training, 6019 samples for validation and 6008 samples for testing. The dataset has the full autonomous vehicle data suite: 32-beam LiDAR, 6 cameras and radars with complete 360° coverage


> Holger Caesar and Varun Bankiti and Alex H. Lang and Sourabh Vora and Venice Erin Liong and Qiang Xu and Anush Krishnan and Yu Pan and Giancarlo Baldan and Oscar Beijbom: "*nuScenes: A multimodal dataset for autonomous driving*", arXiv preprint arXiv:1903.11027, 2019.

The most important part of this dataset for our project is the Map Expansion Pack, which simplify the trajectory prediction problem

## Requirements

**Environment**

In [1]:
# Necessary since Google Colab supports only Python 3.7
# -> some libraries can be different from local and Colab
try:
    import google.colab
    from google.colab import drive
    ENVIRONMENT = 'colab'
    %pip install tf-estimator-nightly==2.8.0.dev2021122109
    %pip install folium==0.2.1
except:
    ENVIRONMENT = 'local'

**Libraries**

In [2]:
%pip install nuscenes-devkit
%pip install pytorch-lightning

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Learning
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet50, resnet34
from torchvision.transforms import Normalize
from torchmetrics import functional
from sklearn.cluster import KMeans
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint

# Math
import numpy as np

# Dataset
from nuscenes.nuscenes import NuScenes
from nuscenes.prediction import PredictHelper
from nuscenes.prediction.input_representation.static_layers import StaticLayerRasterizer
from nuscenes.prediction.input_representation.agents import AgentBoxesWithFadedHistory
from nuscenes.prediction.input_representation.interface import InputRepresentation
from nuscenes.prediction.input_representation.combinators import Rasterizer
from nuscenes.eval.prediction.config import PredictionConfig, load_prediction_config
from nuscenes.eval.prediction.splits import get_prediction_challenge_split
from nuscenes.eval.prediction import metrics, data_classes

# File system
import os
import shutil
import pickle
import zipfile
import tarfile
import urllib.request

# Logging
from torch.utils.tensorboard import SummaryWriter
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
import warnings

# Generic
import time
from tqdm import tqdm
from typing import List, Dict, Tuple, Any
from collections import defaultdict
from abc import abstractmethod
import multiprocessing as mp
import matplotlib.pyplot as plt

## Configuration

**Generic Parameters**

In [4]:
# Environment-dependent parameters
if ENVIRONMENT == 'colab':
    ROOT = '/content/drive/MyDrive/DL/Trajectory-Prediction-PyTorch/'
    MAX_NUM_WORKERS = 0
    MAX_BATCH_SIZE = 8
    PROGRESS_BAR_REFRESH_RATE = 20
elif ENVIRONMENT == 'local':
    ROOT = os.getcwd()
    # TODO: solve problem with VRAM with PL
    if os.name == 'nt':
        MAX_NUM_WORKERS = 0
        MAX_BATCH_SIZE = 16
    else:
        MAX_NUM_WORKERS = 4
        MAX_BATCH_SIZE = 8
    PROGRESS_BAR_REFRESH_RATE = 10
else:
    raise ValueError("Wrong 'environment' value")

# Train parameters
BATCH_SIZE = MAX_BATCH_SIZE
NUM_WORKERS = MAX_NUM_WORKERS
LEARNING_RATE = 1e-4
MOMENTUM = 0.9
TRAIN_EPOCHES = 20 
PLOT_PERIOD = 1     # 1 = plot at each epoch
LOGDIR = os.path.join(ROOT, 'logdir')
CHECKPOINT_DIR = os.path.join(ROOT, 'checkpoints')
BEST_CHECKPOINT_DIR = os.path.join(CHECKPOINT_DIR, 'best')
CHECKPOINT_MONITOR = "val_loss"
TOP_K_SAVE = 10

# Test parameters
DEBUG_MODE = False

**Network Parameters**

In [5]:
PREDICTION_MODEL = 'P2T'
AGENT_HISTORY = 1
SHORT_TERM_HORIZON = 3
LONG_TERM_HORIZON = 6
TRAJ_HORIZON = SHORT_TERM_HORIZON
if PREDICTION_MODEL == 'CoverNet':
    # - Architecture parameters
    BACKBONE_WEIGHTS = 'ImageNet'
    BACKBONE_MODEL = 'ResNet18'
    K_SIZE = 20000
    # - Trajectory parameters
    TRAJ_LINK = 'https://www.nuscenes.org/public/nuscenes-prediction-challenge-trajectory-sets.zip'
    TRAJ_DIR = os.path.join(ROOT, 'trajectory_sets')
    EPSILON = 2
elif PREDICTION_MODEL == 'P2T':
    # - RL parameters
    INITIAL_STATE = [19, 12]
    POLICY_SAMPLES = 200
    MDP_HORIZON = 40
    # - Reward Model parameters
    TRAIN_RM_EPOCHES = 100 #(?) 
    REWARD_MODEL_LR = 0.0001
    RM_LOGDIR = os.path.join(LOGDIR, 'reward_model')
    # - Trajectory Generator parameters
    PRETRAIN_TG_EPOCHES = 100
    TRAIN_TG_EPOCHES = 400
    TRAJ_HIDDEN_SIZE = 32
    PLAN_HIDDEN_SIZE = 32
    ATT_HIDDEN_SIZE = 32
    POS_EMBEDDING_SIZE = 16
    SCENE_EMBEDDING_SIZE = 32
    AGENT_EMBEDDING_SIZE = 16
    SCENE_FEATURES_SIZE = 64
    AGENT_FEATURES_SIZE = 4
    DYN_FEATURES_SIZE: 3
    ACTIVATION_SLOPE: 0.1
    TRAJ_GEN_LR_PRE: 0.001
    TRAJ_GEN_LR: 0.0001
    TRAJ_CLUSTERS = 10
    MAX_CLIP_NORM = 10
    PRE_TG_LOGDIR = os.path.join(LOGDIR, 'traj_generator', 'pretrain')
    FT_TG_LOGDIR = os.path.join(LOGDIR, 'traj_generator', 'finetune')

**Dataset Parameters**

In [6]:
# Organization parameters
HELPER_NEEDED = False
PREPARE_DATASET = False
PREPROCESSED = True

# File system parameters
PL_SEED = 42
DATAROOT = os.path.join(ROOT, 'data', 'sets', 'nuscenes')
PREPROCESSED_FOLDER = 'preprocessed'
GT_SUFFIX = '-gt'
FILENAME_EXT = '.pt'
ADDITIONAL_EXT = '.npy'
DATASET_VERSION = 'v1.0-trainval'
AGGREGATORS = [{'name': "RowMean"}]

# Other parameters
MAX_PREDICTED_MODES = 25
SAMPLES_PER_SECOND = 2
NORMALIZATION = 'imagenet'
GRID_EXTENT = [-25, 25, -10, 40]

## Dataset

**Initialization**

N.B: The download links in function *urllib.request.urlretrieve()* should be replaced periodically because it expires. Steps to download correctly are (on Firefox):


1.   Dowload Map Expansion pack (or Trainval metadata) from the website
2.   Stop the download
3.   Right-click on the file -> copy download link
4.   Paste the copied link into the first argument of the urlretrieve function. The second argument is the final name of the file

In [7]:
# Drive initialization
if ENVIRONMENT == 'colab':
    drive.mount('/content/drive')

In [8]:
if PREPARE_DATASET:

    # Creating dataset dir
    os.makedirs(DATAROOT, exist_ok=True)
    os.chdir(DATAROOT)

    # Downloading Map Expansion Pack
    os.mkdir('maps')
    os.chdir('maps')
    print("Downloading and extracting Map Expansion pack ...")
    urllib.request.urlretrieve('https://s3.amazonaws.com/data.nuscenes.org/public/v1.0/nuScenes-map-expansion-v1.3.zip?AWSAccessKeyId=AKIA6RIK4RRMFUKM7AM2&Signature=AvzxB6d7CxtpCUYIUChItvDSA3Q%3D&Expires=1651141974', 'nuScenes-map-expansion-v1.3.zip')
    with zipfile.ZipFile('nuScenes-map-expansion-v1.3.zip', 'r') as zip_ref:
        zip_ref.extractall(os.getcwd())
    os.remove('nuScenes-map-expansion-v1.3.zip')

    # Downloading Trainval Metadata
    os.chdir('..')
    print("Downloading and extracting TrainVal metadata ...")
    urllib.request.urlretrieve('https://s3.amazonaws.com/data.nuscenes.org/public/v1.0/v1.0-trainval_meta.tgz?AWSAccessKeyId=AKIA6RIK4RRMFUKM7AM2&Signature=ZDr9UgOoV3UpYCI5RCY%2BNKiZVZ4%3D&Expires=1651142002', 'v1.0-trainval_meta.tgz')
    tar_ref = tarfile.open('v1.0-trainval_meta.tgz', 'r:gz')
    tar_ref.extractall(os.getcwd())
    tar_ref.close()
    os.remove('v1.0-trainval_meta.tgz')
    os.chdir(DATAROOT)

**Dataset definition**

In [9]:
class TrajPredDataset(torch.utils.data.Dataset):
    """ Trajectory Prediction Dataset

    Base Class for Trajectory Prediction Datasets
    """
    def __init__(self, dataset, name, data_type, preprocessed, split,
                 dataroot, preprocessed_folder, filename_ext, additional_ext,
                 gt_suffix, traj_horizon, max_traj_horizon, num_workers):
        """ Dataset Initialization

        Parameters
        ----------
        dataset: the instantiated dataset
        name: name of the dataset
        data_type: data type of the dataset elements
        preprocessed: True if data has already been preprocessed
        split: the dataset split ('train', 'train_val', 'val')
        dataroot: the root directory of the dataset
        preprocessed_folder: the folder containing preprocessed data
        filename_ext: the extension of the generated filenames
        additional_ext: the extenstion of the generated additional files
        gt_suffix: the suffix added after each GT filename (before ext)
        traj_horizon: horizon (in seconds) for the future trajectory
        max_traj_horizon: maximum trajectory horizon possible (in seconds)
        num_workers: num of processes that collect data
        """
        super(TrajPredDataset, self).__init__()
        self.dataset = dataset
        self.name = name
        self.data_type = data_type
        self.preprocessed = preprocessed
        self.split = split
        self.dataroot = dataroot
        self.preprocessed_folder = preprocessed_folder
        self.filename_ext = filename_ext
        self.additional_ext = additional_ext
        self.gt_suffix = gt_suffix
        self.traj_horizon = traj_horizon
        self.max_traj_horizon = max_traj_horizon
        self.num_workers = num_workers
        self.helper = None
        self.tokens = None
        self.static_layer_rasterizer = None
        self.agent_rasterizer = None
        self.input_representation = None

    def __len__(self):
        """ Return the size of the dataset """
        raise NotImplementedError

    def __getitem__(self, idx):
        """ Return an element of the dataset """
        raise NotImplementedError

    @abstractmethod
    def generate_data(self):
        """ Data generation

        If self.preprocessed, directly collect data.
        Otherwise, generate data without preprocess it.
        """
        raise NotImplementedError

    @abstractmethod
    def get_raster(self, token):
        """ Convert a token split into a raster

        Parameters
        ----------
        token: token containing instance token and sample token

        Return
        ------
        raster: the raster image
        """
        raise NotImplementedError

class TrajPredictionDataModule(pl.LightningDataModule):
    """ PyTorch Lightning Data Module for the Trajectory Prediction Problem """
    def __init__(self, train_dataset, val_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS):
        """ Data Module initialization

        Parameters
        ----------
        train_dataset: instance of the train dataset class
        nuscenes_val: instance of the validation dataset class 
        batch_size: number of samples to extract from the dataset at each step
        num_workers: number of cores implied in data collection
        """
        super(TrajPredictionDataModule, self).__init__()
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.batch_size = batch_size
        self.num_workers = num_workers
    
    @abstractmethod
    def setup(self, stage=None):
        """ Setup the data module """
        raise NotImplementedError

    @abstractmethod
    def train_dataloader(self):
        """ Dataloader for the training part """
        raise NotImplementedError

    @abstractmethod
    def val_dataloader(self):
        """ Dataloader for the validation part """
        raise NotImplementedError

    @abstractmethod
    def test_dataloader(self):
        """ Dataloader for the testing part """
        raise NotImplementedError

class nuScenesDataset(TrajPredDataset):
    """ nuScenes Dataset for Trajectory Prediction challenge """
    def __init__(self, helper, data_type='raster', preprocessed=False,
                 split='train', include_static=False, include_add_data=False,
                 dataroot=DATAROOT, preprocessed_folder=PREPROCESSED_FOLDER,
                 filename_ext=FILENAME_EXT, additional_ext=ADDITIONAL_EXT,
                 gt_suffix=GT_SUFFIX, traj_horizon=TRAJ_HORIZON, 
                 max_traj_horizon=LONG_TERM_HORIZON, samples_per_second=SAMPLES_PER_SECOND,
                 agent_history=AGENT_HISTORY, normalization=NORMALIZATION, 
                 grid_extent=GRID_EXTENT, num_workers=NUM_WORKERS):
        """ nuScenes Dataset Initialization

        Parameters
        ----------
        helper: the helper of the instantiated nuScenes dataset (None if not needed)
        data_type: data type of the dataset elements
        preprocessed: True if data has already been preprocessed
        split: the dataset split ('train', 'train_val', 'val')
        include_static: if to return also static rasters in __getitem__
        include_add_data: if to return also additional data in __getitem__
        dataroot: the root directory of the dataset
        preprocessed_folder: the folder containing preprocessed data
        filename_ext: the extension of the generated filenames
        additional_ext: the extenstion of the generated additional files
        gt_suffix: the suffix added after each GT filename (before ext)
        traj_horizon: horizon (in seconds) for the future trajectory
        max_traj_horizon: maximum trajectory horizon possible (in seconds)
        samples_per_second: sampling frequency (in Hertz)
        agent_history: the seconds of considered agent history
        normalization: which kind of normalization to apply to input
        grid_extent: extension of the grid for eventual map discretization
        num_workers: num of processes that collect data
        """
        # General initialization
        super(nuScenesDataset, self).__init__(
            None, 'nuScenes', data_type, preprocessed, split, dataroot, preprocessed_folder, 
            filename_ext, additional_ext, gt_suffix, traj_horizon, max_traj_horizon, num_workers)
        self.include_static = include_static
        self.include_add_data = include_add_data
        self.agent_history = agent_history
        self.grid_extent = grid_extent
        self.helper = helper
        self.tokens = get_prediction_challenge_split(
            split, dataroot=dataroot)
        self.samples_per_second = samples_per_second
        if data_type == 'raster':
            if helper is not None:
                self.static_layer_rasterizer = StaticLayerRasterizer(self.helper)
                self.agent_rasterizer = AgentBoxesWithFadedHistory(
                    self.helper, seconds_of_history=1)
                self.input_representation = InputRepresentation(
                    self.static_layer_rasterizer, self.agent_rasterizer, Rasterizer())
            else:
                self.static_layer_rasterizer = None
                self.agent_rasterizer = None
                self.input_representation = None
        else:   # NOTE: possible also other type of input data
            pass
        if not self.preprocessed:
            print("Preprocessing data ...")
            self.generate_data()

        # Normalization function
        if normalization == 'imagenet':
            self.normalization = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        else:
            raise ValueError("Available only 'imagenet' normalization")
            
    def __len__(self) -> int:
        """ Return the size of the dataset """
        return len(self.tokens)

    def __getitem__(self, idx) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, int]:
        """ Return an element of the dataset 
        
        Parameters
        ----------
        idx: index of the element

        Return
        ------
        agent_state_vector: vector [velocity, acceleration, yaw rate] of the target agent
        raster_img_static: raster map of the scene, with only static element (empty if not self.include_static)
        raster_img_dynamic: raster map of the scene, complete with dynamic elements
        gt_trajectory: ground truth of the agent (agent future)
        idx: index of the element
        """
        # Select subfolder
        if idx < 0:
            idx = len(self) + idx
        subfolder = f'batch_{idx//128}'

        # Load files
        complete_tensor = torch.load(
            os.path.join(self.dataroot, self.preprocessed_folder, self.split,
                         subfolder, self.tokens[idx] + self.filename_ext))
        gt_trajectory = torch.load(
            os.path.join(self.dataroot, self.preprocessed_folder, self.split, subfolder,
                         self.tokens[idx] + self.gt_suffix + self.filename_ext))

        # Separate state and rasters
        agent_state_vector, raster_img = self.tensor_io_conversion(
            "read", None, None, complete_tensor)

        # Include static data
        if not self.include_static:
            raster_img_dynamic = raster_img
            raster_img_static = torch.empty((0,))
        else:
            # TODO: adapt also to non-square images
            raster_img_static, raster_img_dynamic = \
                raster_img.split(raster_img.shape[-1], dim=1)
        
        # Normalization
        raster_img_static = self.normalization(raster_img_static)
        raster_img_dynamic = self.normalization(raster_img_dynamic)

        return agent_state_vector, raster_img_static, raster_img_dynamic, gt_trajectory, idx

    def generate_data(self):
        """ Data generation

        If self.preprocessed, directly collect data.
        Otherwise, generate data without preprocess it.
        """
        # Generate directories if don't exist
        preprocessed_dir = os.path.join(self.dataroot, self.preprocessed_folder)
        split_dir = os.path.join(preprocessed_dir, self.split)
        if self.preprocessed_folder not in os.listdir(self.dataroot):
            os.mkdir(preprocessed_dir)
        if self.split not in os.listdir(preprocessed_dir):
            os.mkdir(split_dir)

        # Variable useful to restore interrupted preprocessing
        preprocessed_batches = os.listdir(split_dir)
        already_preproc = \
            len([f for f in preprocessed_batches
                 if os.path.isfile(os.path.join(split_dir, f))])

        # Create subfolders
        if len(preprocessed_batches) == 0:
            n_subfolders = len(self.tokens) // 128 + int(len(self.tokens) % 128 != 0)
            for i in range(n_subfolders):
                subfolder = 'batch_' + str(i)
                os.mkdir(os.path.join(split_dir, subfolder))

        # Generate data
        if self.data_type == 'raster':
            for i, t in enumerate(tqdm(self.tokens)):
                subfolder = f'batch_{i//128}'
                if i >= int(already_preproc/2):
                    self.generate_raster_data(t, split_dir, subfolder)
        else:
            pass

    def generate_raster_data(self, token, batches_dir, subfolder):
        """ Generate raster and agent data from a dataset token

        The generated input data consists in a tensor like this:
            [raster map | agent state vector]
        The generated ground truth data is the future agent trajectory tensor

        Parameters
        ----------
        token: token containing instance token and sample token
        batches_dir: the directory in which the batches will be generated
        subfolder: the data is divided into subfolders in order to avoid Drive timeouts;
            this parameter tells which is the actual subfolder towhere place data
        """
        # Generate dynamic raster image, state and GT
        instance_token, sample_token = token.split("_")
        raster_img = self.input_representation.make_input_representation(
            instance_token, sample_token)
        raster_tensor = torch.Tensor(raster_img).permute(2, 0, 1) / 255.
        agent_state_vector = torch.Tensor(
            [[self.helper.get_velocity_for_agent(instance_token, sample_token),
              self.helper.get_acceleration_for_agent(instance_token, sample_token),
              self.helper.get_heading_change_rate_for_agent(instance_token, sample_token)]])
        gt_trajectory = torch.Tensor(
            self.helper.get_future_for_agent(instance_token, sample_token,
                                             seconds=self.max_traj_horizon, in_agent_frame=True))

        # Generate additional data
        if self.include_add_data:
            future_indefinite = torch.Tensor(
                self.helper.get_future_for_agent(instance_token, sample_token,
                                                 seconds=300, in_agent_frame=True))
            xy_past = torch.Tensor(
                self.helper.get_past_for_agent(instance_token, sample_token,
                                               seconds=self.agent_history, in_agent_frame=True))
            complete_past = torch.Tensor(
                self.helper.get_past_for_agent(instance_token, sample_token,
                                               seconds=self.agent_history, in_agent_frame=True,
                                               just_xy=False))
            annotations = self.helper.get_annotations_for_sample(sample_token)
            sample_annotation = self.helper.get_sample_annotation(instance_token, sample_token)
            additional_dict = {
                'future_indefinite': future_indefinite,
                'xy_past': xy_past,
                'complete_past': complete_past,
                'annotations': annotations,
                'sample_annotation': sample_annotation
            }
            np.save(os.path.join(batches_dir, subfolder, token + 'add' + self.additional_ext), additional_dict)

        # Handle incomplete GT and nan values
        while gt_trajectory.shape[0] < self.samples_per_second * self.max_traj_horizon:
            gt_trajectory = torch.concat((gt_trajectory, gt_trajectory[-1].unsqueeze(0)))
        gt_trajectory = gt_trajectory[:(self.samples_per_second * self.traj_horizon)]
        nan_mask = agent_state_vector != agent_state_vector
        if nan_mask.any():
            agent_state_vector[nan_mask] = 0

        # Generate static raster image
        if self.include_static:     
            raster_img_static = \
                self.static_layer_rasterizer.make_representation(instance_token, sample_token)
            raster_tensor_static = torch.Tensor(raster_img_static).permute(2, 0, 1) / 255.
            raster_tensor = torch.cat([raster_tensor_static, raster_tensor], dim=1)

        # Concatenate and save to disk
        raster_agent_tensor, _ = \
            self.tensor_io_conversion('write', raster_tensor, agent_state_vector)
        torch.save(raster_agent_tensor, os.path.join(
            batches_dir, subfolder, token + self.filename_ext))
        torch.save(gt_trajectory, os.path.join(
            batches_dir, subfolder, token + self.gt_suffix + self.filename_ext))
 
    @staticmethod
    def tensor_io_conversion(mode, big_t=None, small_t=None, complete_t=None) -> Tuple[torch.Tensor, torch.Tensor]:
        """ Utility IO function to concatenate tensors of different shape

        Normally used to concatenate (or separate) raster map and agent state vector in order to speed up IO

        Parameters
        ----------
        mode: 'write' (concatenate) or 'read' (separate)
        big_t: the bigger tensor (None if we are going to separate tensors)
        small_t: the smaller tensor (None if we are going to separate tensors)
        complete_t: the concatenated tensor (None if we are going to concatenate tensors)

        Return
        ------
        out1: small tensor (mode == 'read') or complete tensor (mode == 'write')
        out2: big tensor (mode == 'read') or empty tensor (mode == 'write') 
        """
        out1, out2 = None, None
        if mode == 'write':    # concatenate
            if big_t is None or small_t is None:
                raise ValueError("Wrong argument: 'big_t' and 'small_t' cannot be None")
            small_t = small_t.permute(1, 0).unsqueeze(2)
            small_t = small_t.expand(-1, -1, big_t.shape[-1])
            out1 = torch.cat((big_t, small_t), dim=1)
            out2 = torch.empty(small_t.shape)
        elif mode == 'read':    # separate
            if complete_t is None:
                raise ValueError("Wrong argument: 'complete_t' cannot be None")
            out1 = complete_t[..., -1, -1].unsqueeze(0)
            out2 = complete_t[..., :-1, :]
        else:
            raise ValueError(
                "Wrong argument 'mode'; available 'read' or 'write'")
        return out1, out2

class nuScenesDataModule(TrajPredictionDataModule):
    """ PyTorch Lightning Data Module for the nuScenes dataset """
    def __init__(self, nuscenes_train, nuscenes_val, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS):
        """ Data Module initialization

        Parameters
        ----------
        nuscenes_train: instance of the nuScenesDataset class (split='train')
        nuscenes_val: instance of the nuScenesDataset class (split='val')
        batch_size: number of samples to extract from the dataset at each step
        num_workers: number of cores implied in data collection
        """
        super(nuScenesDataModule, self).__init__(
            nuscenes_train, nuscenes_val, batch_size, num_workers)

    def setup(self, stage=None):
        """ Setup the data module """
        if stage == "fit" or stage is None:
            self.nusc_train = self.train_dataset
            self.nusc_val = self.val_dataset

        if stage == "test" or stage is None:
            self.nusc_test = self.val_dataset

    def train_dataloader(self):
        """ Dataloader for the training part """
        return torch.utils.data.DataLoader(self.nusc_train, self.batch_size, shuffle=True,
                                           num_workers=self.num_workers, drop_last=True)

    def val_dataloader(self):
        """ Dataloader for the validation part """
        return torch.utils.data.DataLoader(self.nusc_val, self.batch_size, shuffle=False, 
                                           num_workers=self.num_workers, drop_last=True)

    def test_dataloader(self):
        """ Dataloader for the testing part """
        return torch.utils.data.DataLoader(self.nusc_test, self.batch_size, shuffle=False,
                                           num_workers=self.num_workers, drop_last=True)

## Baselines

**Covernet**

In [None]:
class CoverNet(pl.LightningModule):
    """ CoverNet model for Trajectory Prediction """
    def __init__(self, K_size, epsilon, traj_link, traj_dir, device, 
                 lr=LEARNING_RATE, momentum=MOMENTUM,
                 traj_samples=SAMPLES_PER_SECOND*TRAJ_HORIZON):
        """ CoverNet initialization

        Parameters
        ----------
        K_size: number of modes (trajectories) (needed ?)
        epsilon: value (in meters) relative to the space coverage
        traj_link: link from which to download the trajectories
        device: target device of the model (e.g. 'cuda:0')
        lr: learning rate of the optimizer
        momentum: momentum of the optimizer
        traj_samples: number of samples to consider in the trajectory
        """
        super().__init__()
        self.K_size = K_size
        self.convModel = resnet50(pretrained=True)
        self.activation = {}
        def get_activation(name):
            def hook(model, input, output):
                self.activation[name] = output
            return hook
        self.convModel.layer4.register_forward_hook(get_activation('layer4'))
        self.trajectories = prepare_trajectories(epsilon, traj_link, traj_dir)
        self.fc1 = nn.Linear(2051, 4096)
        self.fc2 = nn.Linear(4096, self.trajectories.size()[0])
        self.traj_samples = traj_samples
        self.tgt_device = device
        self.momentum = momentum
        self.lr = lr

    def forward(self, x) -> torch.Tensor:
        """ Network inference """
        img, state = x
        self.convModel(img)
        resnet_output = torch.flatten(self.convModel.avgpool(self.activation['layer4']),start_dim=1)
        x = torch.cat([resnet_output, state], 1)
        x = self.fc1(x)
        x = self.fc2(x)
        return x

    def training_step(self, batch, batch_idx):
        """ Training step of the model

        Parameters
        ----------
        batch: batch of data
        batch_idx: index of the actual batch (from 0 to len(dataset))
        """
        # Collect data
        x_state, _, x_img, gt, _ = batch
        x_state = torch.flatten(x_state, 0, 1)
        reduced_traj = self.trajectories[:, :self.traj_samples]
        # Prepare positive samples
        with torch.no_grad():
            y = get_positives(reduced_traj, gt.to('cpu'))
            y = y.to(self.tgt_device)
        # Inference
        y_hat = self((x_img, x_state))
        loss = F.cross_entropy(y_hat, y)
        # Log
        self.log('train_loss', loss.item(), on_step=True)
            
        return loss

    def validation_step(self, batch, batch_idx):
        """ Validation step of the model

        Parameters
        ----------
        batch: batch of data
        batch_idx: index of the actual batch (from 0 to len(dataset))
        """
        with torch.no_grad():
            # Collect data
            x_state, _, x_img, gt, _ = batch
            x_state = torch.flatten(x_state, 0, 1)
            reduced_traj = self.trajectories[:, :self.traj_samples]
            # Prepare positive samples
            y = get_positives(reduced_traj, gt.to('cpu'))
            y = y.to(self.tgt_device)
            # Inference
            y_hat = self((x_img, x_state))
            loss = F.cross_entropy(y_hat, y)
        # Log
        self.log('val_loss', loss.item(), on_epoch=True)
        
        return loss

    def configure_optimizers(self):
        """ Set the optimizer for the model """
        # TODO: find best optimizer and parameters
        #return torch.optim.Adam(self.parameters(), lr=self.lr)
        return torch.optim.SGD(self.parameters(), lr=self.lr, momentum=self.momentum)

# TODO: check if generated trajectory are expressed in the same frame of the agent
def get_positives(trajectories, ground_truth) -> torch.Tensor:
    """ Get positive samples wrt the actual GT

    Parameters
    ----------
    trajectories: the pre-generated set of trajectories
    ground_truth: the future trajectory for the agent

    Return
    ------
    positive_traj: as defined in the original CoverNet paper, 
        'positive samples determined by the element in the trajectory set
        closest to the actual ground truth in minimum average 
        of point-wise Euclidean distances'
    """
    euclidean_dist = torch.stack([torch.pow(torch.sub(trajectories, gt), 2) 
                                  for gt in ground_truth]).sum(dim=3).sqrt() 
    mean_euclidean_dist = euclidean_dist.mean(dim=2)
    positive_traj = mean_euclidean_dist.argmin(dim=1)
    return positive_traj

def prepare_trajectories(epsilon, download_link, directory) -> torch.Tensor:
    """ Function to download and extract trajectory sets for CoverNet 

    Parameters
    ----------
    epsilon: value (in meters) relative to the space coverage
    download_link: link from which to download trajectory sets
    directory: directory where to download trajectory sets

    Return
    ------
    trajectories: tensor of the trajectory set for the specified epsilon
    """
    # 1. Download and extract trajectories
    filename_zip = 'nuscenes-prediction-challenge-trajectory-sets.zip'
    filename = filename_zip[:-4]
    filename_dir = os.path.join(directory, filename)
    filename_zipdir = os.path.join(directory, filename_zip)
    if (not os.path.isdir(filename_dir) 
        or any(e not in os.listdir(filename_dir)
               for e in ['epsilon_2.pkl', 'epsilon_4.pkl', 'epsilon_8.pkl'])):
        print("Downloading trajectories ...")
        os.makedirs(directory, exist_ok=True)
        urllib.request.urlretrieve(download_link, filename_zipdir)
        with zipfile.ZipFile(filename_zipdir, 'r') as archive:
            archive.extractall(directory)
        os.remove(filename_zipdir)

    # 2. Generate trajectories
    traj_set_path = os.path.join(filename_dir, 'epsilon_' + str(epsilon) + '.pkl')
    trajectories = pickle.load(open(traj_set_path, 'rb'))
    return torch.Tensor(trajectories)

**P2T**

In [None]:
class P2T(nn.Module):
    """ P2T Model for trajectory prediction """
    def __init__(self, mdp, mdp_horizon=MDP_HORIZON, initial_state=INITIAL_STATE, 
                 policy_samples=POLICY_SAMPLES, traj_clusters=TRAJ_CLUSTERS,
                 from_checkpoint=False, pretrain_traj_gen=True, 
                 train_rm_epoches=TRAIN_RM_EPOCHES, pretrain_tg_epoches=PRETRAIN_TG_EPOCHES, 
                 train_tg_epoches=TRAIN_TG_EPOCHES, reward_model_lr=REWARD_MODEL_LR, 
                 traj_gen_lr_pre=TRAJ_GEN_LR_PRE, traj_gen_lr=TRAJ_GEN_LR, rm_logdir=RM_LOGDIR, 
                 pre_tg_logdir=PRE_TG_LOGDIR, ft_tg_logdir=FT_TG_LOGDIR, max_norm=MAX_CLIP_NORM):
        """ P2T Model initialization
        
        Parameters
        ----------
        mdp: Markov Decision Process class instance
        mdp_horizon: horizon (in seconds) of the MDP
        initial_state: initial state of the agent
        policy_samples: how many samples to extract from the policy
        traj_clusters: num of trajectory clusters
        from_checkpoint: True if the model is instantiated from checkpoints
        pretrain_traj_gen: True if the Trajectory Generator should be pretrained
        train_rm_epoches: # epoches to train the Reward Model
        pretrain_tg_epoches: # epoches to pre-train the Trajectory Generator
        train_tg_epoches: # epoches to train (fine-tune) the Trajectory Generator
        reward_model_lr: learning rate of the Reward Model
        traj_gen_lr_pre: learning rate of the Trajectory Generator (pre-train)
        traj_gen_lr: learning rate of the Trajectory Generator (fine-tune)
        max_norm: maximum norm to clip gradients
        """
        self.mdp = mdp
        self.mdp_horizon = mdp_horizon
        self.initial_state = initial_state
        self.policy_samples = policy_samples
        self.traj_clusters = traj_clusters
        self.rm_logger = SummaryWriter(rm_logdir)
        self.pre_tg_logger = SummaryWriter(pre_tg_logdir)
        self.ft_tg_logger = SummaryWriter(ft_tg_logdir)
        self.pretrain_traj_gen = pretrain_traj_gen
        self.pretrain_tg_epoches = pretrain_tg_epoches
        self.train_rm_epoches = train_rm_epoches
        self.train_tg_epoches = train_tg_epoches
        self.reward_model_lr = reward_model_lr
        self.traj_gen_lr_pre = traj_gen_lr_pre
        self.traj_gen_lr = traj_gen_lr
        self.max_norm = max_norm
        if not from_checkpoint:
            self.reward_model = RewardModel()
            self.traj_generator = Trajectory_Generator(pretrain_traj_gen)
        else:
            # TODO: handle checkpoints loading
            pass

    def train(self, trainval_dm):
        """ P2T function for training all components 
        
        Parameters
        ----------
        trainval_dm: PyTorch Lightning datamodule containing trainval data
        """
        print("Starting P2T training")
        start = time.time()
        self.train_reward_model(trainval_dm)
        self.train_traj_generator(trainval_dm)
        print("P2T Model trained in %f s" % (time.time() - start))

    def train_reward_model(self, trainval_dm):
        """ Reward Model training 
        
        Parameters
        ----------
        trainval_dm: PyTorch Lightning datamodule containing trainval data
        """
        print("Starting P2T Reward Model training")
        start = time.time()
        print("Reward Model trained in %f s" % (time.time() - start))

    @ignore_warnings(category=ConvergenceWarning)
    def train_traj_generator(self, trainval_dm):
        """ Trajectory Generator training
        
        Parameters
        ----------
        trainval_dm: PyTorch Lightning datamodule containing trainval data
        """
        # Training initialization
        print("Starting P2T Trajectory Generator training")
        start = time.time()
        self.reward_model.eval()
        loss = TrajGenLoss('train')
        optimizer = torch.optim.Adam(
            self.traj_generator.parameters(), lr=self.traj_gen_lr)
        train_dataloader = trainval_dm.train_dataloader()
        val_dataloader = trainval_dm.val_dataloader()

        # Trajectory Generator pre-training
        if self.pretrain_traj_gen:
            self.pretrain_traj_generator(self, trainval_dm)

        # Training Loop
        for e in range(self.train_tg_epoches):
            print("-------- [TG] Epoch %d --------" % e)

            # Train
            self.traj_generator.train()
            for b, data in enumerate(train_dataloader):
                
                # Data preparation
                (x_img_static, gt, past, motion_feats, _, agents, _) = \
                    self.traj_data_preparation(data, trainval_dm)               

                # Reward Model inference
                rew_path, rew_goal, img_feats = self.reward_model(motion_feats, x_img_static)

                # MaxEntropy Reinforcement Learning
                grid_extent = trainval_dm.train_dataset.grid_extent
                svf, policy = max_entropy_rl(self.mdp, self.initial_state, 
                                             rew_path.detach(), rew_goal.detach())
                plan, scene_feats, agent_feats = \
                    sample_policy(self.mdp, self.initial_state, policy, 
                                  self.policy_samples, grid_extent, img_feats, agents)

                # Trajectory Prediction
                sc_feat_size = self.traj_generator.scene_feat_size
                ag_feat_size = self.traj_generator.agent_feat_size
                plan = plan.reshape(-1, self.mdp.horizon, 2).permute(1, 0, 2).to(device)
                scene_feats = scene_feats.reshape(
                    -1, self.mdp.horizon, sc_feat_size).permute(1, 0, 2).to(device)
                agent_feats = agent_feats.reshape(
                    -1, self.mdp.horizon, ag_feat_size).permute(1, 0, 2).to(device)
                past = past.unsqueeze(2).repeat(1, 1, self.policy_samples, 1)
                past = past.reshape(past.shape[0], -1, past.shape[3])
                traj_pred = self.traj_generator(past, plan, scene_feats, agent_feats)

                # K-Means trajectory clustering
                # NOTE: problems with multiprocessing in Windows. Verify on Ubuntu
                # TODO: try to avoid loops in final clustering
                traj_pred = traj_pred.reshape(
                    -1, self.num_samples, traj_pred.shape[1], traj_pred.shape[2])
                traj_flat = traj_pred.flatten(-2).detach().cpu().numpy()
                clust_ids = [self.kmeans_cluster(t, self.traj_clusters) for t in traj_flat]                
                traj_clust = torch.empty(traj_pred.shape[0], self.traj_clusters, 
                                         traj_pred.shape[2], traj_pred.shape[3])
                for n in range(traj_pred.shape[0]):
                    centroids = torch.empty(
                            self.traj_clusters, traj_pred.shape[2], traj_pred.shape[3])
                    for cl in list(range(self.traj_clusters)):
                        centroids[cl] = traj_pred[cl, np.where(clust_ids == cl)[0]].mean(dim=0)
                    traj_clust[n] = centroids
                traj_clust = traj_clust.to(device)
                
                # Learning
                batch_loss = loss(traj_clust, gt)
                optimizer.zero_grad()
                batch_loss.backward()
                a = torch.nn.utils.clip_grad_norm_(
                    self.traj_generator.parameters(), self.max_norm)
                optimizer.step()

                # Logging
                loss_val = batch_loss.item()
                iterations = e*len(train_dataloader) + b
                self.ft_tg_logger.add_scalar('train loss', loss_val, iterations)
                print("[TG %d] %d - train loss = %f" % (e, b, loss_val)) 

            # Val
            val_losses = []
            self.traj_generator.eval()
            for b, data in enumerate(val_dataloader):

                # Data preparation
                (x_img_static, gt, past, motion_feats, _, agents, _) = \
                    self.traj_data_preparation(data, trainval_dm)               

                # Reward Model inference
                rew_path, rew_goal, img_feats = self.reward_model(motion_feats, x_img_static)

                # MaxEntropy Reinforcement Learning
                grid_extent = trainval_dm.train_dataset.grid_extent
                svf, policy = max_entropy_rl(self.mdp, self.initial_state, 
                                             rew_path.detach(), rew_goal.detach())
                plan, scene_feats, agent_feats = \
                    sample_policy(self.mdp, self.initial_state, policy, 
                                  self.policy_samples, grid_extent, img_feats, agents)

                # Trajectory Prediction
                sc_feat_size = self.traj_generator.scene_feat_size
                ag_feat_size = self.traj_generator.agent_feat_size
                plan = plan.reshape(-1, self.mdp.horizon, 2).permute(1, 0, 2).to(device)
                scene_feats = scene_feats.reshape(
                    -1, self.mdp.horizon, sc_feat_size).permute(1, 0, 2).to(device)
                agent_feats = agent_feats.reshape(
                    -1, self.mdp.horizon, ag_feat_size).permute(1, 0, 2).to(device)
                past = past.unsqueeze(2).repeat(1, 1, self.policy_samples, 1)
                past = past.reshape(past.shape[0], -1, past.shape[3])
                traj_pred = self.traj_generator(past, plan, scene_feats, agent_feats)

                # K-Means trajectory clustering
                # NOTE: problems with multiprocessing in Windows. Verify on Ubuntu
                # TODO: try to avoid loops in final clustering
                traj_pred = traj_pred.reshape(
                    -1, self.num_samples, traj_pred.shape[1], traj_pred.shape[2])
                traj_flat = traj_pred.flatten(-2).detach().cpu().numpy()
                clust_ids = [self.kmeans_cluster(t, self.traj_clusters) for t in traj_flat]                
                traj_clust = torch.empty(traj_pred.shape[0], self.traj_clusters, 
                                         traj_pred.shape[2], traj_pred.shape[3])
                for n in range(traj_pred.shape[0]):
                    centroids = torch.empty(
                            self.traj_clusters, traj_pred.shape[2], traj_pred.shape[3])
                    for cl in list(range(self.traj_clusters)):
                        centroids[cl] = traj_pred[cl, np.where(clust_ids == cl)[0]].mean(dim=0)
                    traj_clust[n] = centroids
                traj_clust = traj_clust.to(device)

                # Logging
                loss_val = loss(traj_clust, gt).item()
                val_losses.append(loss_val)
                print("[TG %d] %d - val loss = %f" % (e, b, loss_val))  

            # Validation logging
            self.ft_tg_logger.add_scalar('val loss', np.array(val_losses).mean(), e)                     
        
        self.ft_tg_logger.close()
        print("Trajectory Generator trained in %f s" % (time.time() - start))

    def pretrain_traj_generator(self, trainval_dm):
        """ Trajectory Generator pre-training
        
        Parameters
        ----------
        trainval_dm: PyTorch Lightning datamodule containing trainval data
        """
        # Training initialization
        print("Starting P2T Trajectory Generator pre-training")
        start = time.time()
        loss = TrajGenLoss('pretrain')
        optimizer = torch.optim.Adam(
            self.traj_generator.parameters(), lr=self.traj_gen_lr_pre)
        train_dataloader = trainval_dm.train_dataloader()
        val_dataloader = trainval_dm.val_dataloader()

        # Training loop
        for e in range(self.pretrain_tg_epoches):
            print("-------- [TG (Pre)] Epoch %d --------" % e)

            # Train
            self.traj_generator.train()
            for b, data in enumerate(train_dataloader):

                # Data preparation
                (x_img_static, gt, past, motion_feats, plan_e, agents, grid_idcs) = \
                    self.traj_data_preparation(data, trainval_dm)
                grid_idcs.to(device)
                plan_e.to(device)

                # Reward Model inference
                _, _, img_feats = self.reward_model(x_img_static, motion_feats)

                # Trajectory Prediction
                scene_feats, agent_feats = \
                    extract_plan_features(grid_idcs, img_feats, agents)
                traj_pred = self.traj_generator(past, plan_e, scene_feats, agent_feats)

                # Learning
                batch_loss = loss(traj_pred, gt)
                optimizer.zero_grad()
                batch_loss.backward()
                a = torch.nn.utils.clip_grad_norm_(
                    self.traj_generator.parameters(), self.max_norm)
                optimizer.step()    

                # Logging
                loss_val = batch_loss.item()
                iterations = e*len(train_dataloader) + b
                self.pre_tg_logger.add_scalar('train loss', loss_val, iterations)
                print("[TG (pre) %d] %d - train loss = %f" % (e, b, loss_val)) 

            # Val
            val_losses = []
            self.traj_generator.eval()
            for b, data in enumerate(val_dataloader):

                with torch.no_grad():

                    # Data preparation
                    (x_img_static, gt, past, motion_feats, plan_e, agents, grid_idcs) = \
                        self.traj_data_preparation(data, trainval_dm)
                    grid_idcs.to(device)
                    plan_e.to(device)

                    # Reward Model inference
                    _, _, img_feats = self.reward_model(x_img_static, motion_feats)

                    # Trajectory Prediction
                    scene_feats, agent_feats = \
                        extract_plan_features(grid_idcs, img_feats, agents)
                    traj_pred = self.traj_generator(past, plan_e, scene_feats, agent_feats)

                    # Logging
                    loss_val = loss(traj_pred, gt).item()
                    val_losses.append(loss_val)
                    print("[TG (pre) %d] %d - val loss = %f" % (e, b, loss_val))                    

            # Validation logging
            self.pre_tg_logger.add_scalar('val loss', np.array(val_losses).mean(), e)

        self.pre_tg_logger.close()
        print("Trajectory Generator pre-trained in %f s" % (time.time() - start))

    def traj_data_preparation(self, data, datamodule):
        """ Prepare data for Trajectory Generator training. Useful to avoid repeated code 
        
        Parameters
        ----------
        data: batch of data containing state, scene, future and index
        datamodule: PL DataModule contatining trainval data

        Return
        ------
        gt: future of the agent (ground truth)
        past: history of the agent 
        motion_feats: motion and position features for the reward model
        plan_e: coords of the grid cells relative to the SVF (expert)
        agents: tensor with states of other agents in the scene
        grid_idcs: grid coords of the SVF (expert)
        """
        _, x_img_static, _, gt, _ = data
        past, _, motion_feats, plan_e, agents, grid_idcs = \
            extract_expert_data(datamodule.train_dataset, data)
        past = past.to(device)
        motion_feats = motion_feats.to(device)
        agents = agents.to(device)
        x_img_static = x_img.to(device)
        return (x_img_static, gt, past, motion_feats, 
                plan_e, agents, grid_idcs)

    def forward(self, x):
        """ P2T inference """
        pass

    @staticmethod
    def kmeans_cluster(data, n_clusters):
        cluster_data = KMeans(n_clusters, n_init=1, max_iter=100).fit(data)
        return cluster_data.labels_

In [None]:
# TODO: Reinforcement Learning part

class MDP:
    def __init__(self):
        pass

def max_entropy_rl(mpd, initial_state, reward_path, reward_goal):
    pass

def sample_policy(mdp, initial_state, policy, policy_samples, grid_extent, img_feats, agents):
    pass

In [None]:
class RewardModel(pl.LightningModule):
    """ Model to extract rewards for Max-Ent RL"""
    def __init__(self, **kwargs):
        super().__init__()

        backbone = resnet34(pretrained=True)
        self.cnn_feat = nn.Sequential(backbone.conv1, backbone.bn1, backbone.relu, backbone.maxpool, backbone.layer1)
        self.conv1 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=2, stride=2)
        self.cnn_1 = nn.Conv2d(in_channels=32+3, out_channels=32, kernel_size=1)
        self.cnn_2 = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=1)
        # self.cnn_g_1 = nn.Conv2d(in_channels=32+3, out_channels=32, kernel_size=1)
        # self.cnn_g_2 = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=1)
        self.cnn_p = nn.Sequential(self.cnn_1, self.cnn_2)
        self.cnn_g = nn.Sequential(self.cnn_1, self.cnn_2)
        
        self.log_sig = nn.LogSigmoid()
        self.relu = nn.ReLU()
        # TODO: define motion feats x and y
        """
            y = (np.linspace(self.grid_extent[3] - grid_size_m/(self.grid_dim*2),
                            self.grid_extent[2] + grid_size_m/(self.grid_dim*2),
                            self.grid_dim)).reshape(-1, 1).repeat(self.grid_dim, axis=1)
            x = (np.linspace(self.grid_extent[0] + grid_size_m/(self.grid_dim*2),
                            self.grid_extent[1] - grid_size_m/(self.grid_dim*2),
                            self.grid_dim)).reshape(-1, 1).repeat(self.grid_dim, axis=1).transpose()
        """
        self.y = torch.linspace(40 - 50/(25*2),
            -10 + 50/(25*2),
            25).reshape(-1, 1).repeat_interleave(25, 1)
        self.x = torch.linspace(-25 + 50/(25*2),
            25 - 50/(25*2),
            25).reshape(-1, 1).repeat_interleave(25, 1).transpose()
        self.motion_feats = torch.zeros((3, 25, 25))

    def forward(self, x) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """ Network inference """
        # TODO: change state extraction
        img, motion_feats = x

        img_feats = self.cnn_feat(img)
        img_feats = self.conv1(img_feats)
        # TODO: check if relu needed
        img_feats = self.relu(img_feats)

        x = torch.cat([img_feats, motion_feats], 1)
        r_path = self.log_sig(self.cnn_p(x))
        r_goal = self.log_sig(self.cnn_g(x))
        
        return r_path, r_goal, img_feats

    def training_step(self, batch, batch_idx):
        """ Training step of the model

        Parameters
        ----------
        batch: batch of data
        batch_idx: index of the actual batch (from 0 to len(dataset))
        """
        # Collect data
        x_state, x_img, gt, _ = batch
        print(x_state.shape())
        v = x_state[:,0]
        if np.isnan(v):
            v = 0
        self.motion_feats[0] = v
        self.motion_feats[1] = self.x/50
        self.motion_feats[2] = self.y/50

        # Inference
        y_hat = self((x_img, self.motion_feats))
        
        # TODO: log-likelihood loss
        loss = None 
        # Log
        self.log('train_loss', loss.item(), on_step=True)
            
        return loss

In [None]:
# TODO: check all shapes and correct working
# NOTE: in particular, check if permutation (1, 0, 2) is a problem or should be done
class Trajectory_Generator(nn.Module):
    """ Trajectory Generator class """
    def __init__(self, pretrain=True, traj_hidden_size=TRAJ_HIDDEN_SIZE, plan_hidden_size=PLAN_HIDDEN_SIZE,
                 att_hidden_size=ATT_HIDDEN_SIZE, pos_embedding_size=POS_EMBEDDING_SIZE, 
                 scene_embedding_size=SCENE_EMBEDDING_SIZE, agent_embedding_size=AGENT_EMBEDDING_SIZE,
                 scene_features_size=SCENE_FEATURES_SIZE, agent_features_size=AGENT_FEATURES_SIZE, 
                 dyn_features_size=DYN_FEATURES_SIZE, slope=ACTIVATION_SLOPE,
                 traj_samples=SAMPLES_PER_SECOND*TRAJ_HORIZON, lr=TRAJ_GEN_LR_PRE):
        """ Trajectory Generator initialization
        
        Parameters
        ----------
        pretrain: True if the model should be first pretrained to speed up convergence
        traj_hidden_size: size of the hidden layer of the GRU trajectory encoder/decoder
        plan_hidden_size: size of the hidden layer of the GRU plan encoder
        att_hidden_size: size of the hidden layer of the attention part in the final decoder
        pos_embedding_size: size of the linear layer for the position embedding 
        scene_embedding_size: size of the linear layer for the scene embedding
        agent_embedding_size: size of the linear layer for the agent embedding
        scene_features_size: size of the scene features at each grid location
        agent_features_size: size of the agent features at each grid location
        dynamic_features_size: additional motion features to add to the embedding layer
            0 -> x, y
            1 -> x, y, velocity
            2 -> x, y, velocity, acceleration
            3 -> x, y, velocity, acceleration, yaw rate
        slope: slope (positive or negative) of the activation function
        traj_samples: number of samples to consider in the trajectory
        """
        self.scene_feat_size = scene_features_size
        self.agent_feat_size = agent_features_size
        super(Trajectory_Generator, self).__init__()
        # ---------------------------------------------------------------------------------------------------- #
        # ---------------------------------------| Generator Structure |-------------------------------------- # 
        # ---------------------------------------------------------------------------------------------------- #
        self.motion_encoder = MotionEncoder(
            traj_hidden_size, pos_embedding_size, 
            dyn_features_size, slope
        )
        self.plan_encoder = PlanEncoder(
            plan_hidden_size, pos_embedding_size, 
            scene_embedding_size, agent_embedding_size,
            scene_features_size, agent_features_size, slope
        )
        self.att_decoder = AttentionDecoder(
            att_hidden_size, traj_hidden_size, plan_hidden_size, traj_samples
        )
        # ---------------------------------------------------------------------------------------------------- #

    def forward(self, hist_motion, plan, scene_feats, agent_feats):
        """ Trajectory Generator inference
        Input:
            - hist_motion: history motion tensor
            - plan: tensor of waypoints
            - scene_feats: scene features
            - agent_feats: agent features
        Output: trajectory -> decoded trajectory tensor
        """
        enc_motion = self.motion_encoder(hist_motion)
        enc_plan = self.plan_encoder(plan, scene_feats, agent_feats)
        trajectory = self.att_decoder(hist_motion, enc_motion, enc_plan)
        return trajectory


class MotionEncoder(nn.Module):
    """ Motion Encoder class for Trajectory Generator """
    def __init__(self, traj_hidden_size, pos_embedding_size, dyn_features_size, slope=ACTIVATION_SLOPE):
        """ Motion Encoder initialization
        
        Parameters
        ----------
        traj_hidden_size: size of the hidden layer of the GRU trajectory encoder/decoder
        plan_hidden_size: size of the hidden layer of the GRU plan encoder
        pos_embedding_size: size of the linear layer for the position embedding (x-y)
        dyn_features_size: additional motion features to add to the embedding layer
            0 -> x, y
            1 -> x, y, velocity
            2 -> x, y, velocity, acceleration
            3 -> x, y, velocity, acceleration, yaw rate
        slope: slope (positive or negative) of the activation function 
        """
        self.embedding = nn.Linear(2+dyn_features_size, pos_embedding_size)
        self.activation = nn.LeakyReLU(slope)
        self.encoder = nn.GRU(pos_embedding_size, traj_hidden_size)

    def forward(self, hist_motion) -> torch.Tensor:
        """ Motion Encoder inference 

        Input: hist_motion -> history motion tensor
        Output: enc_motion -> encoded motion tensor
        """
        emb_features = self.activation(self.embedding(hist_motion))
        output, enc_motion = self.encoder(emb_features)
        return enc_motion


class PlanEncoder(nn.Module):
    """ Plan Encoder class for Trajectory Generator """
    def __init__(self, plan_hidden_size, 
                 pos_embedding_size, scene_embedding_size, agent_embedding_size, 
                 scene_features_size, agent_features_size, slope=ACTIVATION_SLOPE):
        """ Plan Encoder initialization
        
        Parameters
        ----------
        plan_hidden_size: size of the hidden layer of the GRU plan encoder
        pos_embedding_size: size of the linear layer for the position embedding 
        scene_embedding_size: size of the linear layer for the scene embedding
        agent_embedding_size: size of the linear layer for the agent embedding
        scene_features_size: size of the scene features at each grid location
        agent_features_size: size of the agent features at each grid location
        slope: slope (positive or negative) of the activation function 
        """
        self.pos_embedding = nn.Linear(2, pos_embedding_size)
        self.scene_embedding = nn.Linear(scene_features_size, scene_embedding_size)
        self.agent_embedding = nn.Linear(agent_features_size, agent_embedding_size)
        self.activation = nn.LeakyReLU(slope)
        self.encoder = nn.GRU(
            pos_embedding_size + scene_embedding_size + agent_embedding_size,
            plan_hidden_size, bidirectional=True)
    
    def forward(self, plan, scene_feats, agent_feats) -> torch.Tensor:
        """ Plan Encoder inference
        Input:
            - plan: tensor of waypoints
            - scene_feats: scene features
            - agent_feats: agent features
        Output: enc_plan -> encoded plan tensot
        """
        # Embedding
        emb_features = self.activation(torch.cat((
            self.pos_embedding(plan),
            self.scene_embedding(scene_feats),
            self.agent_embedding(agent_feats)),
            dim=2
        ))
        # Reorganizing plans
        plan_sum = torch.sum(torch.abs(plan), dim=2)
        plan_lengths = torch.sum(plan_sum[1:, :]!=0, dim=0) + 1
        plan_lengths_sorted, indices = torch.sort(plan_lengths, descending=True)
        # Reorganizing embeddings
        emb_packed = nn.utils.rnn.pack_padded_sequence(
            emb_features[:, indices, :], plan_lengths_sorted.cpu(), batch_first=False)
        # Encoding
        enc_plan_packed, output = self.encoder(emb_packed)
        enc_plan_unpacked, _ = nn.utils.rnn.pad_packed_sequence(enc_plan_packed)
        enc_plan = enc_plan_unpacked[:, indices.sort(), :]
        return enc_plan


class AttentionDecoder(nn.Module):
    """ Attention Decoder class for Trajectory Generator """
    def __init__(self, att_hidden_size, traj_hidden_size, plan_hidden_size, 
                 traj_samples=SAMPLES_PER_SECOND*TRAJ_HORIZON):
        """ Attention Decoder initialization
        
        Parameters
        ----------
        att_hidden_size: size of the hidden layer of the attention part in the final decoder
        traj_hidden_size: size of the hidden layer of the GRU trajectory encoder/decoder
        plan_hidden_size: size of the hidden layer of the GRU plan encoder
        traj_samples: number of samples to consider in the trajectory
        """
        self.traj_samples = traj_samples
        self.attention = nn.Sequential(
            nn.Linear(2*plan_hidden_size + traj_hidden_size, att_hidden_size),
            nn.Tanh(),
            nn.Linear(att_hidden_size, 1),
            nn.Softmax(dim=0)
        )
        self.dec_state_op = nn.Linear(traj_hidden_size, 2)
        self.decoder = nn.GRUCell(2*plan_hidden_size, traj_hidden_size)

    def forward(self, hist_motion, enc_motion, enc_plan, device) -> torch.Tensor:
        """ Attention Decoder inference
        
        Input: 
            - hist_motion: history motion tensor
            - enc_motion: encoded motion tensor
            - enc_plan: encoded plan tensor
            - device: execution device (e.g. cuda:0)
        Output: dec_traj -> decoded trajectory tensor
        """
        # Initialization
        dec_traj = torch.empty(
            (self.traj_samples, hist_motion.shape[1], 2), 
            dtype=torch.float32).to(device)
        motion = enc_motion.squeeze()
        # Attention loop
        for s in range(self.traj_samples):
            att_input = torch.cat((motion.repeat(enc_plan.shape[0], 1, 1), enc_plan), dim=2)
            att_features = self.attention(att_input)
            dec_input = (att_features.repeat(1, 1, enc_plan.shape[2])*enc_plan).sum(dim=0)
            motion = self.decoder(dec_input, motion)
            dec_traj[s] = self.dec_state_op(motion)
        return dec_traj.permute(1, 0, 2)


# TODO: handle the presence of no clusters with np.inf
class TrajGenLoss(nn.Module):
    """ Loss for the Trajectory Generator training """
    def __init__(self, phase='train', loss_fun='min_ade_k'):
        """ Trajectory Generator loss initialization

        Parameters
        ----------
        phase: 'pretrain' or 'train'
        loss_fun: function to compute loss; available ['min_ade_k']
        """
        if phase == 'pretrain':
            self.loss_fun = nn.MSELoss()
            if loss_fun is not None:
                warnings.warn("In pretrain phase only MSELoss is available")
        else:
            if loss_fun == 'min_ade_k':
                self.loss_fun = self.min_ade_k
            else:
                # TODO: implement also different kinds of loss
                pass

    def min_ade_k(self, pred, gt):
        """ Min_Ade_K loss for the Trajectory Generator 
        
        Parameters
        ----------
        pred: prediction
        gt: ground truth
        """
        pass

    def forward(self, x):
        """ Trajectory Generator loss computation """
        pass

## Utilities

**Data extraction**

In [None]:
def extract_expert_data(dataset, batch):
    """ Extract Expert data from the passed dataset 
    
    Parameters
    ----------
    dataset: dataset from which to extract expert data
    batch: batch of data at current time

    Return
    ------
    """
    pass

def extract_plan_features(plan, raster_feats, agent_feats):
    """ Given a plan, extract location coordinates and map/agent features
    
    Parameters
    ----------
    plan: plan from which to extract data (single or batch)
    raster_feats: tensor of scene features
    agent_feats: tensor of agent features

    Return
    ------
    """
    pass

**Metrics**

In [10]:
def compute_metrics(predictions: List[data_classes.Prediction], ground_truths: List[np.ndarray], 
                    helper, aggregators=AGGREGATORS) -> Dict[str, Any]:#Dict[str, Dict[str, List[float]]]:
    """ Utility eval function to compute dataset metrics

    Parameters
    ----------
    predictions: list of predictions made by the model (in Prediction class format)
    ground_truths: the real trajectories of the agent (SHAPE -> [len(dataset), n_samples, state_dim])
    helper: nuScenes dataset helper
    aggregators: functions to aggregate metrics (e.g. mean)

    Return
    ------
    metric_output: dictionary of the computed metrics:
        - minADE_5: The average of pointwise L2 distances between the predicted trajectory 
                    and ground truth over the 5 most likely predictions.
        - minADE_10: The average of pointwise L2 distances between the predicted trajectory 
                    and ground truth over the 10 most likely predictions.
        - missRateTop_2_5: Proportion of misses relative to the 5 most likely trajectories
                        over all agents
        - missRateTop_2_10: Proportion of misses relative to the 10 most likely trajectories
                        over all agents
        - minFDE_1: The final displacement error (FDE) is the L2 distance 
                    between the final points of the prediction and ground truth, computed
                    on the most likely trajectory
        - offRoadRate: the fraction of trajectories that are not entirely contained
                    in the drivable area of the map.
    """
    # 1. Define metrics
    print("\t - Metrics definition ...")
    aggregators = \
        [metrics.deserialize_aggregator(agg) for agg in aggregators]
    min_ade = metrics.MinADEK([5, 10], aggregators)
    miss_rate = metrics.MissRateTopK([5, 10], aggregators)
    min_fde = metrics.MinFDEK([1], aggregators)
    if helper is not None:
        # FIXME: instantiating offRoadRate class makes RAM explode
        #offRoadRate = metrics.OffRoadRate(self.helper, self.aggregators)
        pass
    else:
        offRoadRate = None

    # 2. Compute metrics
    metric_list = []
    print("\t - Effective metrics computation ...")
    for p, pred in enumerate(tqdm(predictions)):
        # TODO: check for argument shapes
        minADE_5 = min_ade(ground_truths[p], pred)[0][0]
        minADE_10 = min_ade(ground_truths[p], pred)[0][1]
        missRateTop_2_5 = miss_rate(ground_truths[p], pred)[0][0]
        missRateTop_2_10 = miss_rate(ground_truths[p], pred)[0][1]
        minFDE_1 = min_fde(ground_truths[p], pred)
        #offRoadRate = offRoadRate(ground_truth[i], prediction)
        metric = {'minADE_5': minADE_5, 'missRateTop_2_5': missRateTop_2_5,
                  'minADE_10': minADE_10, 'missRateTop_2_10': missRateTop_2_10,
                  'minFDE_1': minFDE_1}#, 'offRoadRate': offRoadRate}
        metric_list.append(metric)

    # 3. Aggregate
    print("\t - Metrics aggregation ...")
    aggregations: Dict[str, Dict[str, List[float]]] = defaultdict(dict)
    metric_names = list(metric_list[0].keys())
    metrics_dict = {name: np.array([metric_list[i][name] for i in range(len(metric_list))]) 
                    for name in metric_names}
    for metric in metric_names:
        for agg in aggregators:
            aggregations[metric][agg.name] = agg(metrics_dict[metric])

    return aggregations    

**Plotting**

In [11]:
def plot_train_data(train_iterations, val_iterations, epoches, train_losses, val_losses):
    """ Plot a graph with the training trend

    Parameters
    ----------
    train_iterations: number of iterations for each epoch [train]
    val_iterations: number of iterations for each epoch [val]
    epoches: actual epoch number (starting from 1)
    train_losses: array of loss values [train]
    val_losses: array of loss values [val]
    """
    # Data preparation
    train_iterations_list = list(range(epoches*(train_iterations)))
    val_iterations_list = list(range(epoches*(val_iterations)))
    epoches_list = list(range(epoches))

    # Adjust validation array dimension
    val_error = len(val_losses) - len(val_iterations_list)
    if val_error > 0:
        val_losses = val_losses[:-val_error]

    # Per-iteration plot
    fig = plt.figure()
    plt.title('Per-iteration Loss [train]')
    plt.xlabel('Iterations')
    plt.ylabel('Value')
    l1, = plt.plot(train_iterations_list, train_losses, c='blue')
    plt.legend(handles=[l1], labels=['Train loss'], loc='best')
    plt.show()
    fig = plt.figure()
    plt.title('Per-iteration Loss [val]')
    plt.xlabel('Iterations')
    plt.ylabel('Value')
    l2, = plt.plot(val_iterations_list, val_losses, c='red')
    plt.legend(handles=[l2], labels=['Validation loss'], loc='best')
    plt.show()

    # Per-epoch plot
    fig = plt.figure()
    plt.title('Per-epoch Loss')
    plt.xlabel('Epoches')
    plt.ylabel('Value')
    train_avg_losses = [np.array(train_losses[i:i+train_iterations]).mean() 
                        for i in range(0, len(train_losses), train_iterations)]
    val_avg_losses = [np.array(val_losses[i:i+val_iterations]).mean() 
                      for i in range(0, len(val_losses), val_iterations)]
    l1, = plt.plot(epoches_list, train_avg_losses, c='blue')
    l2, = plt.plot(epoches_list, val_avg_losses, c='red')
    plt.legend(handles=[l1, l2], labels=['Train loss', 'Validation loss'], loc='best')
    plt.show()

def plot_agent_future(raster, future, agent_pos=(0,0), reference_frame='local', color='green'):
    """ Plot agent's future trajectory

    Parameters
    ----------
    raster: raster map tensor (image)
    future: future trajectory of the agent (predicted or GT) [x,y]
    agent_pos: position of the agent (needed in case of local coords)
    reference_frame: frame to which future coordinates refer
    color: color of the plotted trajectory
    """
    # Show raster map
    plt.imshow(raster.permute(1, 2, 0))

    # Show trajectory
    x, y = [], []
    for i in range(len(future)):
        point = (agent_pos[0], agent_pos[1]) if i == 0 else future[i].numpy()
        if reference_frame == 'local' and i > 0:
            point = (point[0] + agent_pos[0], -point[1] + agent_pos[1])
        x.append(point[0])
        y.append(point[1])
    
    plt.plot(x, y, color=color, markersize=10, linewidth=5)
    plt.show()

## Main

**Initialization**

In [None]:
# ---------- Dataset initialization ---------- #
# Initialize nuScenes helper

print("nuScenes Helper initialization ...")
start_time = time.time()
pl.seed_everything(PL_SEED)
if ENVIRONMENT == 'local':
    
    if PREPARE_DATASET:
        nusc = NuScenes(version=DATASET_VERSION, dataroot=DATAROOT, verbose=True)
        with open(os.path.join(ROOT, 'nuscenes_checkpoint'+FILENAME_EXT), 'wb') as f:
            pickle.dump(nusc, f, protocol=pickle.HIGHEST_PROTOCOL)
    elif not 'nusc' in locals():
        if HELPER_NEEDED:
            with open(os.path.join(ROOT, 'nuscenes_checkpoint'+FILENAME_EXT), 'rb') as f:
                nusc = pickle.load(f)
elif ENVIRONMENT == 'colab':
    if PREPARE_DATASET or HELPER_NEEDED:
        nusc = NuScenes(version=DATASET_VERSION, dataroot=DATAROOT, verbose=True)
helper = PredictHelper(nusc) if HELPER_NEEDED else None
print("nuScenes Helper initialization done in %f s\n" % (time.time() - start_time))

# Initialize dataset and data module
print("\nDataset and Data Module initialization ...")
start_time = time.time()
train_dataset = nuScenesDataset(helper, preprocessed=PREPROCESSED, split='train')
val_dataset = nuScenesDataset(helper, preprocessed=PREPROCESSED, split='val')
trainval_dm = nuScenesDataModule(train_dataset, val_dataset, num_workers=NUM_WORKERS)
trainval_dm.setup(stage='fit')
print("Dataset and Data Module initialization done in %f s\n" % (time.time() - start_time))

# ---------- Network initialization ---------- #
start_time = time.time()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if PREDICTION_MODEL == 'CoverNet':
    print("\nCoverNet model initialization ...")
    model = CoverNet(K_SIZE, EPSILON, TRAJ_LINK, TRAJ_DIR, device)
    print("CoverNet model intialization done in %f s\n" % (time.time() - start_time))
elif PREDICTION_MODEL == 'P2T':
    print("\nP2T model initialization ...")
    model = P2T()
    print("P2T model intialization done in %f s\n" % (time.time() - start_time))

# ---------- Training initialization ---------- #
print("\nTrainer initialization ...")
start_time = time.time()
GPUS = min(1, torch.cuda.device_count())
checkpoint_callback = ModelCheckpoint(dirpath=CHECKPOINT_DIR,
                                      save_top_k=TOP_K_SAVE,
                                      monitor=CHECKPOINT_MONITOR)
trainer = pl.Trainer(callbacks=[checkpoint_callback],
                     progress_bar_refresh_rate=PROGRESS_BAR_REFRESH_RATE, 
                     gpus=GPUS, max_epochs=TRAIN_EPOCHES)
print("Trainer intialization done in %f s\n" % (time.time() - start_time))

**Training loop**

In [None]:
if PREDICTION_MODEL == 'CoverNet':
    trainer.fit(model, trainval_dm)
elif PREDICTION_MODEL == 'P2T':
    # NOTE: we can handle training of all components in a single function
    #P2T.train()
    pass

**Testing**

In [None]:
# Dataloader initialization
print("Loading test dataloader ...")
trainval_dm.setup(stage='test')
test_dataloader = trainval_dm.test_dataloader()
test_generator = iter(test_dataloader)

# Trained model initialization
# TODO: istantiate kwargs for network in a better way
print("\nCoverNet trained model initialization ...")
checkpoint_name = 'epoch=19-step=80460.ckpt'
net_args = {'K_size': K_SIZE, 'epsilon': EPSILON, 'traj_link': TRAJ_LINK, 'traj_dir': TRAJ_DIR, 'device': device}
model = CoverNet.load_from_checkpoint(checkpoint_path=os.path.join(BEST_CHECKPOINT_DIR, checkpoint_name), 
                                      map_location=None, hparams_file=None, strict=True, 
                                      K_size=K_SIZE, epsilon=EPSILON, traj_link=TRAJ_LINK, traj_dir=TRAJ_DIR, device=device).to(device)
model.eval()

# ---------- CoverNet Metrics computation ---------- #
# TODO: generalize metrics computation
predictions = []
ground_truths = []
start = time.time()
reduced_traj = model.trajectories[:, :model.traj_samples].numpy()
print("\nCoverNet metrics computation ...")
print("1 - Producing predictions ...")
for i, token in enumerate(tqdm(val_dataset.tokens)):
    with torch.no_grad():
        x_state, _, x_img, gt, _ = val_dataset[i]
        x_state = x_state.to(device)
        x_img = x_img.to(device)
        x_state = torch.unsqueeze(torch.flatten(x_state, 0, 1), 0)
        x_img = torch.unsqueeze(x_img, 0)
        pred_logits = model((x_img, x_state))
        pred_probs = F.softmax(pred_logits, dim=1)[0]
        top_indices = pred_probs.argsort()[-MAX_PREDICTED_MODES:]
        cutted_probs = pred_probs[top_indices].cpu().numpy()
        cutted_traj = reduced_traj[top_indices.cpu()]
    i_t, s_t = token.split("_")
    ground_truths.append(gt.numpy())
    predictions.append(data_classes.Prediction(i_t, s_t, cutted_traj, cutted_probs))
print("2 - Computing metrics ...")
convernet_metrics = compute_metrics(predictions, ground_truths, helper)
print("Metric computation done in %f s" % (time.time() - start))


In [None]:
## Obtained with trajectory horizon = 6 seconds
convernet_metrics

defaultdict(dict,
            {'minADE_5': {'RowMean': 2.8969106674194336},
             'missRateTop_2_5': {'RowMean': 0.7489215794712974},
             'minADE_10': {'RowMean': 2.187161684036255},
             'missRateTop_2_10': {'RowMean': 0.6338900564096892},
             'minFDE_1': {'RowMean': [[11.689470291137695]]}})

In [None]:
## Obtained with trajectory horizon = 3 seconds
convernet_metrics

defaultdict(dict,
            {'minADE_5': {'RowMean': 1.126030445098877},
             'missRateTop_2_5': {'RowMean': 0.35128857427275745},
             'minADE_10': {'RowMean': 0.8562875986099243},
             'missRateTop_2_10': {'RowMean': 0.23891162482026324},
             'minFDE_1': {'RowMean': [[4.1349053382873535]]}})

## Code Debugging

**Training loop** (manual - debug only)

In [None]:
if DEBUG_MODE:

    # Dataset preparation
    train_dataloader = torch.utils.data.DataLoader(train_dataset, BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, drop_last=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, drop_last=True)

    # Training preparation
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
    model = model.to(device)

    # Plotting preparation
    train_loss_arr = []
    val_loss_arr = []
    train_iterations = len(train_dataset) // BATCH_SIZE
    val_iterations = len(val_dataset) // BATCH_SIZE

    # Training loop
    for i in range(TRAIN_EPOCHES):
        print("-------- Epoch %d --------" % i)
        model.train()

        # Training
        for j, data in enumerate(train_dataloader):
            
            # Data preparation
            x_state, x_img_static, x_img_dynamic, gt, idx = data
            x_state = x_state.to(device)
            x_img_static = x_img_static.to(device)
            x_img_dynamic = x_img_dynamic.to(device)
            x_state = torch.flatten(x_state, 0, 1)
            with torch.no_grad():
                reduced_traj = model.trajectories[:, :SAMPLES_PER_SECOND*TRAJ_HORIZON]
                y = get_positives(reduced_traj, gt)

            # Inference
            optimizer.zero_grad()
            traj_logits = model((x_img_dynamic, x_state))
            y = y.to(device)
            loss = F.cross_entropy(traj_logits, y)
            loss.backward()
            optimizer.step()

            # Logging
            loss_val = loss.item()
            train_loss_arr.append(loss_val)
            print("[%d] %d - train loss = %f" % (i, j, loss_val))

        # Validation
        model.train(mode=False)
        for j, data in enumerate(val_dataloader):

            # Data preparation
            x_state, x_img_static, x_img_dynamic, gt, idx = data
            x_state = x_state.to(device)
            x_img_static = x_img_static.to(device)
            x_img_dynamic = x_img_dynamic.to(device)
            x_state = torch.flatten(x_state, 0, 1)
            reduced_traj = model.trajectories[:, :SAMPLES_PER_SECOND*TRAJ_HORIZON]
            y = get_positives(reduced_traj, gt)

            # Inference
            traj_logits = model((x_img_dynamic, x_state))
            y = y.to(device)
            loss = F.cross_entropy(traj_logits, y)

            # Logging
            loss_val = loss.item()
            val_loss_arr.append(loss_val)
            print("[%d] %d - val loss = %f" % (i, j, loss_val))

        # Plotting
        if (i+1) % PLOT_PERIOD == 0:
            plot_train_data(train_iterations, val_iterations, i+1, train_loss_arr, val_loss_arr)
            a = input("Press Enter to continue...")
            plt.close('all')
                

**Dataset debugging**

In [35]:
# Initialize nuScenes
HELPER_NEEDED = True
if ENVIRONMENT == 'local':
    if PREPARE_DATASET:
        nusc = NuScenes(version=DATASET_VERSION, dataroot=DATAROOT, verbose=True)
        with open(os.path.join(ROOT, 'nuscenes_checkpoint'+FILENAME_EXT), 'wb') as f:
            pickle.dump(nusc, f, protocol=pickle.HIGHEST_PROTOCOL)
    elif not 'nusc' in locals():
        if HELPER_NEEDED:
            with open(os.path.join(ROOT, 'nuscenes_checkpoint'+FILENAME_EXT), 'rb') as f:
                nusc = pickle.load(f)
elif ENVIRONMENT == 'colab':
    if PREPARE_DATASET or HELPER_NEEDED:
        nusc = NuScenes(version=DATASET_VERSION, dataroot=DATAROOT, verbose=True)

In [36]:
helper = PredictHelper(nusc)
dataset = nuScenesDataset(helper, preprocessed=PREPROCESSED)
train_dataloader = torch.utils.data.DataLoader(dataset, BATCH_SIZE, True, num_workers=NUM_WORKERS)
train_generator = iter(train_dataloader)

In [None]:
# Useful to check ideal number of workers and batch size
x = time.time()
try:
    state, img_static, img_dynamic, gt, idxs = next(train_generator)
except StopIteration:
    train_generator = iter(train_dataloader)
    state, img_static, img_dynamic, gt, idxs = next(train_generator)
print(time.time() - x)

In [None]:
state, img_static, img_dynamic, gt, idx = dataset[np.random.randint(len(dataset))]
plt.imshow(img_dynamic.permute(1, 2, 0))
plt.show()
print("State input size:", state.shape)
print("Ground truth size:", gt.shape)

In [None]:
instance_token, sample_token = dataset.tokens[idx].split("_")
long_gt = torch.Tensor(
            dataset.helper.get_future_for_agent(instance_token, sample_token,
                                                seconds=100, in_agent_frame=True))
# TODO: check how to get agent position in the map                                        
plot_agent_future(img_dynamic, long_gt, agent_pos=(250,400), reference_frame='local')

**Network debugging**

In [None]:
test_states, test_imgs_static, test_imgs_dynamic, test_gts, _ = next(train_generator)
test_states = torch.flatten(test_states, 0, 1)

print(test_imgs_dynamic.size())
print(test_states.size())

In [60]:
# Prediction
model = CoverNet(K_SIZE, EPSILON, TRAJ_LINK, TRAJ_DIR, device='cuda:0')
traj_logits = model((test_imgs_dynamic, test_states))

# Output 5 and 10 most likely trajectories for this batch
top_5_trajectories = model.trajectories[traj_logits.argsort(descending=True)[:5]]
top_10_trajectories = model.trajectories[traj_logits.argsort(descending=True)[:10]]