In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from matplotlib import collections as mc

from ysdc_dataset_api.dataset import MotionPredictionDataset
from ysdc_dataset_api.features import FeatureRenderer
from ysdc_dataset_api.utils import get_file_paths, scenes_generator, transform_2d_points

  res = transform @ ph


## Raw data

In [None]:
# Placeholder location of the training split; point this at a local copy of the dataset.
dataset_path = '/path/to/dataset/train/'
# Gather file paths from dataset_path (presumably one per scene file; verify against get_file_paths).
filepaths = get_file_paths(dataset_path)

In [None]:
# Take the first item yielded by the scenes generator for inspection in the cells below.
scene = next(scenes_generator(filepaths))

In [None]:
# The scene keeps one entry per known history step.
# Index 0 is the farthest step into the past (-5s); index 24 is the current time.
history_steps = len(scene.past_vehicle_tracks)
print('Number of history steps:', history_steps)

In [None]:
# The last history entry corresponds to the current time.
current_vehicle_tracks = scene.past_vehicle_tracks[-1].tracks
print('Number of vehicles seen at current time:', len(current_vehicle_tracks))
print(current_vehicle_tracks)

In [None]:
# The last history entry corresponds to the current time.
current_pedestrian_tracks = scene.past_pedestrian_tracks[-1].tracks
print('Number of pedestrians seen at current time:', len(current_pedestrian_tracks))
print(current_pedestrian_tracks)

In [None]:
# Each prediction request names one vehicle whose future trajectory has to be predicted.
prediction_requests = scene.prediction_requests
print('Number of vehicles to predict:', len(prediction_requests))
print(prediction_requests)

In [None]:
# The scene keeps one entry per future step.
# Index 0 is the closest step (0.2s into the future); index 24 is the farthest (5s into the future).
future_steps = len(scene.future_vehicle_tracks)
print('Number of future steps to predict:', future_steps)

In [None]:
# Tracks recorded at the first future step (index 0).
first_future_tracks = scene.future_vehicle_tracks[0].tracks
print('First future state:', first_future_tracks)

In [None]:
# Tracks recorded at the last future step. A negative index (as used for the
# current-time history entry above) avoids hard-coding the number of future steps.
print('Last future state:', scene.future_vehicle_tracks[-1].tracks)

## Basic usage

In [None]:
# Configuration passed to FeatureRenderer: feature-map geometry plus a list of
# renderer groups, each with its own time grid.
# NOTE: the plotting cells index feature-map channels by position, so the order of
# the entries below matters.
renderer_config = {
    # parameters of feature maps to render
    'feature_map_params': {
        'rows': 400,
        'cols': 400,
        'resolution': 0.25,  # number of meters in one pixel
    },
    'renderers_groups': [
        # Having several feature map groups
        # allows rendering feature maps with different history lengths independently.
        # This could be useful to render static features (road graph, etc.) once.
        {
            # start: int, first timestamp into the past to render, 0 - prediction time
            # stop: int, last timestamp to render (inclusive), 24 - farthest known point into the past
            # step: int, grid step size,
            #            step=1 renders all points between start and stop,
            #            step=2 renders every second point, etc.
            'time_grid_params': {
                'start': 0,
                'stop': 0,
                'step': 1,
            },
            'renderers': [
                # each value is rendered in its own channel(s)
                # occupancy -- 1 channel
                # velocity -- 2 channels (x, y)
                # acceleration -- 2 channels (x, y)
                # yaw -- 1 channel
                {'vehicles': ['occupancy', 'velocity', 'acceleration', 'yaw']},
                # only occupancy and velocity are available for pedestrians
                {'pedestrians': ['occupancy', 'velocity']},
            ]
        },
        {
            # Road-graph group: start == stop == 0, i.e. only the prediction time is rendered.
            'time_grid_params': {
                'start': 0,
                'stop': 0,
                'step': 1,
            },
            'renderers': [
                {
                    'road_graph': [
                        'crosswalk_occupancy',
                        'crosswalk_availability',
                        # 'lane_availability',  # Currently unavailable due to problem in dataset
                        'lane_direction',
                        'lane_occupancy',
                        'lane_priority',
                        'lane_speed_limit',
                        'road_polygons',
                    ]
                }
            ]
        }
    ]
}

In [None]:
# Path to the dataset directory containing sub-directories (placeholder; edit before running).
dataset_path = '/path/to/dataset/train/'
# Path to the file with scene tags (placeholder; edit before running).
scene_tags_fpath = '/path/to/dataset/train_tags_file'

In [None]:
# Build the feature renderer from the configuration dict defined above.
renderer = FeatureRenderer(renderer_config)

In [None]:
# Display the renderer's `to_feature_map_tf` attribute.
# NOTE(review): presumably the transform from agent coordinates to feature-map pixels - confirm.
renderer.to_feature_map_tf

In [None]:
# Dataset that renders feature maps on the fly with `renderer`.
# Ground-truth trajectories are requested in the agent frame.
dataset = MotionPredictionDataset(
    dataset_path=dataset_path,
    scene_tags_fpath=scene_tags_fpath,
    feature_producer=renderer,
    transform_ground_truth_to_agent_frame=True,
)

In [None]:
# Number of scenes in the dataset.
# The actual number of items is larger,
# since multiple agents per scene are considered for prediction.
dataset.num_scenes

In [None]:
# Create an iterator so that items can be pulled one at a time with next().
dataset_iter = iter(dataset)

In [None]:
# Take some scene: advance the iterator ten times, so data_item ends up
# holding the tenth item yielded by the dataset.
for i in range(10):
    data_item = next(dataset_iter)

In [None]:
# One data item contains rendered feature maps and the ground truth trajectory.
# Feature maps are centered around the current actor.
# The ground truth trajectory is transformed to the actor coordinate system:
# the actor is located at the origin (0, 0), heading in the positive x direction at prediction time.
data_item.keys()

In [None]:
# Feature maps are in channels-first format, so the first dimension is the channel index.
data_item['feature_maps'].shape

In [None]:
# Plot vehicles occupancy, pedestrian occupancy, lane occupancy and road polygon
# as semi-transparent layers on a single figure.
# Each entry is (feature-map channel index, alpha); the indices follow the
# channel order produced by renderer_config.
overlay_layers = [
    (0, 0.7),   # vehicles occupancy
    (6, 0.5),   # pedestrians occupancy
    (12, 0.2),  # lane occupancy
    (15, 0.1),  # road polygons
]
plt.figure(figsize=(10, 10))
for channel, alpha in overlay_layers:
    plt.imshow(data_item['feature_maps'][channel], origin='lower', cmap='binary', alpha=alpha)

## Filtration by tags

In [None]:
# To filter scenes by tags, specify a filter function.
# The scene tags dict has the following structure:
# {
#     'day_time': one of {'kNight', 'kMorning', 'kAfternoon', 'kEvening'}
#     'season': one of {'kWinter', 'kSpring', 'kSummer', 'kAutumn'}
#     'track': one of {'Moscow' , 'Skolkovo', 'Innopolis', 'AnnArbor', 'Modiin', 'TelAviv'}
#     'sun_phase': one of {'kAstronomicalNight', 'kTwilight', 'kDaylight'}
#     'precipitation': one of {'kNoPrecipitation', 'kRain', 'kSleet', 'kSnow'}
# }
# Full description of protobuf message is available at tags.proto file in sources


def filter_scene(scene_tags_dict):
    """Scene filter that keeps only rainy Ann Arbor scenes.

    Args:
        scene_tags_dict: scene tags mapping; the 'track' entry is always read,
            the 'precipitation' entry only when the track matches.

    Returns:
        True if track is 'AnnArbor' and precipitation is 'kRain', False otherwise.
    """
    # Guard clause: reject any other track before looking at the weather.
    if scene_tags_dict['track'] != 'AnnArbor':
        return False
    return scene_tags_dict['precipitation'] == 'kRain'

In [None]:
# Trajectory tags list can include any number of the following non-mutually exclusive tags.
# [
#     'kMoveLeft', 'kMoveRight', 'kMoveForward', 'kMoveBack',
#     'kAcceleration', 'kDeceleration', 'kUniform',
#     'kStopping', 'kStarting', 'kStationary'
# ]


def filter_trajectory(trajectory_tags_list):
    """Trajectory filter that keeps trajectories tagged as moving right.

    Args:
        trajectory_tags_list: collection of trajectory tag names.

    Returns:
        True if 'kMoveRight' is among the tags, False otherwise.
    """
    return 'kMoveRight' in trajectory_tags_list

In [None]:
# Let's try to filter scenes.
# Same dataset as before, now restricted by both the scene-level and the
# trajectory-level filter functions defined above.

dataset = MotionPredictionDataset(
    dataset_path=dataset_path,
    scene_tags_fpath=scene_tags_fpath,
    feature_producer=renderer,
    transform_ground_truth_to_agent_frame=True,
    scene_tags_filter=filter_scene,
    trajectory_tags_filter=filter_trajectory,
)

In [None]:
# Rainy Ann Arbor is a pretty rare occasion, so few scenes pass the filter.
dataset.num_scenes

In [None]:
# Fresh iterator over the filtered dataset.
dataset_iter = iter(dataset)

In [None]:
# Advance ten times; data_item ends up holding the tenth filtered item.
for i in range(10):
    data_item = next(dataset_iter)

In [None]:
# Apply the renderer's feature-map transform to the ground-truth trajectory so it
# can be drawn on top of the rendered maps.
# Uses transform_2d_points, the helper imported at the top of the notebook
# (the previous spelling `transform2dpoints` is not defined anywhere).
transformed_gt = transform_2d_points(data_item['ground_truth_trajectory'], renderer.to_feature_map_tf)
# Shift by half a unit and round to get integer coordinates for plotting.
transformed_gt = np.round(transformed_gt - 0.5).astype(np.int32)

In [None]:
# Looks like the car is moving right.
# Each entry is (feature-map channel index, alpha); same channels as in the
# earlier overlay plot of data_item['feature_maps'].
gt_overlay_layers = [
    (0, 0.7),
    (6, 0.5),
    (12, 0.2),
    (15, 0.1),
]
plt.figure(figsize=(10, 10))
for channel, alpha in gt_overlay_layers:
    plt.imshow(data_item['feature_maps'][channel], origin='lower', cmap='binary', alpha=alpha)
ax = plt.gca()
# Draw the transformed ground-truth trajectory as a green polyline on top of the maps.
# The trailing semicolon keeps the notebook from echoing the collection's repr.
ax.add_collection(mc.LineCollection([transformed_gt], color='green'));

## Prerendered dataset

In [None]:
# We use the raw protobuf dataset here to extract ground truth trajectories;
# feature maps are read from the pre-rendered dataset instead of being rendered on the fly.
# All three paths are placeholders; edit them before running.
prerenderer_dataset = MotionPredictionDataset(
    dataset_path='/path/to/dataset/train/',
    scene_tags_fpath='/path/to/dataset/train_tags_file',
    prerendered_dataset_path='/path/to/pre_rendered_dataset/train/',
)

In [None]:
# Iterator over the pre-rendered dataset (rebinds dataset_iter from the earlier cells).
dataset_iter = iter(prerenderer_dataset)

In [None]:
# Advance ten times; item ends up holding the tenth pre-rendered item.
for i in range(10):
    item = next(dataset_iter)

In [None]:
# Overlay four channels of the pre-rendered feature map on one figure.
# Each entry is (channel index, alpha).
# NOTE(review): the last two indices are one higher than in the on-the-fly plots,
# presumably because the pre-rendered maps contain an extra channel - confirm.
prerendered_overlay_layers = [
    (0, 0.7),
    (6, 0.5),
    (13, 0.2),
    (16, 0.1),
]
plt.figure(figsize=(10, 10))
for channel, alpha in prerendered_overlay_layers:
    plt.imshow(item['prerendered_feature_map'][channel], origin='lower', cmap='binary', alpha=alpha)

## Model evaluation

In [2]:
def filter_ood_validation_data(scene_tags_dict):
    """Scene filter for the validation subset used in the evaluation cells.

    Args:
        scene_tags_dict: scene tags mapping; the 'track' entry is always read,
            the 'precipitation' entry only when the track matches.

    Returns:
        True if track is one of 'Skolkovo', 'Modiin', 'TelAviv' and precipitation
        is one of 'kNoPrecipitation', 'kRain', 'kSnow'; False otherwise.
    """
    accepted_tracks = ['Skolkovo', 'Modiin', 'TelAviv']
    accepted_precipitation = ['kNoPrecipitation', 'kRain', 'kSnow']
    # Guard clause: reject other tracks before looking at the weather.
    if scene_tags_dict['track'] not in accepted_tracks:
        return False
    return scene_tags_dict['precipitation'] in accepted_precipitation

In [3]:
# Validation dataset restricted by filter_ood_validation_data, reading pre-rendered
# feature maps. All paths are placeholders; edit them before running.
ood_validation_dataset = MotionPredictionDataset(
    dataset_path='/path/to/nips_dataset/validation/',
    prerendered_dataset_path='/path/to/pre_rendered_dataset/validation/',
    scene_tags_fpath='/path/to/nips_dataset/validation_scene_tags_with_prec.txt',
    scene_tags_filter=filter_ood_validation_data,
)

6023/500000 scenes fit the filter criteria.


In [4]:
# Number of scenes that passed the scene-tag filter.
ood_validation_dataset.num_scenes

6023

In [11]:
from sdc.config import build_parser

parser = build_parser()
# Parse an explicitly empty argument list so that every option takes its default
# and the notebook kernel's own sys.argv is not parsed.
args = parser.parse_args([])

def ipynb_patch_args(args):
    """Override selected configuration attributes for use inside this notebook.

    Args:
        args: configuration namespace; it is modified in place.

    Returns:
        The same `args` object, with the attributes listed below overwritten.
    """
    notebook_overrides = {
        # Placeholder checkpoint directory; edit before running.
        'dir_checkpoint': '/path/to/model_checkpoints',

        # Backbone model details
        # Deep Imitative Model: MobileNetv2 feature encoder, autoregressive flow decoder
        'model_name': 'dim',
        'model_dim_hidden': 512,
        'exp_device': 'cuda:0',

        # Used in scoring generated trajectories and obtaining per-plan/per-scene confidence scores.
        # See `sdc.oatomobile.torch.baselines.robust_imitative_planning.py` for details.
        'rip_per_plan_algorithm': 'LQ',
        'rip_per_scene_algorithm': 'LQ',

        # Number of ensemble members
        'rip_k': 3,

        # Data loading
        'exp_batch_size': 32,
        'data_num_workers': 4,
        'data_prefetch_factor': 2,
    }
    for attr_name, attr_value in notebook_overrides.items():
        setattr(args, attr_name, attr_value)

    return args

# `c` is the patched configuration used by the model and dataloader cells below.
c = ipynb_patch_args(args)

In [12]:
from sdc.oatomobile.torch.baselines import init_rip
from sdc.oatomobile.torch.baselines.robust_imitative_planning import load_rip_checkpoints
from typing import Mapping

class Model:
    """Wrapper that loads a RIP ensemble from checkpoints and runs batched prediction.

    The configuration object passed to the constructor is the only source of
    settings; no notebook-level globals are read.
    """

    def __init__(self, c):
        """Store the configuration and point torch hub at the checkpoint directory.

        Args:
            c: configuration namespace; `dir_checkpoint` is read here, and
                `exp_device` and `rip_k` are read by `load`.
        """
        self.c = c

        # Initialize torch hub dir
        torch.hub.set_dir(f'{c.dir_checkpoint}/torch_hub')

    def load(self):
        """Build the model with `init_rip` and load its checkpoints into `self.model`."""
        # Use the instance's own config rather than a global that happens to be named `c`.
        c = self.c
        model, full_model_name, _, _ = init_rip(c=c)
        checkpoint_dir = f'{c.dir_checkpoint}/{full_model_name}'
        self.model = load_rip_checkpoints(
            model=model, device=c.exp_device, k=c.rip_k,
            checkpoint_dir=checkpoint_dir)

    def predict(self, batch: Mapping[str, torch.Tensor]):
        """Run the loaded model on one batch and split the result per item.

        Args:
            batch: Mapping[str, torch.Tensor], with 'feature_maps' key/value.
                It is unpacked as keyword arguments to the model.

        Returns:
            Sequence of dicts, one per batch element. Each has the following structure:
                {
                    predictions_list: Sequence[np.ndarray],
                    plan_confidence_scores_list: Sequence[np.ndarray],
                    pred_request_confidence_score: float,
                }
        """
        self.model.eval()
        predictions, plan_confidence_scores, pred_request_confidence_scores = (
            self.model(**batch))
        # Detach and move to host memory so the outputs can be handled as numpy arrays.
        predictions = predictions.detach().cpu().numpy()
        plan_confidence_scores = plan_confidence_scores.detach().cpu().numpy()
        pred_request_confidence_scores = pred_request_confidence_scores.detach().cpu().numpy()
        return [
            {
                'predictions_list': predictions[i],
                'plan_confidence_scores_list': plan_confidence_scores[i],
                'pred_request_confidence_score':
                    pred_request_confidence_scores[i]
            } for i in range(predictions.shape[0])]

# Initialize and load ensemble of k models from checkpoints.
# Requires c.dir_checkpoint to point at a real, readable checkpoint directory.
model = Model(c=c)
model.load()

RIP kwargs:
{'device': 'cuda:0',
 'k': 3,
 'model_name': 'bc',
 'num_preds': 5,
 'per_plan_algorithm': 'LQ',
 'per_scene_algorithm': 'LQ',
 'samples_per_model': 10}
Building RIP agent with backbone model bc, per-plan algorithm LQ, per-scene algorithm LQ, 3 ensemble members.
Model kwargs:
{'device': 'cuda:0',
 'dim_hidden': 512,
 'in_channels': 17,
 'output_shape': (25, 2)}
Model kwargs:
{'device': 'cuda:0',
 'dim_hidden': 512,
 'in_channels': 17,
 'output_shape': (25, 2)}
Model kwargs:
{'device': 'cuda:0',
 'dim_hidden': 512,
 'in_channels': 17,
 'output_shape': (25, 2)}


Using cache found in /home/nband/.cache/torch/hub/pytorch_vision_v0.9.0
Using cache found in /home/nband/.cache/torch/hub/pytorch_vision_v0.9.0
Using cache found in /home/nband/.cache/torch/hub/pytorch_vision_v0.9.0


PermissionError: [Errno 13] Permission denied: '/path'

In [None]:
# Init dataloader.
# Batch size, worker count and prefetch factor come from the patched config `c`.
dataloader_kwargs = {
    'batch_size': c.exp_batch_size,
    'num_workers': c.data_num_workers,
    'prefetch_factor': c.data_prefetch_factor,
    'pin_memory': True
}

print(f'Building dataloaders with kwargs {dataloader_kwargs}.')
ood_validation_dataloader = torch.utils.data.DataLoader(ood_validation_dataset, **dataloader_kwargs)

In [None]:
from ysdc_dataset_api.evaluation import Submission, object_prediction_from_model_output, save_submission_proto
from sdc.oatomobile.torch.baselines import batch_transform
import tqdm.notebook as tqdm
from functools import partial

In [None]:
# Container that accumulates one prediction proto per (scene, track) item.
submission = Submission()

# Pre-bind the options used to prepare every batch before it is fed to the model.
batch_cast = partial(
    batch_transform,
    device=c.exp_device,
    downsample_hw=None,
    data_use_prerendered=True,
)

for batch in tqdm.tqdm(ood_validation_dataloader):
    per_item_outputs = model.predict(batch_cast(batch))

    # Outputs are ordered like the batch, so the same index selects the matching ids.
    for idx, item_output in enumerate(per_item_outputs):
        submission.predictions.append(
            object_prediction_from_model_output(
                track_id=batch['track_id'][idx],
                scene_id=batch['scene_id'][idx],
                model_output=item_output,
            )
        )

In [None]:
# Write the accumulated submission to disk (placeholder path; edit before running).
save_submission_proto('/path/to/submission.pb', submission=submission)