In [1]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet50
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path
import os

import argparse

In [4]:
# Notebook-wide configuration dictionary handed to the L5Kit rasterizer and datasets below.
cfg = {
    'format_version': 4,
    # Root folder of the dataset. Taken from the L5KIT_DATA_FOLDER environment
    # variable when it is set, so the notebook is not tied to a single machine;
    # otherwise falls back to the original local path.
    'data_path': os.environ.get("L5KIT_DATA_FOLDER", "/home/axot/lyft/data"),
    'model_params': {
        # NOTE(review): the import cell only pulls in resnet50 — confirm which
        # backbone is actually built from this value.
        'model_architecture': 'efficientnet-b1',
        'history_num_frames': 10,
        'future_num_frames': 50,
        'lr': 1e-4,
        'history_step_size': 1,
        'history_delta_time': 0.1,
        'future_step_size': 1,
        'future_delta_time': 0.1,
    },
    'raster_params': {
        'raster_size': [224, 224],
        'pixel_size': [0.5, 0.5],
        'ego_center': [0.25, 0.5],
        'map_type': 'py_semantic',
        'satellite_map_key': 'aerial_map/aerial_map.png',
        'semantic_map_key': 'semantic_map/semantic_map.pb',
        'dataset_meta_key': 'meta.json',
        'filter_agents_threshold': 0.5,
        'disable_traffic_light_faces': False
    },
    'train_data_loader': {
        'key': 'scenes/train.zarr',
        'batch_size': 8,
        'shuffle': True,
        'num_workers': 4
    },
    'test_data_loader': {
        'key': 'scenes/test.zarr',
        'batch_size': 16,
        'shuffle': False,
        'num_workers': 4
    }
}

In [5]:
# Prepare the evaluation split: chop the sample scenes, then wrap the chopped
# zarr (restricted by its agent mask) in an AgentDataset and a DataLoader.
DIR_INPUT = cfg["data_path"]
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT
dm = LocalDataManager(None)
rasterizer = build_rasterizer(cfg, dm)

num_frames_to_chop = 100
eval_cfg = dict(key="scenes/sample.zarr", batch_size=12, shuffle=False, num_workers=4)

# Resolve the source zarr once; its path feeds the chopping step and its
# basename names the chopped zarr inside the output folder.
eval_source_path = dm.require(eval_cfg["key"])
eval_base_path = create_chopped_dataset(
    eval_source_path,
    cfg["raster_params"]["filter_agents_threshold"],
    num_frames_to_chop,
    cfg["model_params"]["future_num_frames"],
    MIN_FUTURE_STEPS,
)

# The chopped zarr, the agents mask and the ground-truth CSV are all read from
# the folder returned by create_chopped_dataset.
eval_base_dir = Path(eval_base_path)
eval_zarr_path = str(eval_base_dir / Path(eval_source_path).name)
eval_mask_path = str(eval_base_dir / "mask.npz")
eval_gt_path = str(eval_base_dir / "gt.csv")

eval_zarr = ChunkedDataset(eval_zarr_path).open()
eval_mask = np.load(eval_mask_path)["arr_0"]
eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
eval_dataloader = DataLoader(
    eval_dataset,
    shuffle=eval_cfg["shuffle"],
    batch_size=eval_cfg["batch_size"],
    num_workers=eval_cfg["num_workers"],
)
print(eval_dataset)

copying: 100%|██████████| 100/100 [00:01<00:00, 73.27it/s]
  dataset = AgentDataset(cfg=cfg, zarr_dataset=zarr_dataset, rasterizer=rasterizer, agents_mask=agents_mask)
extracting GT: 100%|██████████| 480/480 [00:02<00:00, 173.64it/s]

+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|    100     |   10000    |   751440   |     130740    |       0.28      |        100.00        |        75.14         |        10.00         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+



  eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)


In [6]:
# Draw predicted vs. ground-truth future trajectories for every agent in a
# frame, on top of the ego (AV point-of-view) raster of that frame.
#
# NOTE(review): `model` and `device` are not defined in any visible cell of
# this notebook (a fresh kernel fails here with NameError). A trained network
# and its torch device must be created in an earlier cell before this one runs.
model.eval()
torch.set_grad_enabled(False)

# build a dict to retrieve future trajectories from GT, keyed by the
# concatenation of track id and timestamp
gt_rows = {row["track_id"] + row["timestamp"]: row["coord"] for row in read_gt_csv(eval_gt_path)}

eval_ego_dataset = EgoDataset(cfg, eval_dataset.dataset, rasterizer)

# start from last frame of scene_0 and increase by 100 (one frame per scene,
# 50 frames in total)
for frame_number in range(99, 99+100*50, 100):
    agent_indices = eval_dataset.get_frame_indices(frame_number)
    if not len(agent_indices):
        continue

    # get AV point-of-view frame; draw on a copy so the rasterized image stays intact
    data_ego = eval_ego_dataset[frame_number]
    im_ego = rasterizer.to_rgb(data_ego["image"].transpose(1, 2, 0))
    img = im_ego.copy()

    # Accumulate over ALL agents of the frame. These lists must be created
    # once per frame (not once per agent), otherwise only the last agent of
    # the frame ends up being drawn.
    predicted_positions = []
    target_positions = []

    for v_index in agent_indices:
        data_agent = eval_dataset[v_index]

        out_net = model(torch.from_numpy(data_agent["image"]).unsqueeze(0).to(device))
        out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()

        # store the prediction in world coordinates
        predicted_positions.append(transform_points(out_pos, data_agent["world_from_agent"]))

        # retrieve target positions from the GT and store as absolute coordinates
        track_id, timestamp = data_agent["track_id"], data_agent["timestamp"]
        target_positions.append(gt_rows[str(track_id) + str(timestamp)] + data_agent["centroid"][:2])

    # convert coordinates to AV point-of-view so we can draw them
    predicted_positions = transform_points(np.concatenate(predicted_positions), data_ego["raster_from_world"])
    target_positions = transform_points(np.concatenate(target_positions), data_ego["raster_from_world"])

    draw_trajectory(img, predicted_positions, PREDICTED_POINTS_COLOR)
    draw_trajectory(img, target_positions, TARGET_POINTS_COLOR)

    plt.imshow(img[::-1])
    plt.show()

NameError: name 'model' is not defined

In [24]:
# Look up the dataset indices that eval_dataset associates with frame 599,
# and display them as the cell output.
agent_indices = eval_dataset.get_frame_indices(599) 
agent_indices

array([4, 5])

In [25]:
# Fetch the sample stored at dataset index 4 for interactive inspection.
data_agent = eval_dataset[4]

In [37]:
# List the fields that a single eval_dataset sample exposes.
data_agent.keys()

dict_keys(['image', 'target_positions', 'target_yaws', 'target_availabilities', 'history_positions', 'history_yaws', 'history_availabilities', 'world_to_image', 'raster_from_world', 'raster_from_agent', 'agent_from_world', 'world_from_agent', 'track_id', 'timestamp', 'centroid', 'yaw', 'extent'])

In [39]:
# Show the first history position of the sample.
# NOTE(review): presumably this is the agent's current position expressed in its own frame (hence ~0) — confirm.
data_agent['history_positions'][0] 

array([0.0000000e+00, 1.1368684e-13], dtype=float32)

In [58]:
# Repeat the first history position once per row of out_pos
# (out_pos comes from the prediction loop cell and must already exist).
first_history_row = data_agent["history_positions"][:1]
np.tile(first_history_row, (len(out_pos), 1))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.

In [56]:
# Display every history position stored for the inspected sample.
data_agent["history_positions"]

array([[ 0.0000000e+00,  0.0000000e+00],
       [-1.4693557e-03, -8.7270926e-04],
       [-6.5898284e-04, -2.2371109e-03],
       [-5.9982651e-04, -3.0538440e-03],
       [-7.7665702e-04, -5.1465635e-03],
       [-7.8683562e-04, -2.7389820e-03],
       [-1.3217841e-03, -6.4308255e-04],
       [-4.5893426e-04, -1.9763161e-03],
       [-1.7969140e-04, -5.0759576e-03],
       [ 5.1936740e-04, -7.9262536e-03],
       [ 4.4213615e-05, -1.1190006e-02]], dtype=float32)