In [1]:
import os
import sys

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, RandomSampler


from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer


sys.path.append("..")

from src.batteries import t2d, load_checkpoint
from src.batteries.progress import tqdm
from src.models.genet import genet_normal
from src.models import ModelWithConfidence 

In [2]:
# Point l5kit at the local dataset directory (relative to this notebook).
# NOTE(review): presumably this is what LocalDataManager(None) reads to resolve
# dataset keys such as "scenes/validate.zarr" — confirm against l5kit docs.
os.environ["L5KIT_DATA_FOLDER"] = "../data"

In [3]:
# Configuration dict passed to build_rasterizer(...) and AgentDataset(...) in a later cell.
cfg = {
    "format_version": 4,
    # Temporal window of each sample: 10 history frames and 50 future frames.
    # NOTE(review): delta_time is presumably in seconds (0.1 -> 10 Hz) — confirm.
    "model_params": {
        "history_num_frames": 10,
        "history_step_size": 1,
        "history_delta_time": 0.1,
        "future_num_frames": 50,
        "future_step_size": 1,
        "future_delta_time": 0.1,
    },
    # Settings for the rasterized input image fed to the model.
    "raster_params": {
        "raster_size": [224, 224],
        "pixel_size": [0.5, 0.5],
        "ego_center": [0.25, 0.5],
        "map_type": "py_semantic",
        "satellite_map_key": "aerial_map/aerial_map.png",
        "semantic_map_key": "semantic_map/semantic_map.pb",
        "dataset_meta_key": "meta.json",
        "filter_agents_threshold": 0.5,
    },
    # Not referenced by any cell visible in this notebook section.
    "train_data_loader": {
        "key": "scenes/train.zarr",
        "batch_size": 12,
        "shuffle": True,
        "num_workers": 4,
    },
}

history_n_frames = cfg["model_params"]["history_num_frames"]
future_n_frames = cfg["model_params"]["future_num_frames"]
# Number of candidate trajectories (modes) the model predicts per agent.
n_trajectories = 3
model = ModelWithConfidence(
    backbone=genet_normal(
        # NOTE(review): presumably 3 map (RGB) channels plus 2 channels for each of the
        # history frames and the current frame — confirm against the rasterizer output.
        in_channels=3 + (history_n_frames + 1) * 2,
        # NOTE(review): presumably (x, y) per future frame per trajectory, plus one
        # confidence logit per trajectory — confirm against ModelWithConfidence.
        num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
    ),
    future_num_frames=future_n_frames,
    num_trajectories=n_trajectories,
)
# Restore trained weights; the recorded output below shows the helper reporting
# the checkpoint's stage, epoch and metrics.
load_checkpoint("../logs/genet_normal_confidence/stage_0/best.pth", model)

<= Loaded model from '../logs/genet_normal_confidence/stage_0/best.pth'
Stage: stage_0
Epoch: 2
Metrics:
{'train': {'loss': 277.9709114074707}, 'valid': {'loss': 188.6406391143799}}


In [4]:
# Prefer the first CUDA device, but fall back to the CPU so the notebook still
# runs (slowly) on machines without a GPU instead of failing at `model.to(...)`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [5]:
# NOTE(review): LocalDataManager(None) presumably resolves keys relative to the
# L5KIT_DATA_FOLDER environment variable set in an earlier cell — confirm.
dm = LocalDataManager(None)
rasterizer = build_rasterizer(cfg, dm)

# Open the validation scenes and wrap them in a per-agent dataset.
valid_zarr = ChunkedDataset(dm.require("scenes/validate.zarr")).open()
valid_dataset = AgentDataset(cfg, valid_zarr, rasterizer)
# Evaluate on 10,000 random draws (with replacement) rather than the full set.
# No generator/seed is passed here or set in the visible cells, so the drawn
# subset is presumably different on every run.
valid_sampler = RandomSampler(valid_dataset, replacement=True, num_samples=10_000)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=64,
    # shuffle must stay False: DataLoader does not allow shuffle together with a sampler.
    shuffle=False,
    sampler=valid_sampler,
    num_workers=16,
)

# Number of batches: ceil(10_000 / 64) = 157, matching the recorded output.
len(valid_loader)

157

In [None]:
# Inference only: put the model in evaluation mode before collecting predictions.
model.eval()

predicted = []  # one numpy array of model outputs per batch
actual = []  # one numpy array of ground-truth target positions per batch
with torch.no_grad(), tqdm(total=len(valid_loader), desc="valid") as progress:
    for batch in valid_loader:
        # Project helper; presumably moves every tensor in the batch to `device`.
        batch = t2d(batch, device)

        targets = batch["target_positions"]
        # NOTE(review): the model is built as ModelWithConfidence with 3 trajectories;
        # if its forward returns multi-modal predictions and/or confidences, reshaping
        # to the single-trajectory `targets.shape` will fail — confirm the output format.
        outputs = model(batch["image"]).reshape(targets.shape)

        predicted.append(outputs.detach().cpu().numpy())
        actual.append(targets.cpu().numpy())

        progress.update(1)

# Show a compact summary (number of batches, shape of the first batch) instead of
# dumping every prediction array into the cell output.
len(predicted), predicted[0].shape

In [None]:
def pytorch_neg_multi_log_likelihood(gt, pred, confidences, avails):
    """
    Compute the negative log-likelihood of a ground-truth trajectory under a
    multi-modal prediction, for a single (un-batched) example.

    The log-sum-exp trick is used to avoid underflow and overflow. For more information see:
    https://en.wikipedia.org/wiki/LogSumExp#log-sum-exp_trick_for_log-domain_calculations
    https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    https://leimao.github.io/blog/LogSumExp/

    Args:
        gt (Tensor): array of shape (time)x(2D coords)
        pred (Tensor): array of shape (modes)x(time)x(2D coords)
        confidences (Tensor): array of shape (modes) with a confidence for each mode;
            must sum to 1 (within 1e-6)
        avails (Tensor): array of shape (time) with the availability for each gt timestep;
            timesteps with availability 0 do not contribute to the error

    Returns:
        Tensor: negative log-likelihood for this example, a 0-dimensional tensor

    Raises:
        AssertionError: if any input has an unexpected shape, contains non-finite
            values, or if the confidences do not sum to 1
    """
    assert len(pred.shape) == 3, f"expected 3D (MxTxC) array for pred, got {pred.shape}"
    num_modes, future_len, num_coords = pred.shape

    assert gt.shape == (future_len, num_coords), f"expected 2D (Time x Coords) array for gt, got {gt.shape}"
    assert confidences.shape == (num_modes,), f"expected 1D (Modes) array for confidences, got {confidences.shape}"
    assert abs(torch.sum(confidences).item() - 1.0) < 1e-6, "confidences should sum to 1"
    assert avails.shape == (future_len,), f"expected 1D (Time) array for avails, got {avails.shape}"
    # assert all data are valid
    assert torch.isfinite(pred).all(), "invalid value found in pred"
    assert torch.isfinite(gt).all(), "invalid value found in gt"
    assert torch.isfinite(confidences).all(), "invalid value found in confidences"
    assert torch.isfinite(avails).all(), "invalid value found in avails"

    gt = torch.unsqueeze(gt, 0)  # add modes
    avails = avails[None, :, None]  # add modes and coords

    # squared displacement per mode and timestep; unavailable timesteps are zeroed out
    error = torch.sum(((gt - pred) * avails) ** 2, dim=-1)  # reduce coords and use availability

    # Per-mode log-likelihood. When a confidence is 0, torch.log yields -inf, which is
    # fine: that mode then contributes exp(-inf) = 0 to the sum below. torch does not
    # warn or raise here, so no numpy errstate guard is needed (it would have no
    # effect on torch ops anyway).
    error = torch.log(confidences) - 0.5 * torch.sum(error, dim=-1)  # reduce time

    # use max aggregator on modes for numerical stability
    max_value = error.max()  # errors are <= 0 at this point, so max() is the one closest to 0
    error = -torch.log(torch.sum(torch.exp(error - max_value), dim=-1)) - max_value  # reduce modes
    return error