In [None]:
import os

## =====================================================================================
## This is a temporary fix for the freezing and the CUDA issues. You can add this
## utility script instead of kaggle_l5kit until Kaggle resolves these issues.
## 
## You will be able to train and submit your results, but not all the functionality of
## l5kit will work properly.

## More details here:
## https://www.kaggle.com/c/lyft-motion-prediction-autonomous-vehicles/discussion/177125

## this script transports l5kit and dependencies
## (each requirement gets its own pip call, in order, installed into /kaggle/working)
for _requirement in ('pymap3d==2.1.0', 'protobuf==3.12.2', 'transforms3d', 'zarr', 'ptable'):
    os.system('pip install --target=/kaggle/working ' + _requirement)

## l5kit itself goes last and is installed with --no-dependencies, so pip does
## not resolve its declared requirements on top of the ones installed above
os.system('pip install --no-dependencies --target=/kaggle/working l5kit')
# os.system('pip install --target=/kaggle/working timm')

In [None]:
!pip install timm

In [None]:
# import packages
import os, gc
import zarr
import numpy as np 
import pandas as pd 
from tqdm import tqdm
from typing import Dict, List, Callable
from collections import Counter
from prettytable import PrettyTable
from collections import OrderedDict
import math
import pickle
from IPython.display import FileLink

#level5 toolkit
from l5kit.data import PERCEPTION_LABELS
from l5kit.dataset import EgoDataset, AgentDataset
from l5kit.data import ChunkedDataset, LocalDataManager

# level5 toolkit 
from l5kit.configs import load_config_data
from l5kit.geometry import transform_points
from l5kit.rasterization import build_rasterizer
from l5kit.visualization import draw_trajectory, draw_reference_trajectory, TARGET_POINTS_COLOR, PREDICTED_POINTS_COLOR, write_gif
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset, export_zarr_to_csv, write_gt_csv
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace

# visualization
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import animation
from colorama import Fore, Back, Style

# deep learning
import torch
from torch import nn, optim, Tensor
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet18, resnet50, resnet34
from torchvision.models.mobilenet import mobilenet_v2
from torch.nn import functional as F
import timm

# ensemble learning
from sklearn.cluster import KMeans

# check files in directory
competition_files = os.listdir('../input/lyft-motion-prediction-autonomous-vehicles/')
print(competition_files)

# render matplotlib animations as JavaScript/HTML players
plt.rc('animation', html='jshtml')

%matplotlib inline

In [None]:
# root directory of the competition data
DIR_INPUT = "/kaggle/input/lyft-motion-prediction-autonomous-vehicles"

# set env variable for data
# NOTE(review): presumably LocalDataManager(None) falls back to L5KIT_DATA_FOLDER
# when no folder is passed, which is why the variable is exported first — confirm
# against the l5kit documentation.
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT
# data manager later handed to build_rasterizer and used to resolve dataset keys (dm.require)
dm = LocalDataManager(None)
# print(training_cfg)

# Model Definition

## LyftModel - simple Resnet

In [None]:
class LyftModel(nn.Module):
    """ResNet-50 regressor that maps a rasterised scene to one future trajectory.

    The backbone's first convolution is replaced so that it accepts the raster's
    channel count, and the pooled backbone features go through two linear layers
    that emit ``2 * future_num_frames`` values per sample.

    Args:
        cfg: configuration dict; ``cfg["model_params"]`` must provide
            ``history_num_frames`` and ``future_num_frames``.
        pretrained: forwarded to ``resnet50``. Defaults to ``True`` (the value
            that used to be hard-coded); pass ``False`` when a checkpoint is
            restored right after construction.
    """

    def __init__(self, cfg, pretrained=True):
        super().__init__()

        # set pretrained=True while training
        self.backbone = resnet50(pretrained=pretrained)

        # 2 channels per history frame (current frame included) plus 3 further channels
        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # Replace the stem convolution with one that takes num_in_channels inputs
        # (25 for history_num_frames = 10); every other setting is copied from
        # the layer being replaced.
        self.backbone.conv1 = nn.Conv2d(
            num_in_channels,
            self.backbone.conv1.out_channels,
            kernel_size=self.backbone.conv1.kernel_size,
            stride=self.backbone.conv1.stride,
            padding=self.backbone.conv1.padding,
            bias=False,
        )

        # Width of the pooled feature vector, read from the backbone's own
        # classifier instead of being hard-coded (512 for resnet18/resnet34,
        # 2048 for the other resnets).
        backbone_out_features = self.backbone.fc.in_features

        # X, Y coords for the future positions (output shape: Bx50x2)
        num_targets = 2 * cfg["model_params"]["future_num_frames"]

        # You can add more layers here.
        self.head = nn.Sequential(
            # nn.Dropout(0.2),
            nn.Linear(in_features=backbone_out_features, out_features=4096),
        )

        self.logit = nn.Linear(4096, out_features=num_targets)

    def forward(self, x):
        """Run the backbone up to its average pool, then the head and output layer.

        The backbone's own ``fc`` classifier is bypassed on purpose. Returns a
        tensor with ``2 * future_num_frames`` values per sample.
        """
        # stem
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)

        # residual stages
        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)

        # pooled features flattened to one vector per sample
        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)

        x = self.head(x)
        x = self.logit(x)

        return x

In [None]:
class LyftMixModel(nn.Module):
    """MixNet-XL regressor that maps a rasterised scene to one future trajectory.

    The timm backbone's stem convolution is replaced so that it accepts the
    raster's channel count; the features after ``conv_head`` and global pooling
    go through two linear layers that emit ``2 * future_num_frames`` values.

    Args:
        cfg: configuration dict; ``cfg["model_params"]`` must provide
            ``history_num_frames`` and ``future_num_frames``.
        pretrained: forwarded to ``timm.create_model``. Defaults to ``True``
            (the value that used to be hard-coded); pass ``False`` when a
            checkpoint is restored right after construction.
    """

    def __init__(self, cfg, pretrained=True):
        super().__init__()

        # set pretrained=True while training
        self.backbone = timm.create_model('mixnet_xl', pretrained=pretrained)

        # 2 channels per history frame (current frame included) plus 3 further channels
        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # Replace the stem convolution with one that takes num_in_channels inputs;
        # every other setting is copied from the layer being replaced.
        self.backbone.conv_stem = nn.Conv2d(
            num_in_channels,
            self.backbone.conv_stem.out_channels,
            kernel_size=self.backbone.conv_stem.kernel_size,
            stride=self.backbone.conv_stem.stride,
            padding=self.backbone.conv_stem.padding,
            bias=False,
        )

        # Width of the pooled feature vector: forward() pools the output of
        # conv_head, so its channel count is the feature size (1536 for mixnet_xl).
        backbone_out_features = self.backbone.conv_head.out_channels

        # X, Y coords for the future positions (output shape: Bx50x2)
        num_targets = 2 * cfg["model_params"]["future_num_frames"]

        # You can add more layers here.
        self.head = nn.Sequential(
            # nn.Dropout(0.2),
            nn.Linear(in_features=backbone_out_features, out_features=4096),
        )

        self.logit = nn.Linear(4096, out_features=num_targets)

    def forward(self, x):
        """Run the backbone up to its global pool, then the head and output layer.

        Returns a tensor with ``2 * future_num_frames`` values per sample.
        """
        # stem
        x = self.backbone.conv_stem(x)
        x = self.backbone.bn1(x)
        x = self.backbone.act1(x)

        x = self.backbone.blocks(x)

        # final 1x1 expansion, then pooled features flattened to one vector per sample
        x = self.backbone.conv_head(x)
        x = self.backbone.bn2(x)
        x = self.backbone.act2(x)
        x = self.backbone.global_pool(x)
        x = torch.flatten(x, 1)

        x = self.head(x)
        x = self.logit(x)

        return x

## Mixnet

In [None]:
class LyftMixnet(nn.Module):
    """timm MixNet with a linear layer on top of its classifier output.

    The complete backbone (classifier included) is run, and its output is
    projected to ``2 * future_num_frames`` values by ``self.logit``.

    Args:
        cfg: configuration dict; ``cfg["model_params"]`` must provide
            ``history_num_frames`` and ``future_num_frames``.
        classify: timm model name passed to ``timm.create_model``
            (this notebook uses ``'mixnet_m'`` and ``'mixnet_l'``).
        pretrained: forwarded to ``timm.create_model``. Defaults to ``False``
            (the value that used to be hard-coded); set ``True`` when training
            from scratch.
    """

    def __init__(self, cfg, classify='mixnet_l', pretrained=False):
        super().__init__()

        # set pretrained=True while training
        self.backbone = timm.create_model(classify, pretrained=pretrained)

        # 2 channels per history frame (current frame included) plus 3 further channels
        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # Replace the stem convolution with one that takes num_in_channels inputs
        # (25 for history_num_frames = 10); every other setting is copied from
        # the layer being replaced.
        self.backbone.conv_stem = nn.Conv2d(
            num_in_channels,
            self.backbone.conv_stem.out_channels,
            kernel_size=self.backbone.conv_stem.kernel_size,
            stride=self.backbone.conv_stem.stride,
            padding=self.backbone.conv_stem.padding,
            bias=False,
        )

        # X, Y coords for the future positions (output shape: Bx50x2)
        num_targets = 2 * cfg["model_params"]["future_num_frames"]
        # the projection consumes the backbone classifier's output, so it is
        # sized from that classifier's out_features
        self.logit = nn.Linear(self.backbone.classifier.out_features, out_features=num_targets)

    def forward(self, x):
        """Return ``2 * future_num_frames`` values per sample."""
        x = self.backbone(x)
        x = self.logit(x)
        return x

## MobileNet v2

In [None]:
class LyftMobile(nn.Module):
    """MobileNetV2 whose first convolution and final linear layer are replaced.

    The first convolution is rebuilt for the raster's channel count and the
    classifier's linear layer is rebuilt to emit ``2 * future_num_frames`` values.

    Args:
        cfg: configuration dict; ``cfg["model_params"]`` must provide
            ``history_num_frames`` and ``future_num_frames``.
        pretrained: forwarded to ``mobilenet_v2``. Defaults to ``True`` (the
            value that used to be hard-coded); pass ``False`` when a checkpoint
            is restored right after construction.
    """

    def __init__(self, cfg, pretrained=True):
        super().__init__()

        # set pretrained=True while training
        self.backbone = mobilenet_v2(pretrained=pretrained)

        # 2 channels per history frame (current frame included) plus 3 further channels
        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # features[0][0] is the convolution being replaced; the new one takes
        # num_in_channels inputs (25 for history_num_frames = 10) and copies
        # every other setting from it.
        self.backbone.features[0][0] = nn.Conv2d(
            num_in_channels,
            self.backbone.features[0][0].out_channels,
            kernel_size=self.backbone.features[0][0].kernel_size,
            stride=self.backbone.features[0][0].stride,
            padding=self.backbone.features[0][0].padding,
            bias=False,
        )

        # X, Y coords for the future positions (output shape: Bx50x2)
        num_targets = 2 * cfg["model_params"]["future_num_frames"]

        # Fully connected layer: same input width as the layer it replaces,
        # num_targets outputs.
        self.backbone.classifier[1] = nn.Linear(
            in_features=self.backbone.classifier[1].in_features,
            out_features=num_targets
        )

    def forward(self, x):
        """Return ``2 * future_num_frames`` values per sample."""
        x = self.backbone(x)
        return x

## Multi-Modal

In [None]:
# --- Function utils ---
# Original code from https://github.com/lyft/l5kit/blob/20ab033c01610d711c3d36e1963ecec86e8b85b6/l5kit/l5kit/evaluation/metrics.py

def pytorch_neg_multi_log_likelihood_batch(
    gt: Tensor, pred: Tensor, confidences: Tensor, avails: Tensor
) -> Tensor:
    """
    Compute a negative log-likelihood for the multi-modal scenario.
    log-sum-exp trick is used here to avoid underflow and overflow, For more information about it see:
    https://en.wikipedia.org/wiki/LogSumExp#log-sum-exp_trick_for_log-domain_calculations
    https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    https://leimao.github.io/blog/LogSumExp/
    Args:
        gt (Tensor): array of shape (bs)x(time)x(2D coords)
        pred (Tensor): array of shape (bs)x(modes)x(time)x(2D coords)
        confidences (Tensor): array of shape (bs)x(modes) with a confidence for each mode in each sample
        avails (Tensor): array of shape (bs)x(time) with the availability for each gt timestep
    Returns:
        Tensor: scalar tensor, the negative log-likelihood averaged over the batch
    Raises:
        AssertionError: if a shape does not match the layout above, if the confidences of a
            sample do not sum to 1, or if any input contains a non-finite value
    """
    assert len(pred.shape) == 4, f"expected 4D (Batch x Modes x Time x Coords) array for pred, got {pred.shape}"
    batch_size, num_modes, future_len, num_coords = pred.shape

    assert gt.shape == (batch_size, future_len, num_coords), f"expected 3D (Batch x Time x Coords) array for gt, got {gt.shape}"
    assert confidences.shape == (batch_size, num_modes), f"expected 2D (Batch x Modes) array for confidences, got {confidences.shape}"
    # the offending tensor is reported in the assertion message instead of being printed separately
    confidence_sums = torch.sum(confidences, dim=1)
    assert torch.allclose(confidence_sums, confidences.new_ones((batch_size,))), f"confidences should sum to 1, got {confidences}"
    assert avails.shape == (batch_size, future_len), f"expected 2D (Batch x Time) array for avails, got {avails.shape}"
    # assert all data are valid
    assert torch.isfinite(pred).all(), "invalid value found in pred"
    assert torch.isfinite(gt).all(), "invalid value found in gt"
    assert torch.isfinite(confidences).all(), "invalid value found in confidences"
    assert torch.isfinite(avails).all(), "invalid value found in avails"

    # convert to (batch_size, num_modes, future_len, num_coords)
    gt = torch.unsqueeze(gt, 1)  # add modes
    avails = avails[:, None, :, None]  # add modes and cords

    # error (batch_size, num_modes, future_len)
    error = torch.sum(((gt - pred) * avails) ** 2, dim=-1)  # reduce coords and use availability

    # when confidence is 0 log goes to -inf, but we're fine with it
    # error (batch_size, num_modes)
    error = torch.log(confidences) - 0.5 * torch.sum(error, dim=-1)  # reduce time

    # use max aggregator on modes for numerical stability
    # error (batch_size, num_modes)
    max_value, _ = error.max(dim=1, keepdim=True)  # error are negative at this point, so max() gives the minimum one
    error = -torch.log(torch.sum(torch.exp(error - max_value), dim=-1, keepdim=True)) - max_value  # reduce modes
    return torch.mean(error)


def pytorch_neg_multi_log_likelihood_single(
    gt: Tensor, pred: Tensor, avails: Tensor
) -> Tensor:
    """
    Negative log-likelihood of a single-trajectory prediction, obtained by handing ``pred``
    to the multi-modal loss as its only mode with confidence 1.

    Args:
        gt (Tensor): array of shape (bs)x(time)x(2D coords)
        pred (Tensor): array of shape (bs)x(time)x(2D coords)
        avails (Tensor): array of shape (bs)x(time) with the availability for each gt timestep
    Returns:
        Tensor: whatever pytorch_neg_multi_log_likelihood_batch returns for the one-mode input
    """
    # the three-way unpacking only succeeds for a 3D prediction
    num_samples, _, _ = pred.shape
    # confidence (bs)x(mode=1): all of the probability mass on the single mode
    unit_confidences = pred.new_ones((num_samples, 1))
    # pred (bs)x(time)x(2D coords) --> (bs)x(mode=1)x(time)x(2D coords)
    single_mode_pred = pred.unsqueeze(1)
    return pytorch_neg_multi_log_likelihood_batch(gt, single_mode_pred, unit_confidences, avails)

# Validate Configuration

In [None]:
# validate cfg
validate_cfg = {
    "format_version": 4,

    # the model classes read history_num_frames / future_num_frames from here
    # to size their input channels and their outputs
    "model_params": {
        "history_num_frames": 10,
        "history_step_size": 1,
        "history_delta_time": 0.1,
        "future_num_frames": 50,
        "future_step_size": 1,
        "future_delta_time": 0.1,
    },

    # settings handed to build_rasterizer
    "raster_params": {
        "raster_size": [300, 300],
        "pixel_size": [0.5, 0.5],
        "ego_center": [0.25, 0.5],
        "map_type": "py_semantic",
        "satellite_map_key": "aerial_map/aerial_map.png",
        "semantic_map_key": "semantic_map/semantic_map.pb",
        "dataset_meta_key": "meta.json",
        "filter_agents_threshold": 0.5,
        "disable_traffic_light_faces": False,
    },

    # dataset key and DataLoader settings for the validation split
    "validate_data_loader": {
        "key": "scenes/validate.zarr",
        "batch_size": 6,
        "shuffle": False,
        "num_workers": 4,
    },
}

# Hyperparameters

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# choose parameter conf
conf = 2

# Every branch builds its model from validate_cfg: it is the only configuration
# dict defined in this notebook, and the model classes only read
# cfg["model_params"] from it. (The branches used to reference an undefined
# `training_cfg`, which raised NameError for conf 1, 3 and 4.)
if conf == 1:
    # resnet stuff
    model = LyftModel(validate_cfg).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=2e-6)
    criterion = nn.SmoothL1Loss()
    # criterion = nn.MSELoss(reduction="none")

elif conf == 2:
    # mixnet_large, restored from a saved checkpoint
    model = LyftMixnet(validate_cfg, 'mixnet_l').to(device)

    WEIGHT_FILE = '../input/resnet-34-pth/model_state_mixnetl_25000_17000_nll.pth'
    model_state = torch.load(WEIGHT_FILE, map_location=device)
    model.load_state_dict(model_state)

    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=2e-6)
    criterion = pytorch_neg_multi_log_likelihood_single

elif conf == 3:
    # mixnet_medium
    model = LyftMixnet(validate_cfg, 'mixnet_m').to(device)
    optimizer = optim.Adam(model.parameters(), lr=4e-4, weight_decay=2e-6)
    criterion = nn.SmoothL1Loss()

elif conf == 4:
    # mobilenet v2
    model = LyftMobile(validate_cfg).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=2e-6)
    criterion = nn.SmoothL1Loss()

else:
    # fail here rather than later with an undefined `model`
    raise ValueError(f"unknown conf {conf!r}; expected 1, 2, 3 or 4")



## Read Models

In [None]:
# checkpoint files of the ensemble; their ORDER matters, because load_model
# chooses the architecture from each file's position in this list
_CHECKPOINT_DIR = "../input/resnet-34-pth/"
model_paths = [
    _CHECKPOINT_DIR + file_name
    for file_name in (
        "mixnetm_35000.pth",
        "model_state_mixnet_xl_12000.pth",
        "model_state_mixnet_xl_nll_8000.pth",
        "model_state_mixnetl_25000_1000_custom_nll.pth",
        "model_state_mixnetl_25000_1000_nll.pth",
        "model_state_mixnetl_25000_9000_nll.pth",
        "model_state_mixnetl_25000_17000_nll.pth",
        "model_state_mixnetl_35000.pth",
        "model_state_mixnetl_25000.pth",
    )
]


def load_model(model_paths):
    """Build one network per checkpoint path and restore its saved state dict.

    The architecture depends on the position of the path in ``model_paths``:
    index 0 is a LyftMixnet 'mixnet_m', indices 1 and 2 are LyftMixModel, and
    every later index is a LyftMixnet 'mixnet_l'.

    Returns:
        list of the restored models, in the order of ``model_paths``
    """
    restored = []
    for position, checkpoint in enumerate(model_paths):
        if position == 0:
            net = LyftMixnet(validate_cfg, 'mixnet_m')
        elif position in (1, 2):
            net = LyftMixModel(validate_cfg)
        else:
            net = LyftMixnet(validate_cfg, 'mixnet_l')
        net = net.to(device)
        net.load_state_dict(torch.load(checkpoint, map_location=device))
        restored.append(net)
    return restored


models = load_model(model_paths)

In [None]:
# validation configuration
valid_cfg = validate_cfg["validate_data_loader"]

# Rasterizer
rasterizer = build_rasterizer(validate_cfg, dm)

# Validation dataset/dataloader
valid_zarr = ChunkedDataset(dm.require(valid_cfg["key"])).open()
valid_dataset = AgentDataset(validate_cfg, valid_zarr, rasterizer)
whole_size = len(valid_dataset)

# seeded three-way split: 7000 samples that are evaluated, 2000 held aside,
# and the remainder, which is discarded
split_lengths = [7000, 2000, whole_size - 9000]
valid_dataset_use, valid_dataset_valid, _ = torch.utils.data.random_split(
    valid_dataset,
    split_lengths,
    generator=torch.Generator().manual_seed(42),
)

# both loaders share the settings from the validation configuration
loader_options = dict(
    shuffle=valid_cfg["shuffle"],
    batch_size=valid_cfg["batch_size"],
    num_workers=valid_cfg["num_workers"],
)
valid_dataloader = DataLoader(valid_dataset_use, **loader_options)
valid_dataloader_valid = DataLoader(valid_dataset_valid, **loader_options)

print(len(valid_dataloader.dataset))


In [39]:
def kmeans_ensemble(models:List[Callable], data:Dict, num_cluster=3, random_state=None) -> np.ndarray:
    """
    Reduce the predictions of several models to ``num_cluster`` trajectories per sample
    by running k-means over the models' outputs, one sample at a time.

    Args:
        models (Iterable): list of models; each is called on the batch image and must return
            an array of shape (bs)x(2 * timesteps)
        data (Dict): data from dataloader; only ``data["image"]`` is read
        num_cluster (int): number of clusters, i.e. trajectories returned per sample
        random_state: forwarded to ``KMeans``; the default ``None`` keeps the clustering unseeded
    Returns:
        np.ndarray: array of shape (bs)x(num_cluster)x(timesteps)x(2D coords) holding the
            cluster centres of every sample
    Raises:
        AssertionError: if there are fewer models than clusters, or the model output width is odd
    """
    assert len(models) >= num_cluster

    inputs = data["image"].to(device)
    batch_size = inputs.shape[0]

    # one (bs)x(num_outputs) array per model
    per_model = [model(inputs).detach().cpu().numpy() for model in models]
    # (bs)x(num_model)x(num_outputs), clustered in float64
    stacked = np.stack(per_model, axis=1).astype(np.float64)

    num_outputs = stacked.shape[-1]
    assert num_outputs % 2 == 0, f"expected an even number of outputs (x, y pairs), got {num_outputs}"
    # derived from the model output instead of a hard-coded 50
    future_len = num_outputs // 2

    batch_centers = np.empty((batch_size, num_cluster, future_len, 2))
    for i in range(batch_size):
        # the models' predictions for sample i are the points being clustered
        km = KMeans(n_clusters=num_cluster, random_state=random_state).fit(stacked[i])
        batch_centers[i] = km.cluster_centers_.reshape(num_cluster, future_len, 2)
    return batch_centers

    

In [34]:
def multiple(models:List[Callable], data:Dict) -> np.ndarray:
    """
    Collect the prediction of every model for a batch, keeping each model as its own mode.

    Args:
        models (Iterable): list of models; each is called on the batch image and must return
            an array that reshapes to (bs)x(timesteps)x(2D coords)
        data (Dict): data from dataloader; only ``data["image"]`` is read
    Returns:
        np.ndarray: float64 array of shape (bs)x(num_models)x(timesteps)x(2D coords)
    Raises:
        AssertionError: if ``models`` is empty
    """
    assert len(models) > 0

    inputs = data["image"].to(device)
    batch_size = inputs.shape[0]

    # (bs)x(timesteps)x(2D coords) per model; the number of timesteps is
    # inferred from the output width instead of being fixed at 50
    per_model = [
        model(inputs).detach().cpu().numpy().reshape((batch_size, -1, 2))
        for model in models
    ]

    # put the models on axis 1, next to the batch axis
    return np.stack(per_model, axis=1).astype(np.float64)
    

# Final Evaluation

In [12]:
# Final - Evaluate Validation Dataset 
# inference mode for the selected model; autograd stays disabled from here on
model.eval()
torch.set_grad_enabled(False)

# store information for evaluation
future_coords_offsets_pd = []
timestamps = []
# coordinates ground truth
valid_coords_gts = []
# target avalabilities
target_avail_pd = []
agent_ids = []
progress_bar = tqdm(valid_dataloader)
for data in progress_bar:

    # only the image has to be moved to the model's device; targets and
    # availabilities are merely recorded, so they are read straight from the batch
    inputs = data["image"].to(device)
    targets = data["target_positions"]

    # give the flat model output the same shape as the targets
    outputs = model(inputs).reshape(targets.shape)

    future_coords_offsets_pd.append(outputs.cpu().numpy().copy())
    timestamps.append(data["timestamp"].numpy().copy())
    agent_ids.append(data["track_id"].numpy().copy())
    valid_coords_gts.append(targets.numpy().copy())
    # trailing axis added here, squeezed away again when the ground-truth CSV is written
    target_avail_pd.append(data["target_availabilities"].unsqueeze(-1).numpy().copy())

100%|██████████| 1167/1167 [08:19<00:00,  2.34it/s]


# Final Evaluation -- Ensemble

In [40]:
# Final - Evaluate Validation Dataset 
# autograd off, every ensemble member in inference mode
torch.set_grad_enabled(False)
for model in models:
    model.eval()

# store information for evaluation: predictions, timestamps, track ids,
# ground-truth coordinates and target availabilities, one entry per batch
future_coords_offsets_pd, timestamps, agent_ids = [], [], []
valid_coords_gts, target_avail_pd = [], []

progress_bar = tqdm(valid_dataloader)
for data in progress_bar:
    # one trajectory per model for every sample of the batch
    outputs = multiple(models, data)

    batch_records = (
        (future_coords_offsets_pd, outputs),
        (timestamps, data["timestamp"].numpy()),
        (agent_ids, data["track_id"].numpy()),
        (valid_coords_gts, data["target_positions"].numpy()),
        (target_avail_pd, data["target_availabilities"].unsqueeze(-1).numpy()),
    )
    for store, values in batch_records:
        # copy so the stored arrays do not alias the batch's buffers
        store.append(values.copy())

 17%|█▋        | 198/1167 [01:55<09:23,  1.72it/s]


KeyboardInterrupt: 

# Final Evaluation -- Kmeans

In [41]:
# Final - Evaluate Validation Dataset 
# autograd off, every ensemble member in inference mode
torch.set_grad_enabled(False)
for model in models:
    model.eval()

# store information for evaluation: predictions, timestamps, track ids,
# ground-truth coordinates and target availabilities, one entry per batch
future_coords_offsets_pd, timestamps, agent_ids = [], [], []
valid_coords_gts, target_avail_pd = [], []

progress_bar = tqdm(valid_dataloader)
for data in progress_bar:
    # cluster centres of the models' predictions for every sample of the batch
    outputs = kmeans_ensemble(models, data)

    batch_records = (
        (future_coords_offsets_pd, outputs),
        (timestamps, data["timestamp"].numpy()),
        (agent_ids, data["track_id"].numpy()),
        (valid_coords_gts, data["target_positions"].numpy()),
        (target_avail_pd, data["target_availabilities"].unsqueeze(-1).numpy()),
    )
    for store, values in batch_records:
        # copy so the stored arrays do not alias the batch's buffers
        store.append(values.copy())

100%|██████████| 1167/1167 [13:42<00:00,  1.42it/s]


## Concatenate Data

In [21]:
# join the per-batch arrays gathered by the evaluation loop along the first axis
(
    timestamps_concat,
    track_ids_concat,
    coords_concat,
    gt_valid_final,
    target_avail_concat,
) = (
    np.concatenate(per_batch)
    for per_batch in (
        timestamps,
        agent_ids,
        future_coords_offsets_pd,
        valid_coords_gts,
        target_avail_pd,
    )
)

# gt_valid_2D = np.reshape(gt_valid_final, (70000, 100))

# log_like = neg_multi_log_likelihood (
#     ground_truth=gt_valid_2D,
#     pred=coords_concat,
#     confidences=np.array([1,0,0]),
#     avails=target_avail_concat
# )

# print("Negative multi Likelihood is:", log_like)

In [43]:
# number of ensemble members returned by load_model
len(models)

8

# Compute metrics

In [42]:
# Negative Log Likelihood Metrics
eval_gt_path = "valid_gt_7000.csv"
pred_path = 'submission_mixnetl_ensemble.csv'

# generate ground truth csv (availabilities lose the trailing axis added during evaluation)
write_gt_csv(
    csv_path=eval_gt_path,
    timestamps=timestamps_concat,
    track_ids=track_ids_concat,
    coords=gt_valid_final,
    avails=target_avail_concat.squeeze(-1),
)

# the same three mode confidences are used for every example
num_examples = gt_valid_final.shape[0]
confidence = np.array([0.33,0.33,0.34])
confidences = np.tile(confidence, (num_examples, 1))

# submission.csv
write_pred_csv(
    pred_path,
    timestamps=timestamps_concat,
    track_ids=track_ids_concat,
    coords=coords_concat,
    confs=confidences,
)

# score the predictions against the ground truth and show every metric
metrics = compute_metrics_csv(eval_gt_path, pred_path, [neg_multi_log_likelihood, time_displace])
for metric_name, metric_mean in metrics.items():
    print(metric_name, metric_mean)

# Save Metric
metric = 'metric_mixnetl_kmeans.npy'
np.save(metric, metrics)

neg_multi_log_likelihood 34.93464970678191
time_displace [0.03780884 0.06774837 0.09400525 0.11911281 0.14333772 0.16555054
 0.18603317 0.20607193 0.22537911 0.2399516  0.25563167 0.27254414
 0.28808102 0.30089175 0.31353564 0.32429114 0.33827663 0.34997946
 0.36289177 0.37681631 0.38824709 0.40050204 0.41532554 0.42659337
 0.43842456 0.45155713 0.46308229 0.47460614 0.48670486 0.50179859
 0.51000651 0.52614248 0.54252046 0.55251546 0.56777636 0.57811409
 0.58324775 0.59759121 0.61509243 0.62943898 0.64193703 0.65862458
 0.6745409  0.69017376 0.70823359 0.72041866 0.73800779 0.75194556
 0.76496447 0.79003944]


# Metric Single_Model

In [15]:
# Negative Log Likelihood Metrics
eval_gt_path = "valid_gt_7000.csv"
pred_path = 'submission_mixnetl_42000.csv'

# generate ground truth csv (availabilities lose the trailing axis added during evaluation)
write_gt_csv(
    csv_path=eval_gt_path,
    timestamps=timestamps_concat,
    track_ids=track_ids_concat,
    coords=gt_valid_final,
    avails=target_avail_concat.squeeze(-1),
)

# submission.csv -- single trajectory per agent, so no confidences are passed
write_pred_csv(
    pred_path,
    timestamps=timestamps_concat,
    track_ids=track_ids_concat,
    coords=coords_concat,
    # confs=confidences
)

# score the predictions against the ground truth and show every metric
metrics = compute_metrics_csv(eval_gt_path, pred_path, [neg_multi_log_likelihood, time_displace])
for metric_name, metric_mean in metrics.items():
    print(metric_name, metric_mean)

# Save Metric
metric = 'metric_mixnetl_42000.npy'
np.save(metric, metrics)

neg_multi_log_likelihood 53.71172870034174
time_displace [0.03773215 0.06684401 0.09530268 0.12201825 0.14818366 0.17338283
 0.19758767 0.22078182 0.24258761 0.26052195 0.28114266 0.30400775
 0.32426746 0.34373423 0.36105989 0.37776627 0.3983554  0.41617274
 0.43362092 0.45443441 0.47207964 0.49163032 0.51341603 0.53085735
 0.5490904  0.5684535  0.58860053 0.60813105 0.62938755 0.65290465
 0.66944903 0.69292696 0.71680625 0.73534621 0.75950115 0.77799942
 0.78870733 0.80942547 0.83417625 0.85535263 0.8724096  0.89552867
 0.91580872 0.93774403 0.96052092 0.97349923 0.99529021 1.00818272
 1.02306923 1.04947916]


In [23]:
# switch to /kaggle/working, then show a download link for the metric file
# saved by the metrics cell that ran last
os.chdir(r'/kaggle/working')
FileLink(metric)

In [None]:
# download link for the ground-truth CSV written by write_gt_csv
FileLink(eval_gt_path)

In [None]:
# download link for the predictions CSV written by write_pred_csv
FileLink(pred_path)

## Plot Prediction Trajectories

In [None]:
model.eval()
torch.set_grad_enabled(False)

# Uncomment to choose satellite or semantic rasterizer
# validate_cfg["raster_params"]["map_type"] = "py_satellite"
validate_cfg["raster_params"]["map_type"] = "py_semantic"

rast = build_rasterizer(validate_cfg, dm)

eval_ego_dataset = EgoDataset(validate_cfg, valid_dataset.dataset, rast)
num_frames = 2 # randomly pick _ frames
# randint excludes its upper bound, so len(...) already keeps every index valid
random_frames = np.random.randint(0, len(eval_ego_dataset), (num_frames,))

plt.rcParams['figure.figsize'] = 6, 6

for frame_number in random_frames:
    agent_indices = valid_dataset.get_frame_indices(frame_number)
    if not len(agent_indices):
        # no agent in this frame, nothing to draw
        continue

    # get AV point-of-view frame; the image is converted with `rast`, the
    # rasterizer that produced it, so the map type chosen above is honoured
    data_ego = eval_ego_dataset[frame_number]
    im_ego = rast.to_rgb(data_ego["image"].transpose(1, 2, 0))

    predicted_positions = []
    target_positions = []

    for v_index in agent_indices:
        data_agent = valid_dataset[v_index]

        out_net = model(torch.from_numpy(data_agent["image"]).unsqueeze(0).to(device))
        out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()
        # store absolute world coordinates
        predicted_positions.append(transform_points(out_pos, data_agent["world_from_agent"]))
        # retrieve target positions from the GT and store as absolute coordinates
        target_positions.append(transform_points(data_agent["target_positions"], data_agent["world_from_agent"]))

    # convert coordinates to AV point-of-view so we can draw them
    predicted_positions = transform_points(np.concatenate(predicted_positions), data_ego["raster_from_world"])
    target_positions = transform_points(np.concatenate(target_positions), data_ego["raster_from_world"])

    # make sure ground truth and prediction have the same data size
    assert len(target_positions) == len(predicted_positions)

    # first figure: ground truth only
    draw_trajectory(im_ego, target_positions, TARGET_POINTS_COLOR)
    plt.imshow(im_ego[::-1])
    plt.show()

    # second figure: predictions drawn onto the same image, on top of the ground truth
    draw_trajectory(im_ego, predicted_positions, PREDICTED_POINTS_COLOR)
    plt.imshow(im_ego[::-1])
    plt.show()

# Plot Ensemble Predictions

In [None]:
# every ensemble member is used below, so every one is put in inference mode
for ensemble_member in models:
    ensemble_member.eval()
torch.set_grad_enabled(False)

# Uncomment to choose satellite or semantic rasterizer
# validate_cfg["raster_params"]["map_type"] = "py_satellite"
validate_cfg["raster_params"]["map_type"] = "py_semantic"

rast = build_rasterizer(validate_cfg, dm)

eval_ego_dataset = EgoDataset(validate_cfg, valid_dataset.dataset, rast)
num_frames = 1 # randomly pick _ frames
# randint excludes its upper bound, so len(...) already keeps every index valid
random_frames = np.random.randint(0, len(eval_ego_dataset), (num_frames,))

plt.rcParams['figure.figsize'] = 6, 6

for frame_number in random_frames:
    agent_indices = valid_dataset.get_frame_indices(frame_number)
    if not len(agent_indices):
        # no agent in this frame, nothing to draw
        continue

    # get AV point-of-view frame; the image is converted with `rast`, the
    # rasterizer that produced it, so the map type chosen above is honoured
    data_ego = eval_ego_dataset[frame_number]
    im_ego = rast.to_rgb(data_ego["image"].transpose(1, 2, 0))

    # fetch every agent sample once instead of once per model
    agents_data = [valid_dataset[v_index] for v_index in agent_indices]

    # ground truth of ALL agents of the frame: agent frame -> world -> AV raster
    target_positions = []
    for data_agent in agents_data:
        target_positions.append(transform_points(data_agent["target_positions"], data_agent["world_from_agent"]))
    target_positions = transform_points(np.concatenate(target_positions), data_ego["raster_from_world"])

    for ensemble_member in models:

        predicted_positions = []
        for data_agent in agents_data:
            out_net = ensemble_member(torch.from_numpy(data_agent["image"]).unsqueeze(0).to(device))
            out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()
            # store absolute world coordinates
            predicted_positions.append(transform_points(out_pos, data_agent["world_from_agent"]))

        # convert coordinates to AV point-of-view so we can draw them
        predicted_positions = transform_points(np.concatenate(predicted_positions), data_ego["raster_from_world"])

        # make sure ground truth and prediction have the same data size
        assert len(target_positions) == len(predicted_positions)
        # each model's predictions are drawn onto the same image
        draw_trajectory(im_ego, predicted_positions, PREDICTED_POINTS_COLOR)

    # ground truth is drawn last, on top of the predictions
    draw_trajectory(im_ego, target_positions, TARGET_POINTS_COLOR)

    plt.imshow(im_ego[::-1])
    plt.show()

In [None]:
from IPython.display import display, clear_output
import PIL
from IPython.display import Image

num_frames = 1 # randomly pick _ scenes (the values drawn below are used as scene indices)
# NOTE(review): 10000 is assumed to be below the number of scenes in the dataset — confirm
random_frames = np.random.randint(0,10000, (num_frames,))

# `rast` is rebuilt here but not passed to anything in this cell: the frames
# below are converted with the rasterizer held by eval_ego_dataset
validate_cfg["raster_params"]["map_type"] = "py_semantic"
rast = build_rasterizer(validate_cfg, dm)

# the same for every frame, so it is computed once
center_in_pixels = np.asarray(validate_cfg["raster_params"]["ego_center"]) * validate_cfg["raster_params"]["raster_size"]

for scene_idx in random_frames:
    indexes = eval_ego_dataset.get_scene_indices(scene_idx)
    images = []

    for idx in indexes:

        data = eval_ego_dataset[idx]
        im = data["image"].transpose(1, 2, 0)
        im = eval_ego_dataset.rasterizer.to_rgb(im)
        # targets of the frame being rendered (this used to read `data_agent`,
        # a variable left over from the plotting cells)
        target_positions_pixels = transform_points(data["target_positions"], data["raster_from_agent"])
        # draw_trajectory(im, target_positions_pixels, TARGET_POINTS_COLOR)
        clear_output(wait=True)
        images.append(im)

    # one GIF per scene; a later scene overwrites the file of an earlier one
    write_gif(
        output_filepath="output_scene.gif",
        images=images,
        resolution=(300,300),
    )

Image("output_scene.gif")