In [1]:
from typing import Dict,Callable

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import optim,Tensor,unsqueeze
from torch.utils.data import DataLoader
from torch.utils.data.dataset import Dataset
import torch.autograd
import torch.nn as nn
from torchvision.models.resnet import resnet50,resnet18
import torchvision.transforms as transforms
from torchvision import datasets
from torchvision.utils import save_image
import torch.nn.functional as f

from newdataset import MyTrainDataset, my_dataset_worker_init_func

from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, write_gt_csv, read_gt_csv
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace, average_displacement_error_mean, final_displacement_error_mean
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import os
os.environ["KMP_DUPLICATE_LIB_OK"]="True"

  "Windows detected. BLOSC_NOLOCK has not been set as it causes memory leaks on Windows."


# 数据处理

In [2]:
# Point l5kit at the local copy of the Lyft dataset. LocalDataManager(None)
# is created right after the variable is set, so the assignment must stay
# above it.
# NOTE(review): this is an absolute, machine-specific path; the same folder is
# hard-coded again in later cells.
os.environ["L5KIT_DATA_FOLDER"] = "E:/Downloads/lyft-motion-prediction-autonomous-vehicles"
dm = LocalDataManager(None)
# Load the experiment configuration (model, raster, data-loader and training
# parameters) and print it so the run is self-describing.
cfg = load_config_data("./agent_motion_config.yaml")
print(cfg)

{'format_version': 4, 'mode': {'load_mode': False}, 'model_params': {'model_architecture': 'CVAE', 'latent_dim': 256, 'num_layers': 2, 'bidirectional': True, 'history_step_size': 1, 'history_num_frames': 9, 'future_step_size': 1, 'future_num_frames': 50, 'step_time': 0.1, 'render_ego_history': True, 'num_classes': 10}, 'raster_params': {'raster_mode': 1, 'raster_size': [100, 100], 'pixel_size': [0.2, 0.2], 'ego_center': [0.25, 0.5], 'map_type': 'py_semantic', 'satellite_map_key': 'aerial_map/aerial_map.png', 'semantic_map_key': 'semantic_map/semantic_map.pb', 'dataset_meta_key': 'meta.json', 'filter_agents_threshold': 0.5, 'disable_traffic_light_faces': False, 'set_origin_to_bottom': True}, 'train_data_loader': {'key': 'scenes/train.zarr', 'batch_size': 16, 'shuffle': True, 'num_workers': 6}, 'val_data_loader': {'key': 'scenes/validate.zarr', 'batch_size': 16, 'shuffle': False, 'num_workers': 4}, 'scale': 1, 'train_params': {'device': 1, 'epochs': 2}}


In [3]:
if not cfg['mode']['load_mode']:
    # ===== INIT DATASET
    train_cfg = cfg["train_data_loader"]
    rasterizer = build_rasterizer(cfg, dm)
    train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()

    # The stock AgentDataset is only used to report the dataset size and the
    # keys of one sample; training itself reads from MyTrainDataset below.
    train_agent_dataset = AgentDataset(cfg, train_zarr, rasterizer)
    print(len(train_agent_dataset))
    print(train_agent_dataset)
    print(train_agent_dataset[0].keys())

    train_dataset = MyTrainDataset(
        cfg,
        dm,
        len(train_agent_dataset),
        raster_mode=cfg["raster_params"]["raster_mode"],
        num_classes=cfg["model_params"]["num_classes"],
    )

    # prefetch_factor and persistent_workers are only accepted by DataLoader
    # when worker processes are used; passing them with num_workers == 0
    # raises ValueError, so they are added only for the multi-process case.
    train_worker_kwargs = {}
    if train_cfg["num_workers"] > 0:
        train_worker_kwargs = {"prefetch_factor": 2, "persistent_workers": True}

    train_dataloader = DataLoader(
        train_dataset,
        shuffle=train_cfg["shuffle"],
        batch_size=train_cfg["batch_size"],
        num_workers=train_cfg["num_workers"],
        pin_memory=True,
        worker_init_fn=my_dataset_worker_init_func,
        **train_worker_kwargs,
    )

234443
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   16265    |  4039527   | 320124624  |    38735988   |      112.19     |        248.36        |        79.25         |        24.83         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
dict_keys(['frame_index', 'image', 'target_positions', 'target_yaws', 'target_velocities', 'target_availabilities', 'history_positions', 'history_yaws', 'history_v

# 参数

In [4]:
# Basic parameters
# Use the first CUDA device when the config asks for it (device == 1) and CUDA
# is actually available; otherwise fall back to the CPU.
if cfg["train_params"]["device"] == 1:
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
else:
    device = torch.device("cpu")
torch.backends.cudnn.benchmark = True

epochs = cfg["train_params"]["epochs"]
latent_dim = cfg["model_params"]["latent_dim"]  # number of LSTM units
encoder_fc = 64
num_layers = cfg["model_params"]["num_layers"]
bidirectional = cfg["model_params"]["bidirectional"]

encoder_length = cfg["model_params"]["history_num_frames"]
decoder_length = cfg["model_params"]["future_num_frames"]
num_encoder_tokens = 2
num_decoder_tokens = 2
z_dimension = 32
accumulation_steps = 5 # number of gradient-accumulation steps

num_classes = cfg["model_params"]["num_classes"] # number of classes
# bidirectional is a bool, so this is latent_dim when it is False and
# 2 * latent_dim when it is True.
modal_fc = latent_dim*(1+bidirectional) 

In [5]:
def neg_multi_log_likelihood_batch(
    gt: Tensor, pred: Tensor, confidences: Tensor, avails: Tensor
) -> Tensor:
    """
    Batch-averaged negative log-likelihood of the ground truth under a mixture
    of per-mode predictions weighted by their confidences.

    For every sample and mode the squared error between ground truth and
    prediction is summed over coordinates and time steps (unavailable steps are
    zeroed by ``avails``), combined with the log-confidence, and the modes are
    reduced with a log-sum-exp.

    :param gt: ground-truth positions, shape (batch_size, future_len, num_coords)
    :param pred: predicted positions, shape (batch_size, num_modes, future_len, num_coords)
    :param confidences: mode confidences, shape (batch_size, num_modes); each row must sum to 1
    :param avails: availability mask, shape (batch_size, future_len)
    :return: 0-dim tensor holding the mean negative log-likelihood over the batch
    :raises AssertionError: if a shape does not match, the confidences do not
        sum to 1, or any input contains a non-finite value
    """
    assert len(pred.shape) == 4, f"expected 4D (Batch x Modes x Time x Coords) tensor for pred, got {pred.shape}"
    batch_size, num_modes, future_len, num_coords = pred.shape

    assert gt.shape == (batch_size, future_len, num_coords), f"expected 3D (Batch x Time x Coords) tensor for gt, got {gt.shape}"
    assert confidences.shape == (batch_size, num_modes), f"expected 2D (Batch x Modes) tensor for confidences, got {confidences.shape}"
    assert torch.allclose(torch.sum(confidences, dim=1), confidences.new_ones((batch_size,))), "confidences should sum to 1"
    assert avails.shape == (batch_size, future_len), f"expected 2D (Batch x Time) tensor for avails, got {avails.shape}"
    # assert all data are valid
    assert torch.isfinite(pred).all(), "invalid value found in pred"
    assert torch.isfinite(gt).all(), "invalid value found in gt"
    assert torch.isfinite(confidences).all(), "invalid value found in confidences"
    assert torch.isfinite(avails).all(), "invalid value found in avails"

    # Broadcast everything to (batch_size, num_modes, future_len, num_coords).
    gt = torch.unsqueeze(gt, 1)  # add modes
    avails = avails[:, None, :, None]  # add modes and coords

    # (batch_size, num_modes, future_len): squared error per time step, with
    # unavailable steps contributing zero.
    squared_error = torch.sum(((gt - pred) * avails) ** 2, dim=-1)

    # (batch_size, num_modes): log of confidence-weighted likelihood per mode.
    # A confidence of 0 yields -inf here, which logsumexp handles.
    log_likelihood = torch.log(confidences) - 0.5 * torch.sum(squared_error, dim=-1)

    # Numerically stable reduction over the modes.
    nll = -torch.logsumexp(log_likelihood, dim=1)
    return torch.mean(nll)

# 模型

In [6]:
from typing import List, Tuple, Callable, Union
from nuscenes.prediction.models.backbone import calculate_backbone_feature_dim

# Length of the agent state vector that is appended to the image features.
ASV_DIM = 3


class CoverNet(nn.Module):
    """CoverNet classifier head on top of a CNN backbone (https://arxiv.org/pdf/1911.10298.pdf)."""

    def __init__(self, backbone: nn.Module, num_modes: int,
                 n_hidden_layers: List[int] = None,
                 input_shape: Tuple[int, int, int] = (3, 500, 500)):
        """
        Builds the backbone + fully connected head.
        :param backbone: Feature extractor applied to the input image.
        :param num_modes: Size of the output layer (one logit per lattice mode).
        :param n_hidden_layers: Widths of the fully connected layers placed between the
            backbone features and the output layer. Falls back to [4096] when empty or None.
        :param input_shape: Image shape handed to calculate_backbone_feature_dim to find
            the width of the backbone output.
        :raises ValueError: if n_hidden_layers is given but is not a list.
        """

        if n_hidden_layers and not isinstance(n_hidden_layers, list):
            raise ValueError(f"Param n_hidden_layers must be a list. Received {type(n_hidden_layers)}")

        super().__init__()

        hidden_widths = n_hidden_layers if n_hidden_layers else [4096]

        self.backbone = backbone

        # Input width of the head = backbone features + agent state vector.
        head_input_dim = calculate_backbone_feature_dim(backbone, input_shape) + ASV_DIM
        widths = [head_input_dim] + hidden_widths + [num_modes]

        # One Linear layer per consecutive pair of widths.
        layers = []
        for position in range(len(widths) - 1):
            layers.append(nn.Linear(widths[position], widths[position + 1]))

        self.head = nn.ModuleList(layers)

    def forward(self, image_tensor: torch.Tensor,
                agent_state_vector: torch.Tensor) -> torch.Tensor:
        """
        :param image_tensor: Batch of input images for the backbone.
        :param agent_state_vector: Batch of agent state vectors, concatenated to the
            backbone features along dim 1.
        :return: Logits for the batch.
        """

        activations = torch.cat([self.backbone(image_tensor), agent_state_vector], dim=1)

        # The linear layers are applied back to back; no non-linearity is inserted
        # between them.
        for layer in self.head:
            activations = layer(activations)

        return activations


def mean_pointwise_l2_distance(lattice: torch.Tensor, ground_truth: torch.Tensor) -> torch.Tensor:
    """
    Computes the index of the closest trajectory in the lattice, measured by the
    point-wise L2 (Euclidean) distance averaged over the time steps.
    :param lattice: Lattice of pre-generated trajectories. Shape [num_modes, n_timesteps, state_dim]
    :param ground_truth: Ground truth trajectory of agent. Shape [1, n_timesteps, state_dim].
    :return: 0-dim integer tensor holding the index of the closest mode in the lattice.
    """
    # The leading dimension of size 1 broadcasts against num_modes, so the
    # ground truth does not have to be copied once per lattice entry.
    return torch.pow(lattice - ground_truth, 2).sum(dim=2).sqrt().mean(dim=1).argmin()


class ConstantLatticeLoss:
    """
    Computes the loss for a constant lattice CoverNet model: cross-entropy between
    the predicted logits and the index of the lattice trajectory closest to the
    ground truth.
    """

    def __init__(self, lattice: Union[np.ndarray, torch.Tensor],
                 similarity_function: Callable[[torch.Tensor, torch.Tensor], int] = mean_pointwise_l2_distance):
        """
        Inits the loss.
        :param lattice: numpy array of shape [n_modes, n_timesteps, state_dim]
        :param similarity_function: Function that computes the index of the closest trajectory in the lattice
            to the actual ground truth trajectory of the agent.
        """

        self.lattice = torch.Tensor(lattice)
        self.similarity_func = similarity_function

    def __call__(self, batch_logits: torch.Tensor, batch_ground_truth_trajectory: torch.Tensor) -> torch.Tensor:
        """
        Computes the loss on a batch.
        :param batch_logits: Tensor of shape [batch_size, n_modes]. Output of a linear layer since this class
            uses nn.functional.cross_entropy.
        :param batch_ground_truth_trajectory: Tensor of shape [batch_size, 1, n_timesteps, state_dim]
        :return: Average element-wise loss on the batch.
        """

        # If using GPU, need to copy the lattice to the GPU if haven't done so already
        # This ensures we only copy it once
        if self.lattice.device != batch_logits.device:
            self.lattice = self.lattice.to(batch_logits.device)

        # Class label per sample = index of the closest lattice trajectory.
        # zip() keeps the original pairing semantics (stops at the shorter input).
        labels = torch.tensor(
            [int(self.similarity_func(self.lattice, ground_truth))
             for _, ground_truth in zip(batch_logits, batch_ground_truth_trajectory)],
            dtype=torch.long,
            device=batch_logits.device,
        )

        # One batched call with the default mean reduction equals the mean of
        # the per-sample cross-entropy values, without growing a tensor with
        # torch.cat on every sample.
        return f.cross_entropy(batch_logits[:labels.shape[0]], labels)

class CoverNet_train:
    """
    Training harness for CoverNet driven by a JSON configuration file.

    NOTE(review): the class depends on names that are neither defined nor
    imported anywhere in the visible notebook: ``Json_Parser``,
    ``NuSceneDataset_CoverNet`` and a ``plot_results`` method. They presumably
    come from the project this class was copied from and must be provided
    before the class can be instantiated and run.
    """

    def __init__(self, config_file, verbose):
        """
        Reads the configuration and builds data loaders, model, optimizer, loss,
        TensorBoard writer and the checkpoint directory.
        :param config_file: Path of the JSON configuration; also forwarded to the dataset class.
        :param verbose: Forwarded unchanged to the dataset class.
        """
        # These names are used only by this constructor and are not imported at
        # the top of the notebook, so they are imported here to keep the class
        # self-contained.
        import json
        import pickle
        from datetime import datetime

        from nuscenes.prediction.models.backbone import ResNetBackbone
        from torch.utils.tensorboard import SummaryWriter

        self.parser = Json_Parser(config_file)
        self.config = self.parser.load_parser()
        learning_cfg = self.config['LEARNING']

        self.device = torch.device(learning_cfg['device'] if torch.cuda.is_available() else 'cpu')
        self.lr = learning_cfg['lr']
        self.momentum = learning_cfg['momentum']
        self.n_epochs = learning_cfg['n_epochs']
        self.batch_size = learning_cfg['batch_size']
        self.val_batch_size = learning_cfg['val_batch_size']
        self.num_val_data = learning_cfg['num_val_data']
        self.num_modes = learning_cfg['num_modes']
        self.print_size = learning_cfg['print_size']

        # Despite the attribute names these are DataLoader objects.
        self.train_dataset = DataLoader(NuSceneDataset_CoverNet(train_mode=True, config_file_name=config_file, verbose=verbose), batch_size=self.batch_size, shuffle=True, num_workers=4)
        self.val_dataset = DataLoader(NuSceneDataset_CoverNet(train_mode=False, config_file_name=config_file, verbose=verbose), batch_size=self.val_batch_size, shuffle=True, num_workers=4)

        self.backbone = ResNetBackbone('resnet50')
        self.model = CoverNet(self.backbone, self.num_modes)
        self.optimizer = torch.optim.SGD(self.model.parameters(), lr=self.lr, momentum=self.momentum)

        # Classification loss against the closest trajectory of a fixed set.
        # SECURITY: pickle.load executes arbitrary code from the file; only
        # point 'trajectory_set_path' at files from a trusted source.
        self.traj_set_path = learning_cfg['trajectory_set_path']
        with open(self.traj_set_path, 'rb') as traj_file:
            self.trajectories_set = torch.Tensor(pickle.load(traj_file))
        self.criterion = ConstantLatticeLoss(self.trajectories_set)

        self.model = self.model.to(self.device)

        # Every run gets its own timestamped TensorBoard log and checkpoint folder.
        self.save_name = datetime.now().strftime("%Y%m%d-%H_%M_%S")
        self.writer = SummaryWriter('./result/tensorboard/' + self.save_name)
        self.net_save_path = os.path.join(learning_cfg['model_save_path'], self.save_name)
        # makedirs also creates a missing 'model_save_path' parent directory.
        os.makedirs(self.net_save_path, exist_ok=True)
        self.writer.add_text('Config', json.dumps(self.config))

        # The sizes are numbers of batches, because the attributes are DataLoaders.
        dataset_info = {'train_size' : len(self.train_dataset), 'val_size' : len(self.val_dataset),
                        'train_batch_size' : self.batch_size, 'val_batch_size' : self.val_batch_size}
        self.writer.add_text('Dataset_size', json.dumps(dataset_info))

    def get_label(self, traj, future):
        """
        Picks the trajectory of the set that best matches a future trajectory.

        Only candidates whose last point lies within 10 units of the last point
        of ``future`` are scored (by the norm of the full difference); all other
        candidates keep the sentinel score 1e4.
        :param traj: Candidate trajectories, indexed along the first dimension.
        :param future: Future trajectory the candidates are compared against.
        :return: Tuple (one-hot vector over the candidates, index of the best candidate).
        """
        scores = torch.full((len(traj),), 1e4)
        for i in range(len(traj)):
            if torch.norm(traj[i, -1] - future[-1]) < 10:
                scores[i] = torch.norm(traj[i] - future)

        ind = torch.argmin(scores)

        res = torch.zeros_like(scores)
        res[ind] = 1

        return res, ind

    def run(self):
        """
        Trains the model for ``n_epochs`` epochs.

        Every ``print_size`` steps the model is evaluated on up to
        ``num_val_data`` validation batches, scalars are written to TensorBoard
        and the weights are saved when the validation loss improves. Weights
        are also saved after every epoch and once more at the end.
        """
        print("CoverNet learning starts!")
        step = 1
        best_val_loss = 10000
        # Exactly n_epochs passes; the previous range(n_epochs + 1) ran one
        # epoch more than configured (and logged "n_epochs+1/n_epochs").
        for epoch in range(self.n_epochs):
            Loss, Val_Loss = [], []

            for data in self.train_dataset:
                # train_mode (validation below switches the model to eval mode)
                self.model.train()

                img_tensor = data['img'].to(device=self.device)
                agent_state_tensor = torch.Tensor(data['ego_state'].tolist()).to(self.device)
                agent_state_tensor = torch.squeeze(agent_state_tensor, 1)

                prediction = self.model(img_tensor, agent_state_tensor)
                # NOTE(review): get_label returns a one-hot vector for a single
                # trajectory, while self.criterion (ConstantLatticeLoss) is
                # documented to take a batch of ground-truth trajectories, and
                # validation below uses val_data['label'] instead. Confirm
                # which target the criterion is meant to receive.
                label, anchor_ind = self.get_label(self.trajectories_set, data['future_local_ego_pos'])

                self.optimizer.zero_grad()
                loss = self.criterion(prediction, label)

                loss.backward()
                self.optimizer.step()

                step += 1
                with torch.no_grad():
                    Loss.append(loss.cpu().detach().numpy())

                if step % self.print_size == 0:
                    with torch.no_grad():
                        # eval_mode
                        self.model.eval()

                        k = 0
                        for val_data in self.val_dataset:
                            img_tensor = val_data['img'].to(self.device)
                            agent_state_tensor = torch.Tensor(val_data['ego_state'].tolist()).to(self.device)
                            agent_state_tensor = torch.squeeze(agent_state_tensor, 1)

                            prediction = self.model(img_tensor, agent_state_tensor)
                            label = val_data['label']

                            val_loss = self.criterion(prediction, label)
                            Val_Loss.append(val_loss.detach().cpu().numpy())

                            # The notebook imports torch.nn.functional as `f`;
                            # the previous `F.softmax` raised NameError.
                            pred = f.softmax(prediction, dim=-1)
                            # NOTE(review): anchor_ind comes from the last
                            # training batch, not from val_data -- confirm.
                            self.writer.add_figure('Results', self.plot_results(val_data, pred, anchor_ind), step)
                            k += 1
                            if k == self.num_val_data:
                                break

                    # Mean losses since the last report.
                    loss = np.array(Loss).mean()
                    val_loss = np.array(Val_Loss).mean()

                    if val_loss < best_val_loss:
                        best_val_loss = val_loss
                        save_path = os.path.join(self.net_save_path, 'best_val_loss_model.pth')
                        torch.save(self.model.state_dict(), save_path)
                        # Log against the step so successive improvements form a curve.
                        self.writer.add_scalar('Best Val Loss', best_val_loss, step)

                    self.writer.add_scalar('Loss', loss, step)
                    self.writer.add_scalar('Val Loss', val_loss, step)

                    print("Epoch: {}/{} | Step: {} | Loss: {:.5f} | Val_Loss: {:.5f}".format(
                            epoch + 1, self.n_epochs, step, loss, val_loss))
                    Loss, Val_Loss = [], []

            save_path = os.path.join(self.net_save_path, 'epoch_{0}.pth'.format(epoch + 1))
            torch.save(self.model.state_dict(), save_path)

        save_path = os.path.join(self.net_save_path, 'CoverNet.pth')
        torch.save(self.model.state_dict(), save_path)

In [None]:
if not cfg['mode']['load_mode']:
    # ==== TRAIN LOOP
    # NOTE(review): `cvae`, `cvae_optimizer` and `loss_function` are not defined
    # in any visible cell of this notebook; they must exist in the kernel
    # before this cell is run.

    # Weights of the KL-divergence and cross-entropy terms in the total loss.
    KLD_WEIGHT = 25
    CROSS_WEIGHT = 20
    # Mixed precision is requested through the same config flag that selects
    # the GPU; with enabled=False the autocast context is a no-op, so a single
    # code path serves both cases.
    use_amp = cfg["train_params"]["device"] == 1
    steps_per_epoch = len(train_dataloader) // cfg['scale']

    losses_avg = []  # mean training loss of each epoch
    for epoch in range(epochs):  # train for several epochs
        tr_it = iter(train_dataloader)
        progress_bar = tqdm(range(steps_per_epoch), position=0)
        losses_train = []
        loss_sum = 0.0  # running sum, avoids re-averaging the whole list every step
        cvae.train()  # training mode
        torch.set_grad_enabled(True)
        for i in progress_bar:
            try:
                data, label = next(tr_it)
            except StopIteration:
                # Restart the loader if it runs out before the planned number of steps.
                tr_it = iter(train_dataloader)
                data, label = next(tr_it)

            y_hat, confidences, mean1, std1, mean2, std2 = cvae(data)  # forward pass
            with torch.cuda.amp.autocast(enabled=use_amp):
                NLL, KLD, Cross = loss_function(y_hat, confidences, data, label, mean1, std1, mean2, std2)
                loss = NLL + KLD_WEIGHT * KLD + CROSS_WEIGHT * Cross

            # Backward pass. Gradient accumulation is not used: every batch
            # triggers one optimizer step.
            cvae_optimizer.zero_grad(set_to_none=True)
            loss.backward()
            cvae_optimizer.step()

            loss_value = loss.item()
            losses_train.append(loss_value)
            loss_sum += loss_value
            progress_bar.set_description(f"loss: {loss_value} loss(avg): {loss_sum / len(losses_train)}")
            # Drop references to the batch and its graph before fetching the next one.
            del data, y_hat, confidences, mean1, std1, mean2, std2, NLL, KLD, Cross, loss
        losses_avg.append(np.mean(losses_train))

loss: 111.06519024353474 loss(avg): 703.0764715437928:  11%|██▍                   | 1656/14653 [06:23<47:05,  4.60it/s]

In [None]:
# Build the CoverNet trainer from its JSON configuration and run the whole
# training loop (validation, TensorBoard logging and checkpointing included).
covernet_train = CoverNet_train(config_file='./covernet_config.json', verbose=False)
covernet_train.run()

# 结果

In [None]:
if not cfg['mode']['load_mode']:    
    # Persist the trained CVAE weights, then plot the per-step training loss.
    # losses_train is reset at the start of every epoch by the training cell,
    # so the curve shows the last epoch only.
    # NOTE(review): absolute, machine-specific output path.
    torch.save(cvae.state_dict(),'E:/Downloads/lyft-motion-prediction-autonomous-vehicles/cvae.pth')
    plt.plot(np.arange(len(losses_train)), losses_train, label="train loss")
    plt.legend()
    plt.show()

In [None]:
# Plot the mean training loss of each epoch; skipped for single-epoch runs,
# where the curve would be a single point.
if not cfg['mode']['load_mode'] and cfg['train_params']['epochs'] > 1: 
    plt.plot(np.arange(len(losses_avg)), losses_avg, label="train loss")
    plt.legend()
    plt.show()

# 评估

In [None]:
eval_cfg = cfg["val_data_loader"]
rasterizer = build_rasterizer(cfg, dm)
eval_zarr = ChunkedDataset(dm.require(eval_cfg["key"])).open()

# The stock AgentDataset is only used to report the dataset size and the keys
# of one sample; evaluation itself reads from MyTrainDataset below.
eval_agent_dataset = AgentDataset(cfg, eval_zarr, rasterizer)
print(len(eval_agent_dataset))
print(eval_agent_dataset[0].keys())

# NOTE(review): unlike the training cell, num_classes is not passed here, so
# MyTrainDataset falls back to its default -- confirm that this is intended.
eval_dataset = MyTrainDataset(cfg, dm, len(eval_agent_dataset), raster_mode=cfg["raster_params"]["raster_mode"])

# prefetch_factor and persistent_workers are only accepted by DataLoader when
# worker processes are used; passing them with num_workers == 0 raises
# ValueError, so they are added only for the multi-process case.
eval_worker_kwargs = {}
if eval_cfg["num_workers"] > 0:
    eval_worker_kwargs = {"prefetch_factor": 2, "persistent_workers": True}

eval_dataloader = DataLoader(
    eval_dataset,
    shuffle=eval_cfg["shuffle"],
    batch_size=eval_cfg["batch_size"],
    num_workers=eval_cfg["num_workers"],
    pin_memory=True,
    worker_init_fn=my_dataset_worker_init_func,
    **eval_worker_kwargs,
)
# NOTE(review): absolute, machine-specific paths.
pred_path = "E:/Downloads/lyft-motion-prediction-autonomous-vehicles/pred.csv"
eval_gt_path = "E:/Downloads/lyft-motion-prediction-autonomous-vehicles/gt.csv"
# map_location="cpu" lets a checkpoint saved from a CUDA model be read on a
# CPU-only machine; load_state_dict then copies the values into the model's
# own parameters on whatever device they live.
cvae.load_state_dict(torch.load('E:/Downloads/lyft-motion-prediction-autonomous-vehicles/cvae.pth', map_location="cpu"))
print(len(eval_dataloader))

## 保存预测

In [None]:
# ==== EVAL LOOP
cvae.eval()
torch.set_grad_enabled(False)
losses_test = []  # stays empty: no loss is computed during evaluation

# Per-batch arrays collected for the prediction / ground-truth CSV files.
future_coords_offsets_pd = []
gt_coords_offsets_pd = []
timestamps = []
agent_ids = []
availability = []
confs = []
tr_it = iter(eval_dataloader)
progress_bar = tqdm(range(len(eval_dataloader)//cfg['scale']),position=0)

for _step in progress_bar:
    try:
        data, _ = next(tr_it)
    except StopIteration:
        tr_it = iter(eval_dataloader)
        data, _ = next(tr_it)
    y_hat, confidences, mean1, std1, mean2, std2 = cvae(data)

    # Convert agent-frame coordinates into world-frame offsets from the
    # agent centroid.
    agents_coords = y_hat.detach().cpu().numpy()
    gt_coords = data['target_positions'].numpy()
    world_from_agents = data['world_from_agent'].numpy()
    centroid_xy = data["centroid"].numpy()[:, None, :2]

    # One transform per predicted mode, stacked back along the mode axis.
    # (A dedicated loop name is used so the outer loop variable is not shadowed.)
    coords_offset = np.stack(
        [transform_points(agents_coords[:, mode, :, :], world_from_agents) - centroid_xy
         for mode in range(num_classes)],
        1,
    )
    gt_offset = transform_points(gt_coords, world_from_agents) - centroid_xy

    # Both arrays are already stacked per batch, so they are stored as they are.
    future_coords_offsets_pd.append(coords_offset)
    gt_coords_offsets_pd.append(gt_offset)
    timestamps.append(data["timestamp"].numpy().copy())
    agent_ids.append(data["track_id"].numpy().copy())
    availability.append(data["target_availabilities"].numpy().copy())
    confs.append(confidences.detach().cpu().numpy().copy())

In [None]:
# Timestamps and track ids identify the rows of both files, so they are
# concatenated once and shared by the two writers.
all_timestamps = np.concatenate(timestamps)
all_track_ids = np.concatenate(agent_ids)

# Predictions: per-mode coordinates plus the mode confidences.
write_pred_csv(
    pred_path,
    timestamps=all_timestamps,
    track_ids=all_track_ids,
    coords=np.concatenate(future_coords_offsets_pd),
    confs=np.concatenate(confs),
)

# Ground truth: target coordinates plus their availability mask.
write_gt_csv(
    eval_gt_path,
    timestamps=all_timestamps,
    track_ids=all_track_ids,
    coords=np.concatenate(gt_coords_offsets_pd),
    avails=np.concatenate(availability),
)

## 预测结果

In [None]:
# Score the written predictions against the written ground truth.
metrics = compute_metrics_csv(
    eval_gt_path,
    pred_path,
    [neg_multi_log_likelihood, time_displace],
)
for metric_name, metric_mean in metrics.items():
    print(metric_name, metric_mean)
    if metric_name == "time_displace":
        # Displacement per future time step; indexed below for the 1 s / 3 s / 5 s horizons.
        FDE = metric_mean

# Number of predicted steps that make up 10, 30 and 50 frames at the
# configured future step size.
future_step = cfg["model_params"]["future_step_size"]
steps_1s = 10 // future_step
steps_3s = 30 // future_step
steps_5s = 50 // future_step

# FDE: displacement at the last step of the horizon.
# ADE: mean displacement over all steps up to the horizon.
print('FDE1s: {}, FDE3s: {}, FDE5s: {}, ADE1s: {}, ADE3s: {}, ADE5s: {} '.format(
    FDE[steps_1s - 1],
    FDE[steps_3s - 1],
    FDE[steps_5s - 1],
    np.mean(FDE[:steps_1s]),
    np.mean(FDE[:steps_3s]),
    np.mean(FDE[:steps_5s])))

In [None]:
# multi_vis = True draws the first three predicted modes of every agent;
# multi_vis = False draws only the mode with the highest confidence.
multi_vis = False
cvae.eval()
torch.set_grad_enabled(False)

# build a dict to retrieve future trajectories from GT
gt_rows = {}
for row in read_gt_csv(eval_gt_path):
    gt_rows[row["track_id"] + row["timestamp"]] = row["coord"]

eval_ego_dataset = EgoDataset(cfg, eval_zarr, rasterizer)
# Separate name so the dataset built by the evaluation-setup cell is not overwritten.
eval_agent_dataset = AgentDataset(cfg, eval_zarr, rasterizer)

# Colour of each mode drawn in multi_vis mode (mode 0, 1, 2 in this order).
multi_mode_colors = [(34, 222, 79), (220, 235, 21), PREDICTED_POINTS_COLOR]

for frame_number in range(99, len(eval_zarr.frames), 100):  # start from last frame of scene_0 and increase by 100
    agent_indices = eval_agent_dataset.get_frame_indices(frame_number)
    if not len(agent_indices):
        continue

    # get AV point-of-view frame
    data_ego = eval_ego_dataset[frame_number]
    im_ego = rasterizer.to_rgb(data_ego["image"].transpose(1, 2, 0))

    # One list of world-frame trajectories per drawn mode.
    num_drawn = len(multi_mode_colors) if multi_vis else 1
    predicted_positions = [[] for _ in range(num_drawn)]
    target_positions = []

    for v_index in agent_indices:
        data_agent = eval_agent_dataset[v_index]
        # NOTE(review): the model is fed a single AgentDataset sample here,
        # while the evaluation loop feeds it batches from MyTrainDataset --
        # confirm that cvae accepts both.
        out_net, confs, _, _, _, _ = cvae(data_agent)
        confs = confs.detach().cpu().numpy()
        if multi_vis:
            print(confs)
            mode_indices = range(num_drawn)
        else:
            mode_indices = [np.argmax(confs)]

        for slot, mode in enumerate(mode_indices):
            # Select the individual mode before flattening. The previous
            # multi_vis code reshaped the whole out_net for all three
            # trajectories, so the per-mode selections were never used.
            out_pos = out_net[0][mode].reshape(-1, 2).detach().cpu().numpy()
            # store absolute world coordinates
            predicted_positions[slot].append(transform_points(out_pos, data_agent["world_from_agent"]))

        # retrieve target positions from the GT and store as absolute coordinates
        track_id, timestamp = data_agent["track_id"], data_agent["timestamp"]
        target_positions.append(gt_rows[str(track_id) + str(timestamp)] + data_agent["centroid"][:2])

    # convert coordinates to AV point-of-view so we can draw them
    predicted_raster = [
        transform_points(np.concatenate(mode_positions), data_ego["raster_from_world"])
        for mode_positions in predicted_positions
    ]
    target_raster = transform_points(np.concatenate(target_positions), data_ego["raster_from_world"])

    # Drawing order is kept from the original cell: in multi_vis mode the
    # target is drawn last (on top), otherwise the prediction is.
    if multi_vis:
        for mode_raster, color in zip(predicted_raster, multi_mode_colors):
            draw_trajectory(im_ego, mode_raster, color)
        draw_trajectory(im_ego, target_raster, TARGET_POINTS_COLOR)
    else:
        draw_trajectory(im_ego, target_raster, TARGET_POINTS_COLOR)
        draw_trajectory(im_ego, predicted_raster[0], PREDICTED_POINTS_COLOR)

    plt.imshow(im_ego)
    plt.show()