In [None]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import optim
from torch.utils.data import DataLoader
import torch.autograd
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torchvision.utils import save_image
import torch.nn.functional as F

from mydataset import MyTrainDataset, my_dataset_worker_init_func

from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import os

# 数据处理

In [None]:
# Tunable locations: where the competition data lives and which experiment
# configuration file to read.
DATA_ROOT = "E:/Downloads/lyft-motion-prediction-autonomous-vehicles"
CONFIG_PATH = "./agent_motion_config.yaml"

# l5kit is pointed at the data through this environment variable.
os.environ["L5KIT_DATA_FOLDER"] = DATA_ROOT
dm = LocalDataManager(None)

# Load the experiment configuration and echo it for the record.
cfg = load_config_data(CONFIG_PATH)
print(cfg)

In [None]:
# ===== INIT DATASET
train_cfg = cfg["train_data_loader"]
rasterizer = build_rasterizer(cfg, dm)
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()

# The stock AgentDataset is only used here to report the dataset size and to
# inspect one sample; it gets its own name so `train_dataset` always refers to
# the dataset that actually feeds the DataLoader.
agent_dataset = AgentDataset(cfg, train_zarr, rasterizer)
print(len(agent_dataset))
print(agent_dataset)
print(agent_dataset[0].keys())

train_dataset = MyTrainDataset(cfg, dm, len(agent_dataset))

# DataLoader raises ValueError for persistent_workers=True when num_workers is
# 0, so only request persistent workers when worker processes are used at all.
num_train_workers = train_cfg["num_workers"]
train_dataloader = DataLoader(
    train_dataset,
    shuffle=train_cfg["shuffle"],
    batch_size=train_cfg["batch_size"],
    num_workers=num_train_workers,
    persistent_workers=num_train_workers > 0,
    worker_init_fn=my_dataset_worker_init_func,
)

# 参数

In [None]:
# Basic parameters
model_cfg = cfg["model_params"]

# Use the first CUDA device when one is available, otherwise the CPU.
# device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

epochs = cfg["train_params"]["epochs"]

# Network sizes taken from the configuration file.
latent_dim = model_cfg["latent_dim"]  # number of LSTM units
num_layers = model_cfg["num_layers"]
bidirectional = model_cfg["bidirectional"]
encoder_length = model_cfg["history_num_frames"]
decoder_length = model_cfg["future_num_frames"]

# Sizes fixed in the notebook.
num_classes = 3  # number of classes
num_encoder_tokens = 2
num_decoder_tokens = 2
z_dimension = 16

# 模型

In [None]:
class CVAE(nn.Module):
    """Sequence VAE built from an LSTM encoder and an LSTM decoder.

    The encoder LSTM reads a sequence of ``num_encoder_tokens``-dimensional
    points. Two small MLP heads map every encoder output step to the mean and
    the log standard deviation of a ``z_dimension``-dimensional latent. A
    latent sequence is sampled with the reparameterisation trick and decoded
    by a second LSTM followed by an MLP into ``num_decoder_tokens`` values per
    step.

    All layer sizes (``latent_dim``, ``num_layers``, ``bidirectional``,
    ``z_dimension``, ...) are read from module-level names at construction.
    """

    def __init__(self):
        super(CVAE, self).__init__()
        # Width of one LSTM output step (doubled for a bidirectional LSTM).
        lstm_out_dim = latent_dim * (1 + bidirectional)

        # Encoder
        self.encoder = nn.LSTM(
            num_encoder_tokens, latent_dim, num_layers=num_layers, bidirectional=bidirectional, batch_first=True)
        self.encoder_mean1 = nn.Linear(lstm_out_dim, 64)
        self.encoder_mean2 = nn.Linear(64, z_dimension)
        self.encoder_std1 = nn.Linear(lstm_out_dim, 32)
        self.encoder_std2 = nn.Linear(32, z_dimension)

        # Decoder
        self.decoder = nn.LSTM(z_dimension, latent_dim, num_layers=num_layers,
                               bidirectional=bidirectional, batch_first=True)
        self.decoder_fc1 = nn.Linear(lstm_out_dim, 32)
        self.decoder_fc2 = nn.Linear(32, 16)
        self.decoder_fc3 = nn.Linear(16, num_decoder_tokens)

    def noise_reparameterize(self, mean, logvar):
        """Sample ``z = mean + eps * exp(logvar)`` with ``eps ~ N(0, I)``.

        Despite its name, ``logvar`` is used as a log *standard deviation*
        (it is exponentiated without the 0.5 factor), which matches how
        ``forward`` calls this method.

        The noise is created with ``randn_like`` so it always has the dtype
        and device of ``mean`` instead of relying on a global ``device``.
        """
        eps = torch.randn_like(mean)
        return mean + eps * torch.exp(logvar)

    def forward(self, data):
        """Encode ``data["history_positions"]``, sample a latent and decode it.

        Args:
            data: mapping with a ``"history_positions"`` entry (tensor or
                array-like) shaped ``(batch, steps, num_encoder_tokens)``.
                A 2-D entry is treated as a single unbatched sequence.

        Returns:
            Tuple ``(y_hat, mean, logstd)``: the decoded sequence
            ``(batch, steps, num_decoder_tokens)`` plus the latent mean and
            log standard deviation, each ``(batch, steps, z_dimension)``.
            The output has one step per *input* step.
            NOTE(review): the loss compares ``y_hat`` with the target
            positions, so the configured history/future lengths presumably
            match — confirm.
        """
        # Follow whatever device the model currently lives on.
        dev = next(self.parameters()).device
        inputs = torch.as_tensor(data["history_positions"], dtype=torch.float32, device=dev)
        if inputs.dim() == 2:
            # Add the batch axis without resizing any tensor in place.
            inputs = inputs.unsqueeze(0)

        # Random initial hidden and cell states, one per layer and direction.
        state_shape = (num_layers * (1 + bidirectional), inputs.size(0), latent_dim)
        h0 = torch.randn(state_shape, device=dev)
        c0 = torch.randn(state_shape, device=dev)
        out1, _ = self.encoder(inputs, (h0, c0))

        # The last layer of each head is deliberately left linear: a ReLU
        # there would force the latent mean to be >= 0 and the log-std to be
        # >= 0 (std >= 1), so the posterior could never be centred on a
        # negative value nor be narrower than the prior.
        mean = self.encoder_mean2(F.relu(self.encoder_mean1(out1)))
        logstd = self.encoder_std2(F.relu(self.encoder_std1(out1)))

        z = self.noise_reparameterize(mean, logstd)
        out2, _ = self.decoder(z)
        out2 = F.relu(self.decoder_fc1(out2))
        out2 = F.relu(self.decoder_fc2(out2))
        y_hat = self.decoder_fc3(out2)
        return y_hat, mean, logstd


def loss_function(y_hat, data, mean, std):
    """Masked reconstruction error plus KL divergence to a standard normal.

    Args:
        y_hat: predicted positions; the targets are moved to its device.
        data: mapping with tensor entries ``"target_positions"`` (same shape
            as ``y_hat``) and ``"target_availabilities"`` (one flag per
            step, broadcast over the coordinate axis).
        mean: latent mean.
        std: latent log standard deviation (despite the name).

    Returns:
        Scalar tensor ``MSE + KLD``.
    """
    # Use the prediction's own device instead of a global `device`.
    dev = y_hat.device
    y_availabilities = data["target_availabilities"].unsqueeze(-1).to(dev)
    y_true = data["target_positions"].to(dev)

    MSE = F.mse_loss(y_hat, y_true, reduction='none')
    # not all the output steps are valid, but we can filter them out from the loss using availabilities
    MSE = (MSE * y_availabilities).mean()

    # `std` is the natural log of the standard deviation, so log(var) is
    # simply 2 * std. Taking it analytically avoids log(exp(std) ** 2), which
    # underflows to log(0) = -inf for very negative `std`.
    log_var = 2.0 * std
    KLD = -0.5 * torch.mean(1 + log_var - torch.pow(mean, 2) - torch.exp(log_var))
    return MSE + KLD


# Create the model on the selected device, then the Adam optimizer that
# updates its parameters.
cvae = CVAE()
cvae.to(device)
# vae.load_state_dict(torch.load('./VAE_z2.pth'))
cvae_optimizer = optim.Adam(cvae.parameters(), lr=1e-4)

In [None]:
# NOTE: disabled earlier version of the training loop, kept for reference.
# It stops each epoch after 1000 batches, prints the loss at every step and
# plots the per-step loss at the end.
# print(len(train_dataloader))
# lossdata=[]
# for epoch in range(epochs):  # train for several epochs
#     for i, data in enumerate(train_dataloader):
#         if i>=1000:
#             break
#         y_hat, mean, std = cvae(data)  # input
#         loss = loss_function(y_hat, data, mean, std)
#         cvae_optimizer.zero_grad()  # zero the gradients before back-propagation
#         loss.backward()  # back-propagate the error
#         cvae_optimizer.step()  # update the parameters
#         lossdata.append(loss.item())
#         print('Epoch[{}/{}],cvae_loss:{:.6f} '.format(
#             epoch, epochs, loss.item(),
#         ))
# plt.plot(np.arange(len(lossdata)), lossdata, label="train loss")
# plt.legend()
# plt.show()

In [None]:
# ==== TRAIN LOOP
# Each "epoch" covers only a fraction of the dataloader: len // EPOCH_DIVISOR steps.
EPOCH_DIVISOR = 50
steps_per_epoch = len(train_dataloader) // EPOCH_DIVISOR

# Training mode and gradient tracking only need to be switched on once.
cvae.train()
torch.set_grad_enabled(True)

losses_avg = []    # mean loss of every epoch
losses_train = []  # per-step losses of the current (finally: the last) epoch

# The iterator is created once and shared by all epochs, so each epoch
# continues where the previous one stopped instead of restarting from the
# first batch. It is only rebuilt when the dataloader is exhausted.
tr_it = iter(train_dataloader)
for epoch in range(epochs):  # train for several epochs
    progress_bar = tqdm(range(steps_per_epoch), position=0)
    losses_train = []
    for _ in progress_bar:
        try:
            data = next(tr_it)
        except StopIteration:
            tr_it = iter(train_dataloader)
            data = next(tr_it)

        # Forward pass
        y_hat, mean, std = cvae(data)
        loss = loss_function(y_hat, data, mean, std)

        # Backward pass
        cvae_optimizer.zero_grad()
        loss.backward()
        cvae_optimizer.step()

        losses_train.append(loss.item())
        progress_bar.set_description(f"loss: {loss.item()} loss(avg): {np.mean(losses_train)}")
    losses_avg.append(np.mean(losses_train))

# 结果

In [None]:
# Per-step training loss. `losses_train` is reset at the start of every epoch
# by the training cell, so this curve covers the most recent epoch only.
step_index = np.arange(len(losses_train))
plt.plot(step_index, losses_train, label="train loss")
plt.legend()
plt.show()

In [None]:
# Mean training loss of each epoch (one point per entry of `losses_avg`).
# The legend and axes are labelled so this figure is not mistaken for the
# per-step curve plotted in the previous cell.
plt.plot(np.arange(len(losses_avg)), losses_avg, label="mean train loss per epoch")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()

# 评估

In [None]:
# ===== GENERATE AND LOAD CHOPPED DATASET
# Build a chopped copy of the validation data; the call returns the base path
# under which the evaluation cell below looks for the chopped zarr,
# "mask.npz" and "gt.csv".
num_frames_to_chop = 100
eval_cfg = cfg["val_data_loader"]
eval_base_path = create_chopped_dataset(
    dm.require(eval_cfg["key"]),
    cfg["raster_params"]["filter_agents_threshold"],
    num_frames_to_chop,
    cfg["model_params"]["future_num_frames"],
    MIN_FUTURE_STEPS,
)

In [None]:
# Files stored under the chopped-dataset folder: the chopped zarr (same name
# as the source zarr), the agents mask and the ground-truth CSV.
eval_base_dir = Path(eval_base_path)
eval_zarr_path = str(eval_base_dir / Path(dm.require(eval_cfg["key"])).name)
eval_mask_path = str(eval_base_dir / "mask.npz")
eval_gt_path = str(eval_base_dir / "gt.csv")

eval_zarr = ChunkedDataset(eval_zarr_path).open()
eval_mask = np.load(eval_mask_path)["arr_0"]

# ===== INIT DATASET AND LOAD MASK
eval_dataset = AgentDataset(cfg, eval_zarr, rasterizer, agents_mask=eval_mask)
eval_dataloader = DataLoader(
    eval_dataset,
    batch_size=eval_cfg["batch_size"],
    shuffle=eval_cfg["shuffle"],
    num_workers=0,
    drop_last=False,
)
print(eval_dataset)
print(eval_dataset[0].keys())

# Disabled alternative that feeds evaluation through MyTrainDataset instead.
# eval_dataset = MyTrainDataset(cfg, dm, agents_mask=eval_mask)
# eval_dataloader = DataLoader(
#     eval_dataset,
#     shuffle=eval_cfg["shuffle"], 
#     batch_size=eval_cfg["batch_size"],
#     num_workers=eval_cfg["num_workers"],
#     persistent_workers=True,
#     worker_init_fn=my_dataset_worker_init_func,
#     drop_last=True
# )

## 保存预测

In [None]:
# ==== EVAL LOOP
cvae.eval()

# store information for evaluation
future_coords_offsets_pd = []
timestamps = []
agent_ids = []

progress_bar = tqdm(eval_dataloader, position=0)

# Gradients are disabled only for the duration of this loop. A context manager
# is used instead of torch.set_grad_enabled(False) so the global autograd
# switch is not left off for whichever cell is run next.
with torch.no_grad():
    for data in progress_bar:
        y_hat, mean, std = cvae(data)

        # convert agent coordinates into world offsets
        agents_coords = y_hat.cpu().numpy()
        world_from_agents = data['world_from_agent'].numpy()
        centroids = data["centroid"].numpy()
        coords_offset = transform_points(agents_coords, world_from_agents) - centroids[:, None, :2]

        # coords_offset is already a freshly computed array, so it is stored as is
        future_coords_offsets_pd.append(coords_offset)
        timestamps.append(data["timestamp"].numpy().copy())
        agent_ids.append(data["track_id"].numpy().copy())

# Write all predictions to a CSV file in the system temp directory; the
# metrics cell reads it back through `pred_path`.
pred_path = f"{gettempdir()}/pred.csv"

write_pred_csv(pred_path,
               timestamps=np.concatenate(timestamps),
               track_ids=np.concatenate(agent_ids),
               coords=np.concatenate(future_coords_offsets_pd),
              )

## 预测结果

In [None]:
# Score the prediction CSV against the ground-truth CSV with both metrics and
# print one line per metric.
metrics = compute_metrics_csv(
    eval_gt_path,
    pred_path,
    [neg_multi_log_likelihood, time_displace],
)
for metric_name in metrics:
    metric_mean = metrics[metric_name]
    print(metric_name, metric_mean)

In [None]:
cvae.eval()

# build a dict to retrieve future trajectories from GT
# NOTE(review): the key is track_id and timestamp concatenated as strings;
# the lookup below builds it the same way.
gt_rows = {}
for row in read_gt_csv(eval_gt_path):
    gt_rows[row["track_id"] + row["timestamp"]] = row["coord"]

eval_ego_dataset = EgoDataset(cfg, eval_dataset.dataset, rasterizer)

# Start from the last frame of scene_0 and step one scene at a time. The
# stride reuses `num_frames_to_chop` from the chopping cell instead of a
# repeated literal 100 (assumes every chopped scene holds exactly that many
# frames — confirm against create_chopped_dataset).
for frame_number in range(num_frames_to_chop - 1, len(eval_zarr.frames), num_frames_to_chop):
    agent_indices = eval_dataset.get_frame_indices(frame_number)
    if not len(agent_indices):
        continue

    # get AV point-of-view frame
    data_ego = eval_ego_dataset[frame_number]
    im_ego = rasterizer.to_rgb(data_ego["image"].transpose(1, 2, 0))
    # not used further in this cell
    center = np.asarray(cfg["raster_params"]["ego_center"]) * cfg["raster_params"]["raster_size"]

    predicted_positions = []
    target_positions = []

    for v_index in agent_indices:
        data_agent = eval_dataset[v_index]
        # Inference only: disable autograd locally rather than through the
        # global torch.set_grad_enabled switch.
        with torch.no_grad():
            out_net = cvae(data_agent)
        out_pos = out_net[0].reshape(-1, 2).detach().cpu().numpy()
        # store absolute world coordinates
        predicted_positions.append(transform_points(out_pos, data_agent["world_from_agent"]))
        # retrieve target positions from the GT and store as absolute coordinates
        track_id, timestamp = data_agent["track_id"], data_agent["timestamp"]
        target_positions.append(gt_rows[str(track_id) + str(timestamp)] + data_agent["centroid"][:2])

    # convert coordinates to AV point-of-view so we can draw them
    predicted_positions = transform_points(np.concatenate(predicted_positions), data_ego["raster_from_world"])
    target_positions = transform_points(np.concatenate(target_positions), data_ego["raster_from_world"])

    draw_trajectory(im_ego, predicted_positions, PREDICTED_POINTS_COLOR)
    draw_trajectory(im_ego, target_positions, TARGET_POINTS_COLOR)

    plt.imshow(im_ego)
    plt.show()