In [2]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np

import torch
from torch import optim
from torch.utils.data import DataLoader
import torch.autograd
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torchvision.utils import save_image
import torch.nn.functional as F

from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
# from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
# from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import os

# 数据处理

In [10]:
# set env variable for data
os.environ["L5KIT_DATA_FOLDER"] = "E:/Downloads/lyft-motion-prediction-autonomous-vehicles"
dm = LocalDataManager(None)
# get config
cfg = load_config_data("./agent_motion_config.yaml")
print(cfg)

{'format_version': 4, 'model_params': {'model_architecture': 'CVAE', 'latent_dim': 32, 'history_step_size': 1, 'history_num_frames': 50, 'future_step_size': 1, 'future_num_frames': 50, 'step_time': 0.1, 'render_ego_history': True}, 'raster_params': {'raster_size': [224, 224], 'pixel_size': [0.5, 0.5], 'ego_center': [0.25, 0.5], 'map_type': 'py_semantic', 'satellite_map_key': 'aerial_map/aerial_map.png', 'semantic_map_key': 'semantic_map/semantic_map.pb', 'dataset_meta_key': 'meta.json', 'filter_agents_threshold': 0.5, 'disable_traffic_light_faces': False, 'set_origin_to_bottom': True}, 'train_data_loader': {'key': 'scenes/sample.zarr', 'batch_size': 32, 'shuffle': True, 'num_workers': 0}, 'val_data_loader': {'key': 'scenes/test.zarr', 'batch_size': 32, 'shuffle': False, 'num_workers': 0}, 'train_params': {'checkpoint_every_n_steps': 10000, 'epochs': 5, 'eval_every_n_steps': 10000}}


In [11]:
# ===== INIT DATASET
train_cfg = cfg["train_data_loader"]
rasterizer = build_rasterizer(cfg, dm)
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
train_dataloader = DataLoader(train_dataset, shuffle=train_cfg["shuffle"], batch_size=train_cfg["batch_size"], 
                             num_workers=train_cfg["num_workers"])
print(train_dataset)

+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|    100     |   24838    |  1893736   |     316008    |       0.69      |        248.38        |        76.24         |        24.83         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+


# 参数

In [12]:
# 基本参数
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
epochs = cfg["train_params"]["epochs"]
latent_dim = cfg["model_params"]["latent_dim"]  # LSTM 的单元个数
num_classes = 3 # 类数
encoder_length = cfg["model_params"]["history_num_frames"]
decoder_length = cfg["model_params"]["future_num_frames"]
num_encoder_tokens = 2
num_decoder_tokens = 2
z_dimension = 2

# 模型

In [15]:
class CVAE(nn.Module):
    def __init__(self):
        super(CVAE, self).__init__()
        # 定义编码器
        self.encoder1 = nn.Sequential(
            nn.LSTM(num_encoder_tokens, latent_dim, batch_first=True),
            nn.BatchNorm2d(latent_dim),
        )
        self.encoder_fc1 = nn.Linear(latent_dim, z_dimension)
        self.encoder_fc2 = nn.Linear(latent_dim, z_dimension)
        self.decoder = nn.Sequential(
            nn.LSTM(z_dimension, latent_dim, batch_first=True),
            nn.BatchNorm2d(latent_dim),
            nn.Linear(latent_dim, 16),
            nn.ReLU(),
            nn.Linear(16, num_decoder_tokens),
            nn.Tanh(),
        )

    def noise_reparameterize(self, mean, logvar):
        eps = torch.randn(mean.shape).to(device)
        z = mean + eps * torch.exp(logvar)
        return z

    def forward(self, x):
        out = self.encoder1(x)
        mean = F.relu(self.encoder_fc1(out))
        logstd = F.relu(self.encoder_fc2(out))
        z = self.noise_reparameterize(mean, logstd)
        y_hat = self.decoder(z)
        return y_hat, mean, logstd


def loss_function(y_hat, y_true, mean, std):
    MSE = F.mse(y_hat, y_true, reduction='mean')
    # 因为var是标准差的自然对数，先求自然对数然后平方转换成方差
    var = torch.pow(torch.exp(std), 2)
    KLD = -0.5 * torch.sum(1+torch.log(var)-torch.pow(mean, 2)-var)
    return MSE+KLD


# 创建对象
cvae = CVAE().to(device)
# vae.load_state_dict(torch.load('./VAE_z2.pth'))
cvae_optimizer = torch.optim.Adam(cvae.parameters(), lr=0.001)

In [17]:
for epoch in range(epochs):  # 进行多个epoch的训练
    for i, data in enumerate(train_dataloader):
        y_hat, mean, std = cvae(data[history_positions])  # 输入
        loss = loss_function(y_hat, data[target_positions], mean, std)
        vae_optimizer.zero_grad()  # 在反向传播之前，先将梯度归0
        loss.backward()  # 将误差反向传播
        vae_optimizer.step()  # 更新参数
        # try:
        print('Epoch[{}/{}],vae_loss:{:.6f} '.format(
            epoch, num_epoch, loss.item(),
        ))

NameError: name 'history_positions' is not defined