In [1]:
from typing import List, Tuple, Dict

import numpy as np
import pandas as pd
import torch
from torch import Tensor
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import l5kit
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from torchvision.models.resnet import resnet50, resnet18, resnet34, resnet101
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path
import utils
import matplotlib.pyplot as plt


import os
import time
import random

import warnings
warnings.filterwarnings("ignore")
from IPython.display import display
from tqdm import tqdm_notebook
import gc, psutil

print(l5kit.__version__)

1.1.0


In [2]:
cfg = {
    'format_version': 4,
    'data_path': '/home/yx/WSY/Prediction/datasets/lyft-motion-prediction-autonomous-vehicles',
    'model_params': {
        'model_architecture': 'resnet50',
        'history_num_frames': 10,
        'history_step_size': 1,
        'history_delta_time': 0.1,
        'future_num_frames': 50,
        'future_step_size': 1,
        'future_delta_time': 0.1,
        'model_name': "single_resnet50",
        'lr': 1e-4,
        'weight_path': '',
        'train': True,
        'predict': False,
    },
    'raster_params': {
        'raster_size': [224, 224],
        'pixel_size': [0.5, 0.5],
        'ego_center': [0.25, 0.5],
        'map_type': 'py_semantic',
        'satellite_map_key': 'aerial_map/aerial_map.png',
        'semantic_map_key': 'semantic_map/semantic_map.pb',
        'dataset_meta_key': 'meta.json',
        'filter_agents_threshold': 0.5,
    },
    'train_data_loader': {
        'key': 'scenes/train.zarr',
        'batch_size': 16,
        'shuffle': True,
        'num_workers': 4,
    },
    'test_data_loader': {
        'key': 'scenes/test.zarr',
        'batch_size': 128,
        'shuffle': False,
        'num_workers': 4,
    },
    'train_params': {
        'steps': 200000,
        'metrics_steps': 1000,
        'update_steps': 1000,
        'checkpoint_steps': 1000,
    }
}

In [3]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
set_seed(42)

In [4]:
class Encoder(nn.Module):
    def __init__(self, embedding_dim=64, h_dim=64, num_layers=1, dropout=0.0, v_dim=2):

        super(Encoder, self).__init__()
        self.v_dim = v_dim
        self.h_dim = h_dim
        self.embedding_dim = embedding_dim
        self.num_layers = num_layers

        self.encoder = nn.LSTM(
            embedding_dim, h_dim, num_layers, dropout=dropout
        )

        self.spatial_embedding = nn.Linear(v_dim, embedding_dim)

    def init_hidden(self, batch):
        return (
            torch.zeros(self.num_layers, batch, self.h_dim).cuda(),
            torch.zeros(self.num_layers, batch, self.h_dim).cuda()
        )

    def forward(self, obs_traj):
        # obs_traj: history_len * batch * input_size
        batch = obs_traj.size(1)
        # 历史轨迹重构为(obs_len * batch, v_dim)并送入embedding层(全连接)，成为
        # print(obs_traj.shape)
        obs_traj_embedding = self.spatial_embedding(obs_traj.contiguous().view(-1, self.v_dim))
        obs_traj_embedding = obs_traj_embedding.view(
            -1, batch, self.embedding_dim
        )
        state_tuple = self.init_hidden(batch)
        output, state = self.encoder(obs_traj_embedding, state_tuple)
        final_h = state[0]
        # obs_traj: batch * h_dim
        return final_h


class Single(nn.Module):
    def __init__(self, cfg: Dict, num_modes=3, h_dim: int = 64):
        super().__init__()
        # encoder参数默认
        self.encoder = Encoder(v_dim=4)
        architecture = cfg["model_params"]["model_architecture"]
        backbone = eval(architecture)(pretrained=True, progress=True)
        self.backbone = backbone
        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels
        self.backbone.conv1 = nn.Conv2d(
            num_in_channels,
            self.backbone.conv1.out_channels,
            kernel_size=self.backbone.conv1.kernel_size,
            stride=self.backbone.conv1.stride,
            padding=self.backbone.conv1.padding,
            bias=False,
        )
        if architecture == "resnet50":
            backbone_out_features = 2048
        else:
            backbone_out_features = 512
        
        self.future_len = cfg["model_params"]["future_num_frames"]
        num_targets = 2 * self.future_len
        # 全连接层
        self.head = nn.Sequential(
            # nn.Dropout(0.2),
            nn.Linear(in_features=backbone_out_features+h_dim, out_features=4096),
        )
        
        
        # 输出层：输入全连接层的输出, 得到轨迹和概率
        self.logit1 = nn.Linear(in_features=4096, out_features=2048)
        self.logit2 = nn.Linear(in_features=2048, out_features=num_targets)
        
        # 全连接BN层
        self.bn1 = nn.BatchNorm1d(4096)
        self.bn2 = nn.BatchNorm1d(2048)

    def forward(self, x, history_traj):
        x = self.backbone.conv1(x)
        x = self.backbone.bn1(x)
        x = self.backbone.relu(x)
        x = self.backbone.maxpool(x)
        x = self.backbone.layer1(x)
        x = self.backbone.layer2(x)
        x = self.backbone.layer3(x)
        x = self.backbone.layer4(x)
        x = self.backbone.avgpool(x)
        x = torch.flatten(x, 1)

        # h: batch * h_dim
        h = self.encoder(history_traj)

        x = torch.cat([x, h.squeeze(0)], dim=1)
        x = self.bn1(self.head(x))
        x = self.bn2(self.logit1(x))
        x = F.relu(self.logit2(x))

        return x

In [5]:
def forward(data, model, device, criterion=nn.MSELoss(reduction="none")):
    inputs = data["image"].to(device)
    history_traj = torch.from_numpy(np.flip(data["history_positions"].numpy(), axis=1))
    history_yaw = torch.from_numpy(np.flip(data["history_yaws"].numpy(), axis=1))
    history_availabilities = torch.unsqueeze(torch.from_numpy(np.flip(data["history_availabilities"].numpy(), axis=1)), 2)
    # print(history_traj.shape, history_yaw.shape, history_availabilities.shape)
    history = torch.cat([history_traj, history_yaw], 2)
    history = torch.cat([history, history_availabilities], 2).to(device)
    target_availabilities = data["target_availabilities"].unsqueeze(-1).to(device)
    targets = data["target_positions"].to(device)
    # Forward pass
    outputs = model(inputs, history.permute(1, 0, 2)).reshape(targets.shape)
    loss = criterion(outputs, targets)
    # not all the output steps are valid, but we can filter them out from the loss using availabilities
    loss = loss * target_availabilities
    loss = loss.mean()
    return loss, outputs

In [6]:
DIR_INPUT = cfg["data_path"]
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT
dm = LocalDataManager()
rasterizer = build_rasterizer(cfg, dm)
train_cfg = cfg["train_data_loader"]
train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open(cached=False)  # to prevent run out of memory
train_dataset = AgentDataset(cfg, train_zarr, rasterizer)
train_dataloader = DataLoader(train_dataset, shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"], num_workers=train_cfg["num_workers"])
print(train_dataset, len(train_dataset))

train_writer = SummaryWriter('../log/Single', comment='Single')

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = Single(cfg)
weight_path = cfg["model_params"]["weight_path"]

if weight_path:
    model.load_state_dict(torch.load(weight_path))
    print(weight_path, "loaded")
model.to(device)
learning_rate = cfg["model_params"]["lr"]
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=10000,gamma = 0.5)
print(f'device {device}')

+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
| Num Scenes | Num Frames | Num Agents | Num TR lights | Total Time (hr) | Avg Frames per Scene | Avg Agents per Frame | Avg Scene Time (sec) | Avg Frame frequency |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+
|   16265    |  4039527   | 320124624  |    38735988   |      112.19     |        248.36        |        79.25         |        24.83         |        10.00        |
+------------+------------+------------+---------------+-----------------+----------------------+----------------------+----------------------+---------------------+ 22496709
device cuda:0


In [7]:
if cfg["model_params"]["train"]:
    tr_it = iter(train_dataloader)
    n_steps = cfg["train_params"]["steps"]
    progress_bar = tqdm_notebook(range(1, 1 + n_steps), mininterval=5.)
    losses = []
    losses_fde = []
    iterations = []
    metrics = []
    metrics_fde = []
    times = []
    model_name = cfg["model_params"]["model_name"]
    update_steps = cfg['train_params']['update_steps']
    metrics_steps = cfg['train_params']['metrics_steps']
    checkpoint_steps = cfg['train_params']['checkpoint_steps']
    t_start = time.time()
    torch.set_grad_enabled(True)
    print('start train')

    for i in progress_bar:
        try:
            data = next(tr_it)
        except StopIteration:
            tr_it = iter(train_dataloader)
            data = next(tr_it)
        model.train()  # somehow we need this is ever batch or it perform very bad (not sure why)
        loss, pred = forward(data, model, device)

        # Backward pass
        scheduler.step()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_v = loss.item()
        losses.append(loss_v)
        train_writer.add_scalar('loss_v', loss_v, i)

        if i % metrics_steps == 0:
            
            # 求其他尺度指标
            final_target = data['target_positions'][:, -1, :].to(device)
            fde_bs = []
            for index in range(0, cfg['train_data_loader']['batch_size']):
                fde = ((pred[index][-1][0] - final_target[index][0])**2 +
                       (pred[index][-1][1] - final_target[index][1])**2)**0.5
                fde_bs.append(fde.item())
            losses_fde.append(np.mean(fde_bs))
            train_writer.add_scalar('loss_fde', np.mean(fde_bs), i)


        if i % update_steps == 0:
            mean_losses = np.mean(losses)
            losses = []
            train_writer.add_scalar('mean_loss', mean_losses, i)
            mean_fde = np.mean(losses_fde)
            train_writer.add_scalar('mean_fde', mean_fde, i)
            losses_fde = []
            timespent = (time.time() - t_start) / 60
            print('i: %5d' % i,
                  'loss: %10.5f' % loss_v, 'loss(avg): %10.5f' % mean_losses,
                  'loss_fde(avg): %10.5f' % mean_fde, ' %.2fmins' % timespent, end=' | \n')
            if i % checkpoint_steps == 0:
                torch.save(model.state_dict(), f'../model/{model_name}.pth')
                torch.save(optimizer.state_dict(), f'../model/{model_name}_optimizer.pth')
            iterations.append(i)
            metrics.append(mean_losses)
            metrics_fde.append(mean_fde)
            times.append(timespent)

    torch.save(model.state_dict(), f'{model_name}_final.pth')
    torch.save(optimizer.state_dict(), f'{model_name}_optimizer_final.pth')
    results = pd.DataFrame({
        'iterations': iterations,
        'metrics (avg)': metrics,
        'metrics_fde (avg)': metrics_fde,
        'elapsed_time (mins)': times,
    })
    results.to_csv(f'train_metrics_{model_name}_{n_steps}.csv', index=False)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=200000.0), HTML(value='')))

start train



ValueError: step must be greater than zero