In [3]:
import numpy as np
import os, sys, time
os.environ['KMP_DUPLICATE_LIB_OK']='True'

import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset
from torchvision.models.resnet import resnet18
from tqdm import tqdm
from typing import Dict

from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer


sys.path.append('src')
from src.dataset import *
from src.model import *

## lightning
from effnet import EfficientNet

import pytorch_lightning as pl

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint

from torch.cuda.amp import GradScaler, autocast 

DIR_INPUT = "../../input/lyft-motion-prediction-autonomous-vehicles"
SINGLE_MODE_SUBMISSION = f"{DIR_INPUT}/single_mode_sample_submission.csv"
MULTI_MODE_SUBMISSION = f"{DIR_INPUT}/multi_mode_sample_submission.csv"

DEBUG = False

cfg = {
    'format_version': 4,
    'model_params': {
        'model_architecture': 'resnet50',
        'history_num_frames': 10,
        'history_step_size': 1,
        'history_delta_time': 0.1,
        'future_num_frames': 50,
        'future_step_size': 1,
        'future_delta_time': 0.1
    },
    
    'raster_params': {
        'raster_size': [128, 128],  ## 300 300
        'pixel_size': [0.5, 0.5], ## 0.5 0.5
        'ego_center': [0.25, 0.5],
        'map_type': 'py_semantic',
        'satellite_map_key': 'aerial_map/aerial_map.png',
        'semantic_map_key': 'semantic_map/semantic_map.pb',
        'dataset_meta_key': 'meta.json',
        'filter_agents_threshold': 0.5
    },
    
    'train_data_loader': {
        'key': 'scenes/train.zarr',
        'batch_size': 16,
        'shuffle': True,
        'num_workers': 16,
    },
    
    
    'val_data_loader': {
        'key': 'scenes/validate.zarr',
        'batch_size': 4,
        'shuffle': True,
        'num_workers': 0,
    },
    
    'train_params': {
        'max_num_steps': 100 if DEBUG else 500000,
        'checkpoint_every_n_steps': 5000,
        
        # 'eval_every_n_steps': -1
    }
}

# set env variable for data
os.environ["L5KIT_DATA_FOLDER"] = DIR_INPUT

In [4]:
from torch.utils.data import Dataset, DataLoader
import bz2, pickle
class LyftImageDataset(Dataset):

    def __init__(self, data_folder):
        super().__init__()
        self.data_folder = data_folder
        self.files = []

        for filename in os.listdir(self.data_folder):
            if filename.endswith(".pbz"):
                self.files.append(filename)

        print(len(self.files))
        print(self.files[0])

    def __getitem__(self, index: int):
        return self.obj_load(self.files[index])

    def obj_load(self, name):
        with bz2.BZ2File(f'{self.data_folder}/{name}', 'rb') as f:
            return pickle.load(f)

    def __len__(self):
        return len(self.files)

...
train_cfg = cfg["train_data_loader"]
#train_zarr = ChunkedDataset(dm.require(train_cfg["key"])).open()
train_dataset = LyftImageDataset('../cache/pre_300px__0_5__10')
train_loader = DataLoader(train_dataset,
                              shuffle=train_cfg["shuffle"],
                              batch_size=train_cfg["batch_size"], num_workers=train_cfg['num_workers'])

772009
sample_0.pbz


In [5]:
%%time
for i, batch in enumerate(train_loader):
    print(batch['image'].shape)
    if i == 0:
        break
    

torch.Size([16, 25, 300, 300])
CPU times: user 1.44 s, sys: 262 ms, total: 1.7 s
Wall time: 3.63 s


In [6]:
from src.loss import *
from linformer_pytorch import Linformer
from effnet import *

class LyftModel(pl.LightningModule):
    def __init__(self, cfg: Dict, num_modes=3):
        super().__init__()

        ## c
        num_history_channels = (cfg["model_params"]["history_num_frames"] + 1) * 2
        num_in_channels = 3 + num_history_channels

        # X, Y coords for the future positions (output shape: Bx50x2)
        self.future_len = cfg["model_params"]["future_num_frames"]
        num_targets = 2 * self.future_len

        # TODO: support other than resnet18?
        self.backbone = EfficientNet.from_pretrained('efficientnet-b1', in_channels=num_in_channels)
        backbone_out_features = self.backbone._fc.weight.shape[1]

        self.gru = torch.nn.GRU(input_size=1280, hidden_size=1280, 
                    num_layers=1, dropout=0.3, batch_first=True, bidirectional=True)
        # You can add more layers here.

        self.num_preds = num_targets * num_modes
        self.num_modes = num_modes
        
        self.head = nn.Sequential(nn.Linear(10240, 4096), nn.Dropout(0.3), nn.ReLU(), nn.Linear(4096, 2048))
        self.logit = nn.Linear(2048, out_features=self.num_preds + num_modes)

    def init_weight():
        init_layer(self.logit)

    def forward(self, x):
        x = self.backbone(x)
        x = F.avg_pool2d(x, kernel_size=(2, 2))
        x = torch.mean(x, dim=3)
        x = F.avg_pool1d(x, kernel_size=3, stride=1, padding=1)

        x = F.dropout(x, p=0.3, training=self.training)
        x = x.transpose(1, 2)
        x, _ = self.gru(x)
        x = x.transpose(1, 2)
        x = torch.flatten(x, 1)
        #x = F.dropout(x, p=0.3, training=self.training)
        x = self.head(x)
        x = self.logit(x)

        # pred (bs)x(modes)x(time)x(2D coords)
        # confidences (bs)x(modes)
        bs, _ = x.shape
        pred, confidences = torch.split(x, self.num_preds, dim=1)
        pred = pred.view(bs, self.num_modes, self.future_len, 2)
        assert confidences.shape == (bs, self.num_modes)
        confidences = torch.softmax(confidences, dim=1)
        return pred, confidences

    def training_step(self, batch, batch_idx):
        data = batch
        inputs = data["image"].to(device)
        target_availabilities = data["target_availabilities"].to(device)
        targets = data["target_positions"].to(device)

        pred, confidences = self(inputs)
        loss = pytorch_neg_multi_log_likelihood_batch(targets, pred, confidences, target_availabilities)

        result = pl.TrainResult(loss)
        result.log('train_loss', loss, on_epoch=True)
        return result

    # def validation_step(self, batch, batch_idx):
    #     data = batch
    #     inputs = data["image"].to(device)
    #     target_availabilities = data["target_availabilities"].to(device)
    #     targets = data["target_positions"].to(device)

    #     pred, confidences = self(inputs)
    #     loss = pytorch_neg_multi_log_likelihood_batch(targets, pred, confidences, target_availabilities)

    #     result = pl.EvalResult(checkpoint_on=loss)
    #     result.log('val_loss', loss)
    #     return result

    def configure_optimizers(self):
        self.optimizer = torch.optim.AdamW(self.parameters(), lr=3e-1)
        #self.scheduler = torch.optim.lr_scheduler.OneCycleLR(
        #                            self.optimizer, max_lr=3e-3,
        #                            anneal_strategy='linear', div_factor=100,
        #                            steps_per_epoch=int(22000000/128),
        #                            epochs=1)
        return [self.optimizer]#, [self.scheduler]

In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = LyftModel(cfg).to(device)
# model.training_step(batch, 0)

dummy_input = batch
model.training_step(dummy_input, [1])

# dummy_input = torch.zeros((1, 25, 128, 128)).to(device)
# model(dummy_input)

['_fc.weight', '_fc.bias', 'fc1.weight', 'fc1.bias', 'fc2.weight', 'fc2.bias', 'fc3.weight', 'fc3.bias']
Loaded pretrained weights for efficientnet-b1




{'minimize': tensor(2194.2346, device='cuda:0', grad_fn=<MeanBackward0>), 'checkpoint_on': tensor(2194.2346, device='cuda:0'), 'step_train_loss': tensor(2194.2346, device='cuda:0'), 'epoch_train_loss': tensor(2194.2346, device='cuda:0')}

In [8]:
trainer = Trainer(gpus=1, max_epochs=10, precision=16, gradient_clip_val=0.5)
trainer.fit(model, train_loader)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]
Using native 16bit precision.

  | Name     | Type         | Params
------------------------------------------
0 | backbone | EfficientNet | 7 M   
1 | gru      | GRU          | 19 M  
2 | head     | Sequential   | 50 M  
3 | logit    | Linear       | 620 K 


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…



AssertionError: confidences should sum to 1