# Notebook to load test set sequence and generate a model prediction video

In [17]:
import json
import time
import os
import argparse
import random
from pathlib import Path

import torch
from torch.utils.data import DataLoader
from torch.cuda import amp
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
import numpy as np
from PIL import Image
import cv2
import wandb
from tqdm import tqdm
from matplotlib import pyplot as plt
from scipy import interpolate

from data.comma_dataset import CommaDataset
from models.encoder import Encoder
from models.decoder import Decoder
from models.e2e_model import End2EndNet
from utils import paths, logging
from utils.losses import grad_l1_loss

## Load in best model

In [2]:
# Load in model checkpoint for best epoch
checkpoint_path = Path("/mnt/sda/datasets/comma2k19/checkpoints/splendid-snowflake-230/checkpoint_24.tar")
checkpoint = torch.load(checkpoint_path)

# Load in config
config_path = Path("configs/resnet34_sequence.json").resolve()
with config_path.open('r') as fr:
    config = json.load(fr)
print(config)

# Load in dataset config
ds_config_path = (Path("data/dataset_lists") / config['dataset']['dataset_file']).resolve()
with ds_config_path.open('r') as fr:
    ds_args = json.load(fr)['args']
print(ds_args)

{'model': {'encoder': {'name': 'resnet34', 'feature_len': 512}, 'decoder': {'name': 'LSTMDecoder', 'params': {'hidden_size': 512, 'num_layers': 1}}}, 'training': {'num_epochs': 30, 'log_iterations': 20, 'checkpoint_dir': '/mnt/sda/datasets/comma2k19/checkpoints'}, 'dataset': {'dataset_file': 'trainval_set.json', 'predict_speed': False}, 'train_loader': {'batch_size': 35, 'num_workers': 16}, 'val_loader': {'batch_size': 64, 'num_workers': 16}, 'scheduler': {'step_size': 5, 'gamma': 0.8}, 'optimizer': {'name': 'Adam', 'lr': 0.01}}
{'root_dir': '/mnt/sda/datasets/comma2k19/processed_dataset', 'future_steps': 40, 'past_steps': 4, 'dataset_size': 35000, 'max_bin_size': 5350, 'num_bins': 100, 'trainval_split': 0.9, 'train_size': 31500, 'val_size': 3500}


In [3]:
# Initialize model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

encoder = Encoder(config['model']['encoder'])
decoder = Decoder(config['model']['decoder'])
e2e_net = End2EndNet(encoder, decoder)
e2e_net.load_state_dict(checkpoint['model_state_dict'])
e2e_net.to(device)
e2e_net.eval()

Using device: cuda


End2EndNet(
  (encoder): Encoder(
    (encoder): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e

In [4]:
# Define image transforms
img_transforms = Compose([
    Resize([288, 384]),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

## Load in test video and run inference on each frame

In [5]:
test_routes_path = Path("data/dataset_lists/test_set_routes.json").resolve()
with test_routes_path.open('r') as fr:
    test_routes = json.load(fr)['test_routes']
test_routes

['2018-07-19--15-34-31',
 '2018-09-24--19-08-05',
 '2018-07-12--18-23-15',
 '2018-07-12--16-42-23',
 '2018-11-15--01-05-25',
 '2018-10-30--13-54-09']

In [6]:
# Try first route from id 2900 to 3100 and 6900 to 7100 (about 10 seconds each)
route_path = Path("/mnt/sda/datasets/comma2k19/processed_dataset") / test_routes[0]
id_range = (2900, 3100)

In [7]:
# Load route data arrays
orientations = np.load(route_path / "frame_orientations.npy")
positions = np.load(route_path / "frame_positions.npy")

video_frames = []
model_preds = []
with torch.no_grad():
    for frame_id in range(*id_range):
        # Convert positions to reference frame
        local_path = paths.get_local_path(positions, orientations, frame_id)

        # Divide data into previous and future arrays
        previous_path = local_path[frame_id - ds_args['past_steps'] : frame_id + 1]
        prev_path = torch.from_numpy(previous_path)

        # Grab previous and current frames
        frames = []
        for f_id in range(frame_id - ds_args['past_steps'], frame_id + 1):
            filename = str(f_id).zfill(6) + '.jpg'
            frame = Image.open(str(route_path / "images" / filename))
            
            if f_id == frame_id:
                video_frames.append(frame)

            # Apply transforms to frame
            frame = img_transforms(frame)
            frames.append(frame)

        # Stack frames into single array (T, C, H, W)
        frames = torch.stack(frames)

        # Add singleton batch size and send to gpu
        frames = torch.unsqueeze(frames, 0).to(device)
        prev_path = torch.unsqueeze(prev_path, 0).float().to(device)

        # Forward pass
        model_output = e2e_net(frames, prev_path)
        model_output = model_output.reshape((ds_args['future_steps'], 3))
        model_output = model_output.detach().cpu().numpy()

        model_preds.append(model_output)      

## Visualize predictions and generate video

In [19]:
pil_imgs = []
for frame, path in zip(video_frames, model_preds):
    # Draw path and convert to PIL
    img = np.array(frame)
    paths.draw_path(paths.smooth_path(path), img)
    pil_img = Image.fromarray(img)
    print(pil_img.size)
    # Shrink it down a bit
    pil_img = pil_img.resize((pil_img.size, hsize), Image.ANTIALIAS)
    pil_imgs.append(pil_img)

(1164, 874)


NameError: name 'basewidth' is not defined

In [14]:
# Generate GIF using PIL
save_path = "/home/methier/projects/end-to-end-driving/test_video.gif"
img, *imgs = pil_imgs
img.save(fp=save_path, format='GIF', append_images=imgs,
         save_all=True, duration=50, loop=0)