In [12]:
import sys
import os

sys.path.insert(0, os.path.abspath(".."))

import time
import torch
from tool.darknet2pytorch import Darknet
import cv2
import imageio
import shutil
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from nuscenes.nuscenes import NuScenes
from cfg.train.cfg_yolov4_BEV_area_nuScenes import Cfg as cfg

# Pick the compute device: GPU when CUDA is usable, CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Point the imported config at the checkpoint, dataset root and model definition.
cfg.weights = "../checkpoints/area_focalloss_alpha50_gamma6_rows50_sgd_nomax/Yolo_BEV_area_nuScenes_epoch39__BESTSOFAR.pth"
cfg.dataset_dir = "../../data/nuScenes/"
cfg.cfgfile = "../cfg/model/yolov4_BEV_area_nuScenes.cfg"

# Collect the entries of the test split file, one per line, with surrounding
# whitespace removed.
with open(os.path.join(cfg.dataset_dir, "splits", "test_scenes.txt")) as split_file:
    scenes = [entry.strip() for entry in split_file]

# Create the NuScenes handle for the chosen version, rooted at the dataset directory.
version = "v1.0-trainval"
nusc = NuScenes(version=version, dataroot=cfg.dataset_dir, verbose=False)

In [13]:
# Load the checkpoint and remove the "module." prefix that torch.nn.DataParallel
# prepends to parameter names when a model is saved while wrapped.
state_dict = torch.load(cfg.weights, map_location=torch.device(device))
dp_prefix = "module."
# Strip per key and only on a true prefix match: a substring test on the first
# key alone would cut 7 characters off every key even when a key merely
# contains "module" somewhere in its name.
if any(k.startswith(dp_prefix) for k in state_dict):
    state_dict = {
        (k[len(dp_prefix):] if k.startswith(dp_prefix) else k): v
        for k, v in state_dict.items()
    }

# Build the network from the Darknet cfg file and restore the weights.
m = Darknet(cfg.cfgfile, model_type="BEV_dist")
m.load_state_dict(state_dict)
# Wrap for multi-GPU execution only when more than one device is visible.
if torch.cuda.device_count() > 1:
    m = torch.nn.DataParallel(m)
m.to(device)
m.eval()  # inference mode for layers such as dropout / batch norm
print("Done")

Done


In [24]:
# Run the network on every sample of one scene for one camera, collecting the
# thresholded predictions, the resized frames and the per-frame inference time.
scene_id = 2
sensor = "CAM_FRONT_RIGHT"
thr = 0.60  # prediction values below this are zeroed
# params
pred_times = []
preds = []
frames = []

# get current scene
scene = nusc.get("scene", scenes[scene_id])
current_token = scene["first_sample_token"]
n_frames = scene["nbr_samples"]

# Inference only: no_grad avoids recording autograd state for every frame.
with torch.no_grad():
    # Samples form a linked list; an empty "next" token marks the last one.
    while current_token != "":

        # get sample
        sample = nusc.get("sample", current_token)
        current_token = sample["next"]

        # get image path (first element returned by get_sample_data)
        img_path = nusc.get_sample_data(sample["data"][sensor])[0]

        # prepare img for network
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        frame = img.copy()
        img = img.astype(float) / 255.0
        img = cv2.resize(img, (cfg.width, cfg.height))
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        # named net_input so the builtin input() is not shadowed
        net_input = torch.Tensor(img).unsqueeze(0).to(device)

        # predict
        # CUDA kernels are launched asynchronously; synchronize on both sides
        # so the timer brackets the actual forward pass, not just the launch.
        if device == "cuda":
            torch.cuda.synchronize()
        start = time.perf_counter()
        pred = m(net_input)
        if device == "cuda":
            torch.cuda.synchronize()
        end = time.perf_counter()

        # prepare pred and frame: drop singleton dims, move to host memory,
        # transpose, zero everything below thr, then flip vertically
        pred = pred.squeeze().detach().cpu().numpy().transpose()
        pred = np.where(pred >= thr, pred, 0.0)
        pred = np.flipud(pred)
        frame = cv2.resize(frame, (cfg.width, cfg.height))

        # collect data
        pred_times.append(end - start)
        preds.append(pred)
        frames.append(frame)

In [25]:
# Round-trip every prediction through a PNG file so that the colours applied by
# plt.imsave end up as pixel values in new_preds.
tmp_dir = "preds_tmp"
# exist_ok: a directory left behind by an interrupted run must not break a re-run.
os.makedirs(tmp_dir, exist_ok=True)
new_preds = []
try:
    for i, p in enumerate(preds):
        png_path = os.path.join(tmp_dir, f"{i}.png")
        plt.imsave(png_path, p)
        # keep the first three channels only (drops alpha if present)
        new_preds.append(plt.imread(png_path)[..., :3])
finally:
    # always remove the scratch directory, even if a frame fails to save/load
    shutil.rmtree(tmp_dir)

In [28]:
# Build one collage per frame (camera frame on the left, coloured prediction on
# the right) and write the sequence out as an animated GIF.
sequence = []
for f, p in zip(frames, new_preds):

    # rescaling predictions: double the size when the result is still shorter
    # than the frame
    if (p.shape[0] * 2 < f.shape[0]):
        p = cv2.resize(p, (p.shape[1]*2, p.shape[0]*2))

    # compute padding values: split the missing rows so that top + bottom is
    # exactly the height difference, for even and odd differences alike
    # (np.hstack below requires identical heights)
    missing_rows = f.shape[0] - p.shape[0]
    if missing_rows < 0:
        raise ValueError(
            f"prediction height {p.shape[0]} exceeds frame height {f.shape[0]}"
        )
    rows_top = missing_rows // 2
    rows_bot = missing_rows - rows_top

    # apply paddings (black bars above and below the prediction)
    pad_top = np.zeros((rows_top, p.shape[1], 3))
    pad_bot = np.zeros((rows_bot, p.shape[1], 3))
    p = np.vstack((pad_top, p, pad_bot))

    # 5-pixel black margin on the left and right of the prediction
    pad_lr = np.zeros((p.shape[0], 5, 3))
    p = np.hstack((pad_lr, p, pad_lr))

    # create frames: scale the prediction to 0..255 and convert both to uint8
    p = p * 255
    f, p = f.astype("uint8"), p.astype("uint8")
    collage = np.hstack((f, p))
    sequence.append(collage)

imageio.mimsave(f"{scenes[scene_id]}_{sensor}.gif", sequence, duration=0.35)

In [27]:
# Average throughput in frames per second: the reciprocal of the mean
# per-frame inference time collected in pred_times.
latencies = np.asarray(pred_times)
1 / np.mean(latencies)

3.2131154247204345