# Data setup and Environment Init

In [None]:
# Start from a clean checkout of the Waymo Open Dataset repository.
!rm -rf waymo-od > /dev/null
!git clone https://github.com/waymo-research/waymo-open-dataset.git waymo-od
# List all branches, then check out the remote master branch.
!cd waymo-od && git branch -a
!cd waymo-od && git checkout remotes/origin/master
# Upgrade pip and install the pinned Waymo Open Dataset package.
!pip3 install --upgrade pip
!pip3 install waymo-open-dataset-tf-2-1-0==1.2.0 

In [2]:
# Authenticate this Colab session with the user's Google account.
# NOTE(review): presumably required so gcsfuse can read the dataset bucket below — confirm.
from google.colab import auth
auth.authenticate_user()

In [None]:
# Mount Google Drive at /content/gdrive (forcing a remount); the training cells
# read and write their checkpoints under "/content/gdrive/My Drive/".
from google.colab import drive
drive.mount('/content/gdrive', force_remount = True)

In [None]:
# Register the gcsfuse apt repository and its signing key, then install gcsfuse.
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

# Mount the training/ directory of the dataset bucket at data/training.
!mkdir -p data/training
!gcsfuse --only-dir training/ waymo_open_dataset_v_1_2_0_individual_files data/training/

# Mount the testing/ directory of the same bucket at data/testing.
!mkdir -p data/testing
!gcsfuse --only-dir testing/ waymo_open_dataset_v_1_2_0_individual_files data/testing/

In [5]:
import os
import gc
import tensorflow as tf
import math
import numpy as np
import itertools
import matplotlib.pyplot as plt
from google.colab import files

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.utils.data as data_utils
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from waymo_open_dataset.utils import range_image_utils
from waymo_open_dataset.utils import transform_utils
from waymo_open_dataset.utils import  frame_utils
from waymo_open_dataset import dataset_pb2 as open_dataset

from Net_Lib import parse_cfg, box_iou, create_network, Model, YOLO_loss, Novel_loss, LSTM_loss

# File names of the mounted training / testing TFRecords.
# The listings are sorted because os.listdir() returns entries in arbitrary order,
# while the rest of the notebook addresses files by integer position in these
# lists and persists such positions to disk (train_idx.pt / valid_idx.pt). A stable
# order keeps those saved indices pointing at the same files across sessions.
# NOTE(review): index files saved from an earlier, unsorted listing may refer to
# different files after this change — regenerate them if in doubt.
list_train = sorted(os.listdir(path='data/training'))
list_test = sorted(os.listdir(path='data/testing'))

# Calculation of Necessary Components for all Models (Anchor boxes and Training Indices)

In [None]:
# Estimate anchor-box sizes by k-means clustering of labelled box sizes, using
# box_iou between size-only boxes as the similarity measure.
# NOTE: this cell calls create_yolobatch, which is defined in the "Batch-maker"
# cell further down; that cell must be run before this one.

# Choose random data repos from the bank to use in clustering
indices = np.random.choice(len(list_train), 20, replace=False)
train_labels = torch.cat([create_yolobatch(file_idx, True)[0] for file_idx in indices], 0)

# Drop the placeholder rows written for frames without labels (class column == -1).
cluster_labels = train_labels[(train_labels[:,-2] != -1),:]
num_examples = cluster_labels.shape[0]
num_clusters = 5
num_iterations = 5000

# Columns 2:4 of a label row hold the two box-size values used for clustering.
box_sizes = cluster_labels[:, 2:4]
# Named size_min / size_max so the builtins min() / max() are not rebound for
# the rest of the kernel session.
size_min = torch.min(box_sizes, 0)[0]
size_max = torch.max(box_sizes, 0)[0]
lo_w, hi_w = size_min[0].item(), size_max[0].item()
lo_h, hi_h = size_min[1].item(), size_max[1].item()

# Cluster centres are boxes (0, 0, size0, size1) with sizes drawn uniformly from the
# observed range. They are allocated in the labels' dtype so no float32/float64
# mixing is needed when they are compared with the label boxes.
centers = torch.zeros((num_clusters, 4), dtype=box_sizes.dtype)
centers[:, 2] = torch.from_numpy(np.random.uniform(lo_w, hi_w, num_clusters))
centers[:, 3] = torch.from_numpy(np.random.uniform(lo_h, hi_h, num_clusters))

# Every example as a box with the same zeroed first two columns; this does not change
# between iterations, so it is built once.
size_only_boxes = torch.zeros((num_examples, 4), dtype=box_sizes.dtype)
size_only_boxes[:, 2:] = box_sizes

for _ in range(num_iterations):
  # Assign each example to the centre it overlaps most.
  distances = box_iou(centers, size_only_boxes)
  assignments = torch.max(distances, 0)[1]

  for k in range(num_clusters):
    members = assignments == k
    if(torch.sum(members).item() > 0):
      # Move the centre to the mean size of its members.
      centers[k,2:] = torch.mean(box_sizes[members], dim=0)
    else:
      # Empty cluster: re-seed it with a fresh random size.
      centers[k, 2] = np.random.uniform(lo_w, hi_w)
      centers[k, 3] = np.random.uniform(lo_h, hi_h)

# Just return the desired portion
# NOTE(review): the training cells load the anchors from "anch.pt", but this cell
# does not write that file — confirm where it is produced.
anchors = centers[:,2:]

In [7]:
# Split the training files by their position in list_train: the last 25 positions
# are held out for validation, and 100 training positions are drawn from the
# remaining ones (np.random.choice samples with replacement by default, so the
# same file may be drawn more than once). Both index arrays are saved to disk
# for the full-scale training cells.
n_files = len(list_train)
train_idxs = np.random.choice(n_files - 25, size=100)
validation = np.arange(n_files)[-25:]
for idx_file, idx_array in (("train_idx.pt", train_idxs), ("valid_idx.pt", validation)):
  torch.save(idx_array, idx_file)

# YOLO Model


## Batch-maker

In [6]:
# Data extraction and preparation
def create_yolobatch(index,train):
  """Read one TFRecord file and return its labels and resized camera images.

  Args:
    index: position of the file in list_train (train truthy) or list_test.
    train: selects the training directory/listing when truthy, testing otherwise.

  Returns:
    (labels, images):
      labels: 2-D tensor with one row per projected lidar label of camera 1,
        [center_x//2, center_y//2, width//2, length//2, type, imagenum].
        A frame without such labels contributes a single placeholder row
        [-1, -1, -1, -1, -1, imagenum].
      images: tensor of the camera-1 images resized to 640x960, transposed from
        (N, height, width, channels) to (N, channels, height, width).

  NOTE(review): camera name 1 is presumably the front camera — confirm against
  the Waymo CameraName enum. Also confirm that the loss expects box.width and
  box.length in columns 2 and 3 in this order.
  """
  if(train):
    loc, req_list = "data/training/", list_train
  else:
    loc, req_list = "data/testing/", list_test

  train_images = []
  train_labels = []
  imagenum = 0   # running frame counter, stored in the last label column
  # The slice yields the single requested file (or nothing if index is out of range).
  for filename in req_list[index:index+1]:
    records = tf.data.TFRecordDataset(loc+filename, compression_type='')
    for data in records:
      frame = open_dataset.Frame()
      frame.ParseFromString(bytearray(data.numpy()))

      # Get image itself. The loop variable no longer rebinds the `index` parameter.
      for image in frame.images:
        if(image.name == 1):
          img = tf.image.decode_jpeg(image.image)
          img = tf.image.resize(img, [640, 960])
          # Convert to NumPy here so the final stacking is a plain array copy.
          train_images.append(img.numpy())

      # Get label data. Coordinates are halved to match the 2x down-scaled image.
      frame_has_label = False
      for cam_labels in frame.projected_lidar_labels:
        if(cam_labels.name != 1):
          continue
        for label in cam_labels.labels:
          train_labels.append(np.array([label.box.center_x//2, label.box.center_y//2,
                                        label.box.width//2, label.box.length//2,
                                        label.type, imagenum]))
          frame_has_label = True

      # Write specialized "empty image" output
      if(not frame_has_label):
        train_labels.append(np.array([-1,-1,-1,-1,-1,imagenum]))
      imagenum += 1

  train_labels = torch.from_numpy(np.array(train_labels))
  train_images = torch.from_numpy(np.array(train_images).transpose(0,3,1,2))
  return train_labels, train_images

## Perform Basic YOLO Training using Library of Functions

In [None]:
# Small-scale training run: fit the YOLO network to the frames of a single
# training file (index 0) and record the mean loss of every epoch in `losses`.
n_tot_train = len(list_train)  # number of training files available

nepoch_readbatch = 25          # number of epochs over the frames that were read in
batchsize = 16                 # minibatch size
a_cuda = True                  # whether or not to enable cuda gpu acceleration

res = 32                       # resolution of image breakdown
H = 1280//(2*res)              # height of the grid over images
W = 1920//(2*res)              # width of the grid over images

network_spec = parse_cfg("yolo.cfg")
module_list = create_network(network_spec, 3)
yolo = Model(module_list).float()
if(a_cuda):
  yolo.cuda()
# The anchors are handed to YOLO_loss exactly as loaded, as in the other training
# cells. The former bare `anchors.cuda()` call was dropped: Tensor.cuda() returns a
# copy and its result was discarded, so the call had no effect.
anchors = torch.load("anch.pt")
yloss = YOLO_loss(5, 1, 1, 1, res, anchors, (H, W), a_cuda, batchsize)
losses = []

# use ADAM optimizer
optimizer = optim.Adam(yolo.parameters(), lr=1E-5)

for i in [0]:
  labels, images = create_yolobatch(i, True)
  ntrain = images.shape[0]
  num_its = ntrain // batchsize   # minibatch steps per epoch

  for iepoch in range(nepoch_readbatch):
    ep_loss = np.zeros(num_its)
    print(iepoch)
    for t in range(num_its):
      # random minibatch of distinct frame indices
      batchindices = np.random.choice(ntrain, batchsize, replace=False)

      # before the forward pass, clean the gradient buffers of all parameters
      optimizer.zero_grad()

      # forward pass
      this_batch = images[batchindices,...].float()
      if(a_cuda):
        this_batch = this_batch.cuda()
      out = yolo(this_batch)

      # Select every label row whose image index (last column) is in this batch.
      # A boolean mask keeps the result 2-D even when a single row matches.
      in_batch = (labels[:, -1][..., None] == torch.tensor(batchindices)).any(-1)
      true_out = labels[in_batch,:]
      if(a_cuda):
        true_out = true_out.cuda()

      # YOLO loss
      loss = yloss(out, true_out)
      ep_loss[t] = loss.item()

      # backward pass
      loss.backward()

      # update parameters with the Adam optimizer
      optimizer.step()

      # remove from memory
      this_batch = None
      true_out = None
      out = None
      gc.collect()
      if(a_cuda):
        torch.cuda.empty_cache()

    losses.append(np.mean(ep_loss))

In [None]:
# Plot the per-epoch mean training loss collected in `losses`.
fig, ax = plt.subplots()
ax.plot(losses)
ax.set_xlabel('Epoch Number')
ax.set_ylabel('Loss')
ax.set_title('YOLOV2 Loss on Waymo Subset')

## Perform full-scale YOLO Training

In [None]:
# Inspect the YOLO checkpoint stored on Google Drive: print its recorded training
# losses, validation losses and the index of the last saved epoch.
model_save_name = 'checkpoint.pt'
path = F"/content/gdrive/My Drive/{model_save_name}"
checkpoint = torch.load(path)
for field in ('loss', 'valid', 'epoch'):
  print(checkpoint[field])

In [None]:
# Full-scale YOLO training with per-epoch validation and checkpointing to Google
# Drive. With resume=True the model, optimizer state, loss histories and epoch
# counter are restored from the checkpoint at `path` before training continues.
model_save_name = 'checkpoint.pt'
path = F"/content/gdrive/My Drive/{model_save_name}"
resume = True

tot_train = torch.load("train_idx.pt")  # positions in list_train used for training
tot_valid = torch.load("valid_idx.pt")  # positions in list_train held out for validation
n_tot_train = len(tot_train)

nepoch_dataset = 20            # number of epochs through training set
batchsize = 16                 # minibatch size
a_cuda = True                  # whether or not to enable cuda gpu acceleration

res = 32                       # resolution of image breakdown
H = 1280//(2*res)              # height of the grid over images
W = 1920//(2*res)              # width of the grid over images

network_spec = parse_cfg("yolo.cfg")
module_list = create_network(network_spec, 3)
yolo = Model(module_list).float()
if(a_cuda):
  yolo.cuda()
anchors = torch.load("anch.pt")

yloss = YOLO_loss(5, 1, 1, 1, res, anchors, (H, W), a_cuda, batchsize)
train_losses = []
valid_losses = []
epoch = 0

# use ADAM optimizer
optimizer = optim.Adam(yolo.parameters(), lr=1E-5, weight_decay=0.0005)

# Code for resume
if(resume):
  checkpoint = torch.load(path)
  yolo.load_state_dict(checkpoint['model_state_dict'])
  optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  epoch = checkpoint['epoch']+1   # continue with the epoch after the saved one
  train_losses = checkpoint['loss']
  valid_losses = checkpoint['valid']

# One epoch is sized as 200 images per training file.
# NOTE(review): 200 is presumably the approximate number of frames per file — confirm.
num_images = 200*n_tot_train
num_steps = num_images//batchsize

for iepoch in range(epoch, nepoch_dataset):
  ep_loss = np.zeros(num_steps)
  print("Epoch:" +str(iepoch))
  # Each step reads one randomly chosen training file and trains on one random
  # minibatch of its frames.
  for j in range(num_steps):
    print(str(j) + "/" + str(num_steps))
    index = int(np.random.choice(tot_train,1)[0])
    labels, images = create_yolobatch(index, True)
    ntrain = images.shape[0]
    batchindices = np.random.choice(ntrain, batchsize, replace=False)

    # before the forward pass, clean the gradient buffers of all parameters
    optimizer.zero_grad()

    # forward pass
    this_batch = images[batchindices,...].float()
    if(a_cuda):
      this_batch = this_batch.cuda()
    out = yolo(this_batch)

    # Select every label row whose image index (last column) is in this batch.
    # A boolean mask keeps the result 2-D even when a single row matches.
    in_batch = (labels[:, -1][..., None] == torch.tensor(batchindices)).any(-1)
    true_out = labels[in_batch,:]
    if(a_cuda):
      true_out = true_out.cuda()

    # YOLO loss
    loss = yloss(out, true_out)
    ep_loss[j] = loss.item()

    # backward pass
    loss.backward()

    # update parameters with the Adam optimizer
    optimizer.step()

    # remove from memory
    this_batch = None
    true_out = None
    out = None
    gc.collect()
    if(a_cuda):
      torch.cuda.empty_cache()

  # Record training loss
  train_losses.append(np.mean(ep_loss))

  # Calculate and record validation loss on the held-out training files (one
  # random minibatch per file). No gradients are needed here, so the pass runs
  # under torch.no_grad() to avoid building autograd graphs.
  # NOTE(review): the model is left in training mode during validation; decide
  # whether yolo.eval() / yolo.train() should bracket this loop.
  valid = np.zeros(len(tot_valid))
  with torch.no_grad():
    for i in range(len(tot_valid)):
      index = int(tot_valid[i])
      labels, images = create_yolobatch(index, True)
      ntest = images.shape[0]
      batchindices = np.random.choice(ntest, batchsize, replace=False)
      this_batch = images[batchindices,...].float()
      if(a_cuda):
        this_batch = this_batch.cuda()
      out = yolo(this_batch)
      in_batch = (labels[:, -1][..., None] == torch.tensor(batchindices)).any(-1)
      true_out = labels[in_batch,:]
      if(a_cuda):
        true_out = true_out.cuda()
      # .item() stores a plain Python float instead of relying on an implicit
      # tensor-to-NumPy conversion.
      valid[i] = yloss(out, true_out).item()

      this_batch = None
      true_out = None
      out = None
      gc.collect()
      if(a_cuda):
        torch.cuda.empty_cache()

  valid_losses.append(np.mean(valid))
  # Checkpoint after every epoch so training can be resumed.
  torch.save({
            'epoch': iepoch,
            'model_state_dict': yolo.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_losses,
            'valid': valid_losses
            }, path)

# Novel Model

## Difference Batch-Maker


In [None]:
def create_diffbatch(index, train):
  """Read one TFRecord file and return labels plus grayscale difference images.

  The first frame of each scene (frame.context.name) is kept as the reference
  image and produces no output. Every later frame of that scene contributes the
  image (frame - reference) together with its labels.

  Args:
    index: position of the file in list_train (train truthy) or list_test.
    train: selects the training directory/listing when truthy, testing otherwise.

  Returns:
    (labels, images):
      labels: 2-D tensor with one row per projected lidar label of camera 1,
        [center_x//2, center_y//2, width//2, length//2, speed_x, speed_y,
         accel_x, accel_y, type, imagenum]. A frame without such labels
        contributes one placeholder row of -1 values ending in imagenum.
      images: tensor of difference images at 640x960, transposed from
        (N, height, width, channels) to (N, channels, height, width).

  NOTE(review): the reference image is never advanced, so every difference is
  taken against the FIRST frame of the scene rather than the preceding frame.
  The variable name `prev_img` suggests frame-to-frame differences were
  intended — confirm before changing, since it alters the training data.
  """
  if(train):
    loc, req_list = "data/training/", list_train
  else:
    loc, req_list = "data/testing/", list_test

  train_images = []
  train_labels = []
  imagenum = 0   # counts emitted difference images; stored in the last label column

  # Scene tracking: name of the current scene and its reference image.
  prev_context = None
  prev_img = None

  # The slice yields the single requested file (or nothing if index is out of range).
  for filename in req_list[index:index+1]:
    records = tf.data.TFRecordDataset(loc+filename, compression_type='')
    for data in records:
      frame = open_dataset.Frame()
      frame.ParseFromString(bytearray(data.numpy()))
      context = frame.context.name

      # Indicates new scene: store the reference image and emit nothing.
      if(prev_context is None or context != prev_context):
        for image in frame.images:
          if(image.name == 1):
            prev_img = tf.image.rgb_to_grayscale(tf.image.decode_jpeg(image.image))
            prev_img = tf.image.resize(prev_img, [640, 960])
        prev_context = context
        continue

      # Indicates continuation: emit the difference to the reference image.
      for image in frame.images:
        if(image.name == 1):
          img = tf.image.rgb_to_grayscale(tf.image.decode_jpeg(image.image))
          img = tf.image.resize(img, [640, 960])
          # Convert to NumPy here so the final stacking is a plain array copy.
          train_images.append((img-prev_img).numpy())

      # Label rows; box coordinates are halved to match the 2x down-scaled image.
      frame_has_label = False
      for cam_labels in frame.projected_lidar_labels:
        if(cam_labels.name != 1):
          continue
        for label in cam_labels.labels:
          train_labels.append(np.array([label.box.center_x//2, label.box.center_y//2,
                                        label.box.width//2, label.box.length//2, label.metadata.speed_x,
                                        label.metadata.speed_y, label.metadata.accel_x,
                                        label.metadata.accel_y, label.type, imagenum]))
          frame_has_label = True

      # Placeholder row for a frame without labels.
      if(not frame_has_label):
        train_labels.append(np.array([-1,-1,-1,-1,-1,-1,-1,-1,-1,imagenum]))
      imagenum += 1

  train_labels = torch.from_numpy(np.array(train_labels))
  train_images = torch.from_numpy(np.array(train_images).transpose(0,3,1,2))
  return train_labels, train_images

## Train Novel Network Using Library of Functions

In [None]:
# Small-scale training run: fit the Novel network to the difference images of a
# single training file (index 2) and record the mean loss of every epoch in `losses`.
n_tot_train = len(list_train)  # number of training files available

nepoch_readbatch = 25          # number of epochs over the frames that were read in
batchsize = 16                 # minibatch size
a_cuda = True                  # whether or not to enable cuda gpu acceleration

res = 32                       # resolution of image breakdown
H = 1280//(2*res)              # height of the grid over images
W = 1920//(2*res)              # width of the grid over images

network_spec = parse_cfg("novel.cfg")
module_list = create_network(network_spec, 1)
novel = Model(module_list).float()
if(a_cuda):
  novel.cuda()
anchors = torch.load("anch.pt")

nloss = Novel_loss(anchors, 3, 5, 5, 1, 1, res, (H, W), a_cuda, batchsize)
losses = []

# use ADAM optimizer
optimizer = optim.Adam(novel.parameters(), lr=1E-6, weight_decay=0.0005)

for i in [2]:
  labels, images = create_diffbatch(i, True)
  ntrain = images.shape[0]
  num_its = ntrain // batchsize   # minibatch steps per epoch

  for iepoch in range(nepoch_readbatch):
    ep_loss = np.zeros(num_its)
    print(iepoch)
    for t in range(num_its):
      # random minibatch of distinct image indices
      batchindices = np.random.choice(ntrain, batchsize, replace=False)

      # before the forward pass, clean the gradient buffers of all parameters
      optimizer.zero_grad()

      # forward pass
      this_batch = images[batchindices,...].float()
      if(a_cuda):
        this_batch = this_batch.cuda()
      out = novel(this_batch)

      # Select every label row whose image index (last column) is in this batch.
      # A boolean mask keeps the result 2-D even when a single row matches.
      in_batch = (labels[:, -1][..., None] == torch.tensor(batchindices)).any(-1)
      true_out = labels[in_batch,:]
      if(a_cuda):
        true_out = true_out.cuda()

      # Novel loss
      loss = nloss(out, true_out)
      ep_loss[t] = loss.item()

      # backward pass
      loss.backward()

      # update parameters with the Adam optimizer
      optimizer.step()

      # remove from memory
      this_batch = None
      true_out = None
      out = None
      gc.collect()
      if(a_cuda):
        torch.cuda.empty_cache()

    losses.append(np.mean(ep_loss))

In [None]:
# Plot the per-epoch mean training loss collected in `losses`.
fig, ax = plt.subplots()
ax.plot(losses)
ax.set_xlabel('Epoch Number')
ax.set_ylabel('Loss')
ax.set_title('Novel Loss on Waymo Subset')

## Perform full-scale Novel Training

In [None]:
# Inspect the Novel checkpoint stored on Google Drive: print its recorded training
# losses, validation losses and the index of the last saved epoch.
model_save_name = 'novel_checkpoint.pt'
path = F"/content/gdrive/My Drive/{model_save_name}"
checkpoint = torch.load(path)
for field in ('loss', 'valid', 'epoch'):
  print(checkpoint[field])

In [None]:
# Full-scale Novel training with per-epoch validation and checkpointing to Google
# Drive. With resume=True the model, optimizer state, loss histories and epoch
# counter are restored from the checkpoint at `path` before training continues.
model_save_name = 'novel_checkpoint.pt'
path = F"/content/gdrive/My Drive/{model_save_name}"
resume = True

tot_train = torch.load("train_idx.pt")  # positions in list_train used for training
tot_valid = torch.load("valid_idx.pt")  # positions in list_train held out for validation
n_tot_train = len(tot_train)

nepoch_dataset = 20            # number of epochs through training set
batchsize = 16                 # minibatch size
a_cuda = True                  # whether or not to enable cuda gpu acceleration

res = 32                       # resolution of image breakdown
H = 1280//(2*res)              # height of the grid over images
W = 1920//(2*res)              # width of the grid over images
epoch = 0

network_spec = parse_cfg("novel.cfg")
module_list = create_network(network_spec, 1)
novel = Model(module_list).float()
if(a_cuda):
  novel = novel.cuda()
anchors = torch.load("anch.pt")

nloss = Novel_loss(anchors, 3, 5, 5, 1, 1, res, (H, W), a_cuda, batchsize)
train_losses = []
valid_losses = []

# use ADAM optimizer
optimizer = optim.Adam(novel.parameters(), lr=1E-6, weight_decay=0.0005)

# Code for resume
if(resume):
  checkpoint = torch.load(path)
  novel.load_state_dict(checkpoint['model_state_dict'])
  optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
  epoch = checkpoint['epoch']+1   # continue with the epoch after the saved one
  train_losses = checkpoint['loss']
  valid_losses = checkpoint['valid']

# One epoch is sized as 200 images per training file.
# NOTE(review): 200 is presumably the approximate number of frames per file — confirm.
num_images = 200*n_tot_train
num_steps = num_images//batchsize

for iepoch in range(epoch, nepoch_dataset):
  ep_loss = np.zeros(num_steps)
  print("Epoch:" +str(iepoch))
  # Each step reads one randomly chosen training file and trains on one random
  # minibatch of its difference images.
  for j in range(num_steps):
    print(str(j) + "/" + str(num_steps))
    index = int(np.random.choice(tot_train,1)[0])
    labels, images = create_diffbatch(index, True)
    ntrain = images.shape[0]
    batchindices = np.random.choice(ntrain, batchsize, replace=False)

    # before the forward pass, clean the gradient buffers of all parameters
    optimizer.zero_grad()

    # forward pass
    this_batch = images[batchindices,...].float()
    if(a_cuda):
      this_batch = this_batch.cuda()
    out = novel(this_batch)

    # Select every label row whose image index (last column) is in this batch.
    # A boolean mask keeps the result 2-D even when a single row matches.
    in_batch = (labels[:, -1][..., None] == torch.tensor(batchindices)).any(-1)
    true_out = labels[in_batch,:]
    if(a_cuda):
      true_out = true_out.cuda()

    # Novel loss
    loss = nloss(out, true_out)
    ep_loss[j] = loss.item()

    # backward pass
    loss.backward()

    # update parameters with the Adam optimizer
    optimizer.step()

    # remove from memory
    this_batch = None
    true_out = None
    out = None
    gc.collect()
    if(a_cuda):
      torch.cuda.empty_cache()

  # Record training loss
  train_losses.append(np.mean(ep_loss))

  # Calculate and record validation loss on the held-out training files (one
  # random minibatch per file). No gradients are needed here, so the pass runs
  # under torch.no_grad() to avoid building autograd graphs.
  # NOTE(review): the model is left in training mode during validation; decide
  # whether novel.eval() / novel.train() should bracket this loop.
  valid = np.zeros(len(tot_valid))
  with torch.no_grad():
    for i in range(len(tot_valid)):
      index = int(tot_valid[i])
      labels, images = create_diffbatch(index, True)
      ntest = images.shape[0]
      batchindices = np.random.choice(ntest, batchsize, replace=False)
      this_batch = images[batchindices,...].float()
      if(a_cuda):
        this_batch = this_batch.cuda()
      out = novel(this_batch)
      in_batch = (labels[:, -1][..., None] == torch.tensor(batchindices)).any(-1)
      true_out = labels[in_batch,:]
      if(a_cuda):
        true_out = true_out.cuda()
      # .item() stores a plain Python float instead of relying on an implicit
      # tensor-to-NumPy conversion.
      valid[i] = nloss(out, true_out).item()

      this_batch = None
      true_out = None
      out = None
      gc.collect()
      if(a_cuda):
        torch.cuda.empty_cache()
  valid_losses.append(np.mean(valid))

  # Checkpoint after every epoch so training can be resumed.
  torch.save({
            'epoch': iepoch,
            'model_state_dict': novel.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_losses,
            'valid': valid_losses
            }, path)

In [None]:
# Data extraction and preparation
def create_LSTMbatch(index,train):
  """Read one TFRecord file and return motion-annotated labels and camera images.

  Args:
    index: position of the file in list_train (train truthy) or list_test.
    train: selects the training directory/listing when truthy, testing otherwise.

  Returns:
    (labels, images):
      labels: 2-D tensor with one row per projected lidar label of camera 1,
        [center_x//2, center_y//2, width//2, length//2, speed_x, speed_y,
         accel_x, accel_y, type, imagenum]. A frame without such labels
        contributes one placeholder row of -1 values ending in imagenum.
      images: tensor of the camera-1 images resized to 640x960, transposed from
        (N, height, width, channels) to (N, channels, height, width).
  """
  if(train):
    loc, req_list = "data/training/", list_train
  else:
    loc, req_list = "data/testing/", list_test

  train_images = []
  train_labels = []
  imagenum = 0   # running frame counter, stored in the last label column
  # Iterate the listing selected above. Previously list_train was used
  # unconditionally, so train=False combined the testing directory with a
  # training file name.
  for filename in req_list[index:index+1]:
    records = tf.data.TFRecordDataset(loc+filename, compression_type='')
    for data in records:
      frame = open_dataset.Frame()
      frame.ParseFromString(bytearray(data.numpy()))

      # Get image itself. The loop variable no longer rebinds the `index` parameter.
      for image in frame.images:
        if(image.name == 1):
          img = tf.image.decode_jpeg(image.image)
          img = tf.image.resize(img, [640, 960])
          # Convert to NumPy here so the final stacking is a plain array copy.
          train_images.append(img.numpy())

      # Get label data. Box coordinates are halved to match the 2x down-scaled image.
      frame_has_label = False
      for cam_labels in frame.projected_lidar_labels:
        if(cam_labels.name != 1):
          continue
        for label in cam_labels.labels:
          train_labels.append(np.array([label.box.center_x//2, label.box.center_y//2,
                                        label.box.width//2, label.box.length//2, label.metadata.speed_x,
                                        label.metadata.speed_y, label.metadata.accel_x,
                                        label.metadata.accel_y, label.type, imagenum]))
          frame_has_label = True

      # Write specialized "empty image" output
      if(not frame_has_label):
        train_labels.append(np.array([-1,-1,-1,-1,-1,-1,-1,-1,-1,imagenum]))
      imagenum += 1

  train_labels = torch.from_numpy(np.array(train_labels))
  train_images = torch.from_numpy(np.array(train_images).transpose(0,3,1,2))
  return train_labels, train_images