In [1]:
from pickletools import optimize
import torch, torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import os
from skimage import io, transform

from model import *
from utils import write_results, unique, bbox_iou
from dataset import filter_labels, DetectionDataset, Rescale, ToTensor, Normalise

plt.rcParams['figure.figsize'] = [15,15]

## Rejig Custom Dataset

**Objective:** We want each label to be [x, y, w, h, obj?, c_1, c_2, ..., c_n]

### First find out n_unique classes/categories

In [2]:
# df = pd.read_json("D:/Datasets/bdd100k/labels/det_20/det_train.json")
# filtered_labels = filter_labels("D:/Datasets/bdd100k/labels/det_20/det_train.json")

{'bus', 'car', 'pedestrian', 'traffic light', 'traffic sign', 'truck', 'bicycle'}

In [3]:
classes = [
    'bus', 'car', 'pedestrian', 'traffic light', 
    'traffic sign', 'truck', 'bicycle']

## Load Dataset

### Testing manual pad transform

In [4]:
class Pad(object):
    """
    Add padding to image

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """
    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple)) # make sure output size is EITHER int or tuple
        self.output_size = output_size

    def __call__(self, sample):
        image, categories, bboxes = sample["image"], sample["categories"], sample["bboxes"]

        img_w, img_h = image.shape[1], image.shape[0]
        w, h = self.output_size

        # calculate new width and height
        new_w = int(img_w * min(w/img_w, h/img_h))
        new_h = int(img_h * min(w/img_w, h/img_h))
        resized_image = cv2.resize(image, (new_w,new_h), interpolation = cv2.INTER_CUBIC)
        
        canvas = np.full((self.output_size[1], self.output_size[0], 3), 128)

        canvas[(h-new_h)//2:(h-new_h)//2 + new_h,(w-new_w)//2:(w-new_w)//2 + new_w,  :] = resized_image

        return {"image": canvas, "categories": categories, "bboxes": bboxes}

In [5]:
filtered_labels = filter_labels("det_train_shortened.json")

train_data = DetectionDataset(
    label_dict=filtered_labels,
    root_dir='images/',
)
## load custom dataset + transforms
transformed_train_data = DetectionDataset(
    label_dict=filtered_labels,
    root_dir='images/',
    transform=transforms.Compose([
        Rescale(608),
        Normalise(0.5, 0.5),
        Pad((608,608)),
        ToTensor()
    ])
)

## dataloader
train_loader = DataLoader(
    transformed_train_data,
    batch_size=1,
    shuffle=True,
    num_workers=0
)

## Define Network

In [6]:
net = Net(cfgfile="cfg/model.cfg")

In [7]:
class TestNet(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)     # in 1 , out 6, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)    # in 6, out 16, 5x5 kernel

        self.pool = nn.MaxPool2d(2, 2)      # this time define own maxpooling
        
        # an affine op: y = Wx + b
        self.fc1 = nn.Linear(16*5*5, 120)   # 5x5 is image dim
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # max pooling over a (2,2) window
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dims except batch dim
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x) # no activation on final layer -> output
        return x
testnet = TestNet()

## Define Loss Function and Optimiser

In [8]:
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
criterion = ...

## Train Network

In [9]:
CUDA = torch.cuda.is_available()
for i, data in enumerate(train_loader):
    input_img, cat, bboxes = data.values()

    optimizer.zero_grad()
    #plt.imshow(input_img)
    #input_img = norm_with_padding(input_img, 608)
    outputs = net(Variable(input_img.float()), CUDA)
    

In [10]:
outputs[...,0] # select first bbox attr from all bbox tensors

tensor([[ 12.6306,  16.6703,  16.6036,  ..., 604.9901, 604.6963, 603.0175]])

## Troubleshooting bboxes

In [11]:
outputs = torch.load("pred.pt")

In [12]:
outputs = outputs*(outputs[:,:,4] > 0.5).float().unsqueeze(2)

In [13]:
outputs.shape

torch.Size([1, 10647, 85])

In [14]:
box_corner = outputs.new(outputs.shape)
box_corner[:,:,0] = (outputs[:,:,0] - outputs[:,:,2]/2)
box_corner[:,:,1] = (outputs[:,:,1] - outputs[:,:,3]/2)
box_corner[:,:,2] = (outputs[:,:,0] + outputs[:,:,2]/2)
box_corner[:,:,3] = (outputs[:,:,1] + outputs[:,:,3]/2)
# replace in the prediction tensor
outputs[:,:,:4] = box_corner[:,:,:4]

In [15]:
image_pred = outputs[0] # for each image in batch

In [16]:
image_pred.shape

torch.Size([10647, 85])

In [17]:
image_pred[:,5:5+80].shape

torch.Size([10647, 80])

In [18]:
torch.max(torch.tensor([1, 2, 3, 4, 5]), 0)

torch.return_types.max(
values=tensor(5),
indices=tensor(4))

In [19]:
max_conf, max_conf_idx = torch.max(image_pred[:,5:5+80], 1)

In [20]:
max_conf.shape

torch.Size([10647])

In [21]:
max_conf = max_conf.float().unsqueeze(1) # max_conf (1, 1, 85)
max_conf_score = max_conf_idx.float().unsqueeze(1) # max_conf_score (1, 1, 85)

In [22]:
image_pred[:,:5].shape

torch.Size([10647, 5])

In [23]:
max_conf.shape

torch.Size([10647, 1])

In [24]:
max_conf_score.shape

torch.Size([10647, 1])

In [25]:
seq = (image_pred[:,:5], max_conf, max_conf_score)
image_pred = torch.cat(seq, 1)
image_pred.shape

torch.Size([10647, 7])

In [26]:
image_pred[:,4]

tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [27]:
non_zero_idx = (torch.nonzero(image_pred[:, 4]))

In [28]:
non_zero_idx.shape

torch.Size([15, 1])

In [29]:
image_pred_ = image_pred[non_zero_idx.squeeze(), :].view(-1,7)

In [30]:
image_pred_

tensor([[260.4430,  96.7705, 370.6997, 145.4609,   0.9891,   0.8380,   7.0000],
        [268.3782,  94.7033, 374.9456, 145.8965,   0.7709,   0.8236,   7.0000],
        [ 59.0262, 109.7473, 309.9195, 266.9929,   0.7363,   0.9938,   1.0000],
        [ 91.1993, 100.4477, 304.9465, 276.3017,   0.5328,   0.9873,   1.0000],
        [ 63.6384, 119.8179, 309.9497, 289.3002,   0.9904,   0.9980,   1.0000],
        [ 87.8459, 116.1371, 307.0424, 289.9205,   0.9936,   0.9993,   1.0000],
        [ 71.3474, 173.7214, 168.3941, 327.2916,   0.9487,   0.9987,  16.0000],
        [ 67.1713, 162.9582, 173.7498, 338.0852,   0.9998,   0.9992,  16.0000],
        [ 71.9760, 157.9328, 192.1129, 335.5837,   0.8839,   0.9954,  16.0000],
        [ 69.0054, 183.4312, 167.7904, 336.2758,   0.9358,   0.9983,  16.0000],
        [ 67.9223, 174.6987, 171.9729, 341.0680,   0.9996,   0.9992,  16.0000],
        [ 72.0152, 174.0368, 192.1688, 343.2910,   0.8491,   0.9941,  16.0000],
        [258.3818,  98.5464, 371.5767, 1

In [58]:
nms_conf = 0.4

conf_sort_idx = torch.sort(image_pred_[:,4], descending=True)[1]
image_pred_ordered = image_pred_[conf_sort_idx]

for idx, pred in enumerate(image_pred_ordered):
    try:
        ious = bbox_iou(pred.unsqueeze(0), image_pred_ordered[idx+1:])
    except ValueError:
        break

    except IndexError:
        break

    iou_mask = (ious < nms_conf).float().unsqueeze(1)
    image_pred_ordered[idx+1:] *= iou_mask

    non_zero_idx = torch.nonzero(image_pred_ordered[:,4]).squeeze()
    image_pred_ordered = image_pred_ordered[non_zero_idx].view(-1,7)

In [59]:
image_pred_ordered

tensor([[ 67.1713, 162.9582, 173.7498, 338.0852,   0.9998,   0.9992,  16.0000],
        [ 87.8459, 116.1371, 307.0424, 289.9205,   0.9936,   0.9993,   1.0000],
        [ 63.6384, 119.8179, 309.9497, 289.3002,   0.9904,   0.9980,   1.0000],
        [260.4430,  96.7705, 370.6997, 145.4609,   0.9891,   0.8380,   7.0000],
        [258.3818,  98.5464, 371.5767, 144.0120,   0.9728,   0.8982,   7.0000]])

In [None]:
for idx, pred in enumerate(image_pred_ordered):
    try:
        ious = bbox_iou(pred.unsqueeze(0), image_pred_ordered[idx+1:])
    except ValueError:
        break

    except IndexError:
        break

In [54]:
bbox_iou(image_pred_ordered[1].unsqueeze(0), image_pred_ordered[2].unsqueeze(0))

tensor([0.8703])