In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import minmax_scale
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from PIL import Image
import cv2

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision import models

from data_utils import get_abs_path
from data_utils import parse_annotations_xml, create_annotations_list
from image_utils import get_bounding_box, create_mask, get_bb_from_mask, resize_img_and_bb

In [None]:
# Resolve the project root once and derive every data folder from it.
root_dir = get_abs_path(1)
annotations_dir = root_dir.joinpath('data', 'annotations')
images_dir = root_dir.joinpath('data', 'images')
resized_images_dir = root_dir.joinpath('data', 'resized_images')
# Create the output folder (and any missing parents); a pre-existing folder is not an error.
resized_images_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the annotation records into a DataFrame and randomise the row order.
annotations_list = create_annotations_list(annotations_dir)
df = shuffle(pd.DataFrame(annotations_list))

# Placeholder columns (empty strings) that later cells fill in, at fixed positions.
for column_position, column_name in ((3, 'resized_img_filename'),
                                     (7, 'class_label'),
                                     (12, 'bounding_box')):
    df.insert(column_position, column_name, '')

# Class name -> integer label, and the inverse mapping used when plotting predictions.
class_idxs = {'speedlimit': 0, 'stop': 1, 'crosswalk': 2, 'trafficlight': 3}
class_labels = {class_idx: class_name for class_name, class_idx in class_idxs.items()}
# A class name missing from class_idxs raises KeyError here.
df['class_label'] = df['class'].apply(class_idxs.__getitem__)

print(df['class'].value_counts())
df.tail(3)

In [None]:
# Target size of every resized image, in pixels.
img_width = 300
img_height = 400

# Resize each annotated image together with its bounding box and record the
# resized file path and the resized box in the DataFrame.
for idx, row in df.iterrows():
    img_path = row['img_filename']
    bounding_box = get_bounding_box(row)

    resized_img, resized_bounding_box = resize_img_and_bb(img_path, bounding_box, img_width, img_height)

    resized_img_filename = str(resized_images_dir / (row['name'] + '.png'))
    # Write the file only when it is NOT on disk yet. The previous condition was
    # inverted (it wrote only when the file already existed), so on a fresh run no
    # resized image was ever created and later reads of these paths failed.
    # NOTE: delete the resized_images folder after changing img_width/img_height,
    # otherwise stale files from the previous size are reused.
    if not os.path.isfile(resized_img_filename):
        cv2.imwrite(resized_img_filename, resized_img)

    df.at[idx, 'resized_img_filename'] = resized_img_filename
    # Store the first four box values as one array in a single cell.
    df.at[idx, 'bounding_box'] = np.array([resized_bounding_box[k] for k in range(4)])
df.tail(3)

In [None]:
def plot_img_with_mask(df_row):
    """Display a resized image with its bounding-box mask overlaid.

    Args:
        df_row: mapping (e.g. a DataFrame row converted to a dict) providing the
            keys 'resized_img_filename', 'width', 'height', 'bounding_box' and
            'class'.
    """
    # OpenCV loads images as BGR; matplotlib expects RGB.
    rgb_image = cv2.cvtColor(cv2.imread(df_row['resized_img_filename']), cv2.COLOR_BGR2RGB)
    overlay = create_mask(df_row['width'], df_row['height'], df_row['bounding_box'])  # TODO flip width and height
    plt.title(df_row['class'])
    plt.imshow(rgb_image)
    # Semi-transparent mask so the image stays visible underneath.
    plt.imshow(overlay, alpha=0.6)
    plt.show()

In [None]:
# Visual sanity check: plot the first three rows with their masks.
for row_position in (0, 1, 2):
    sample_row = df.iloc[row_position]
    plot_img_with_mask(sample_row.to_dict())

In [None]:
# Fraction of all rows held out from training (later halved into validation and test).
test_size = 0.2
# Mini-batch size used by the data loaders.
batch_size = 5

X = df[['resized_img_filename', 'bounding_box']]
y = df['class_label']

# Use the configured `test_size` (it was previously defined but ignored in favour
# of a hard-coded 0.2), then split the held-out part 50/50 into validation and test.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5)

print('train size: %d val_size: %d test size: %d' % (len(X_train), len(X_val), len(X_test)))

In [None]:
def min_bb(in_bb):
    """Scale a 4-value bounding box down for use as a regression target.

    The first two values are divided by 30 and the last two by 40.

    Args:
        in_bb: indexable with at least four numeric entries.

    Returns:
        numpy array with the four scaled float values.
    """
    divisors = (30, 30, 40, 40)
    return np.array([float(in_bb[pos]) / divisor for pos, divisor in enumerate(divisors)])


def max_bb(in_bb):
    """Scale a 4-value bounding box back up (inverse of the scaling in min_bb).

    The first two values are multiplied by 30 and the last two by 40.

    Args:
        in_bb: indexable with at least four numeric entries.

    Returns:
        numpy array with the four scaled float values.
    """
    factors = (30, 30, 40, 40)
    return np.array([float(in_bb[pos]) * factor for pos, factor in enumerate(factors)])


def train_transform(x, bb):
    """Apply one randomly chosen photometric augmentation to a training image.

    The candidates are colour inversion, Gaussian blur and colour jitter. None of
    them moves pixels, so the bounding box is returned unchanged.

    Args:
        x: input image accepted by the torchvision transforms.
        bb: bounding box belonging to the image.

    Returns:
        (augmented image, bb)
    """
    augmentation_pool = [
        transforms.RandomInvert(),
        transforms.GaussianBlur(kernel_size=(3,3)),
        transforms.ColorJitter(brightness=0.5, contrast=0.3, saturation=0.4),
    ]
    # Exactly one transform from the pool is picked per call.
    pick_one = transforms.RandomChoice(augmentation_pool)
    return pick_one(x), bb


def normalization(x):
    """Convert an image to a tensor and standardise its three channels.

    Uses per-channel means [0.485, 0.456, 0.406] and standard deviations
    [0.229, 0.224, 0.225].

    Args:
        x: image accepted by transforms.ToTensor.

    Returns:
        The normalised image tensor.
    """
    as_tensor = transforms.ToTensor()
    standardize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    return standardize(as_tensor(x))


class RoadSignsDataset(Dataset):
    """Dataset yielding (image tensor, class label, scaled bounding box) triples.

    Args:
        paths: pandas object holding image file paths (its `.values` is stored).
        bounding_boxes: pandas object holding one bounding box per image.
        labels: pandas object holding one class label per image.
        apply_train_transforms: when True, each image goes through
            train_transform before normalisation.
    """

    def __init__(self, paths, bounding_boxes, labels, apply_train_transforms=False):
        self.apply_train_transforms = apply_train_transforms
        # Keep plain arrays so items are addressed by position, not by DataFrame index.
        self.paths, self.bounding_boxes, self.labels = (
            paths.values, bounding_boxes.values, labels.values)

    def __len__(self):
        """Number of samples."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load sample `idx` and return (normalised image, label, min_bb-scaled box)."""
        image_path, target_label, box = self.paths[idx], self.labels[idx], self.bounding_boxes[idx]
        image = Image.open(image_path)
        if self.apply_train_transforms:
            image, box = train_transform(image, box)
        return normalization(image), target_label, min_bb(box)

In [None]:
# One dataset per split; only the training split gets the random augmentations.
train_dataset = RoadSignsDataset(
    X_train['resized_img_filename'],
    X_train['bounding_box'],
    y_train,
    apply_train_transforms=True,
)
val_dataset = RoadSignsDataset(
    X_val['resized_img_filename'],
    X_val['bounding_box'],
    y_val,
)
test_dataset = RoadSignsDataset(
    X_test['resized_img_filename'],
    X_test['bounding_box'],
    y_test,
)

# Batched, shuffled loaders for the training loop. The test split has no loader;
# it is indexed sample by sample in the test helpers.
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_dataloader = DataLoader(val_dataset, shuffle=True, batch_size=batch_size)

In [None]:
class BBmodel(nn.Module):
    """Pretrained ResNet-18 feature extractor with two 4-output heads.

    `classifier` produces four class scores and `bb` produces four bounding-box
    values; both read the same 512-dimensional pooled feature vector.
    """

    def __init__(self):
        super().__init__()
        # First nine child modules of the pretrained network, split into two stages.
        backbone_layers = list(models.resnet18(pretrained=True).children())[:9]
        self.features1 = nn.Sequential(*backbone_layers[:6])
        self.features2 = nn.Sequential(*backbone_layers[6:])

        # Heads are created in this order (classifier first) so that their random
        # weight initialisation consumes the RNG in the same sequence as before.
        self.classifier = self._make_head()
        self.bb = self._make_head()

    @staticmethod
    def _make_head():
        """Build one output head: BatchNorm1d(512) -> Linear(512, 4) -> Dropout."""
        return nn.Sequential(
            nn.BatchNorm1d(512),
            nn.Linear(512, 4),
            nn.Dropout())

    def forward(self, x):
        """Return (class scores, bounding-box values) for a batch of images."""
        feature_map = self.features2(self.features1(x))
        pooled = F.adaptive_avg_pool2d(F.relu(feature_map), (1, 1))
        # Flatten everything but the batch dimension.
        flat = pooled.view(pooled.shape[0], -1)
        return self.classifier(flat), self.bb(flat)

In [None]:
def loss_function(pred_label, pred_bb, label, bb, classification_factor, bounding_box_factor):
    """Weighted sum of the classification loss and the bounding-box loss.

    Args:
        pred_label: class scores (logits) with four classes per sample.
        pred_bb: predicted bounding-box values.
        label: target class indices.
        bb: target bounding-box values, same shape as `pred_bb`.
        classification_factor: multiplier applied to the cross-entropy term.
        bounding_box_factor: multiplier applied to the L1 term.

    Returns:
        Scalar tensor: classification_factor * CE + bounding_box_factor * L1.
    """
    # Per-class weights: class 0 counts 1x, classes 1-3 count 3x. The tensor is
    # created on the same device/dtype as the logits instead of unconditionally
    # calling .cuda(), so the loss also works on CPU or on a non-default GPU.
    class_weights = torch.tensor([1.0, 3.0, 3.0, 3.0],
                                 device=pred_label.device,
                                 dtype=pred_label.dtype)

    classification_loss = F.cross_entropy(pred_label, label,
                                          weight=class_weights,
                                          reduction='mean')

    bounding_box_loss = F.l1_loss(pred_bb, bb, reduction='mean')

    return classification_factor * classification_loss + bounding_box_factor * bounding_box_loss


def IOU_metrics(bb_a, bb_b):
    """Intersection over union of two boxes.

    Each box is indexed as [0], [1] = start/end along x and [2], [3] = start/end
    along y. Every extent has 1 added to it, as in the original implementation.

    Args:
        bb_a: first box (list, array or tensor with four entries).
        bb_b: second box, same layout.

    Returns:
        The IoU. For tensor inputs the result is a tensor, otherwise a number.
        Boxes that do not overlap yield 0.
    """
    xA = max(bb_a[0], bb_b[0])
    yA = max(bb_a[2], bb_b[2])
    xB = min(bb_a[1], bb_b[1])
    yB = min(bb_a[3], bb_b[3])

    box_a_area = (bb_a[1] - bb_a[0] + 1) * (bb_a[3] - bb_a[2] + 1)
    box_b_area = (bb_b[1] - bb_b[0] + 1) * (bb_b[3] - bb_b[2] + 1)

    # Clamp each overlap extent at zero. Without this, disjoint boxes produced a
    # negative extent; two negative extents even multiplied into a positive
    # "intersection". `value * 0` (rather than a literal 0) keeps the result the
    # same type as the inputs, so tensor inputs still return a tensor.
    overlap_w = xB - xA + 1
    overlap_h = yB - yA + 1
    overlap_w = max(overlap_w, overlap_w * 0)
    overlap_h = max(overlap_h, overlap_h * 0)

    area_of_intersection = overlap_w * overlap_h
    area_of_union = float(box_a_area + box_b_area - area_of_intersection)
    if area_of_union <= 0:
        # Degenerate boxes (e.g. an end before its start): report no overlap
        # instead of dividing by zero or returning a negative ratio.
        return area_of_intersection * 0.0
    return area_of_intersection / area_of_union


def evaluate(model, dataloader, classification_factor, bounding_box_factor, train_model=True, optimizer=None):
    """Run one pass over `dataloader`, optionally updating the model weights.

    Args:
        model: network returning (class scores, bounding-box values).
        dataloader: iterable of (image batch, label batch, box batch).
        classification_factor: weight of the classification loss term.
        bounding_box_factor: weight of the bounding-box loss term.
        train_model: when True, back-propagate and step the optimizer after each
            batch; when False, only measure (no gradients, no weight updates).
        optimizer: optimizer used when `train_model` is True. If omitted, the
            notebook-level `optimizer` variable is used, as the previous
            implementation always did.

    Returns:
        (mean loss per sample, classification accuracy, mean IoU), all floats.
        An empty dataloader yields (0.0, 0.0, 0.0).

    Raises:
        ValueError: if `train_model` is True and no optimizer can be found.
    """
    if train_model and optimizer is None:
        # Backward-compatible fallback to the module-level optimizer.
        optimizer = globals().get('optimizer')
        if optimizer is None:
            raise ValueError('evaluate(train_model=True) needs an optimizer')

    # Run on whatever device the model lives on instead of hard-coding .cuda().
    device = next(model.parameters()).device

    total_loss = 0.0
    total_iou = 0.0
    samples_count = 0
    true_count = 0
    # Gradients are only tracked when the weights are actually being updated.
    with torch.set_grad_enabled(train_model):
        for x, label, bb in dataloader:
            # predictions
            x = x.to(device).float()
            label = label.to(device)
            bb = bb.to(device).float()
            pred_label, pred_bb = model(x)
            # losses
            loss = loss_function(pred_label, pred_bb, label, bb, classification_factor, bounding_box_factor)
            if train_model:
                # Previously this ran unconditionally, so validation batches were
                # also used to update the weights.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # classification accuracy
            _, pred_idx = torch.max(pred_label, 1)
            true_count += pred_idx.eq(label).sum().item()
            # object localization accuracy
            for true_box, pred_box in zip(bb, pred_bb.detach()):
                total_iou += float(IOU_metrics(true_box, pred_box))
            # epoch loss: `loss` is a batch mean, so weight it by the batch size to
            # get a true per-sample average; .item() avoids keeping the graph alive.
            batch_count = label.shape[0]
            total_loss += loss.item() * batch_count
            samples_count += batch_count

    if samples_count == 0:
        return 0.0, 0.0, 0.0
    return total_loss / samples_count, true_count / samples_count, total_iou / samples_count


def train(model, optimizer, train_dataloader, val_dataloader, epochs, classification_factor=1.0, bounding_box_factor=1.0) -> None:
    """Train for `epochs` epochs and print training/validation metrics after each.

    Args:
        model: network to train.
        optimizer: optimizer that updates `model` on the training batches.
        train_dataloader: batches used for weight updates.
        val_dataloader: batches used for measurement only.
        epochs: number of passes over the training data.
        classification_factor: weight of the classification loss term.
        bounding_box_factor: weight of the bounding-box loss term.
    """
    for epoch in range(epochs):
        # train
        # NOTE(review): the training pass runs with the model in eval mode
        # (`model.train()` was commented out in the original). In eval mode the
        # BatchNorm layers use their running statistics and Dropout is disabled.
        # Confirm this is intentional before switching to model.train().
        model.eval()
        train_loss, train_accuracy, train_iou = evaluate(
            model, train_dataloader, classification_factor, bounding_box_factor,
            train_model=True, optimizer=optimizer)

        # validate: measurement only, the weights must not change here
        model.eval()
        val_loss, val_accuracy, val_iou = evaluate(
            model, val_dataloader, classification_factor, bounding_box_factor,
            train_model=False)

        print('Epoch: %d/%d' % (epoch+1, epochs))
        print('train loss: %.3f train acc: %.3f train iou %.03f val loss: %.3f val acc: %.3f val iou %.3f' % (train_loss, train_accuracy, train_iou, val_loss, val_accuracy, val_iou))

In [None]:
def test_classification(model, dataset):
    """Evaluate the model sample by sample and plot a confusion matrix.

    The plot title reports the classification accuracy and the mean IoU between
    the dataset's (scaled) boxes and the predicted boxes.

    Args:
        model: network returning (class scores, bounding-box values).
        dataset: indexable yielding (image tensor, label, bounding box).
    """
    labels = []
    preds = []
    true_count = 0
    total_iou = 0
    total_count = len(dataset)

    # Run on the model's own device instead of hard-coding .cuda().
    device = next(model.parameters()).device
    # Switch to eval mode once, and skip autograd bookkeeping during inference.
    model.eval()
    with torch.no_grad():
        for i in range(total_count):
            x, label, bounding_box = dataset[i]
            # x[None,] adds the batch dimension the model expects.
            pred_label, pred_bb = model(x[None,].float().to(device))

            _, pred = torch.max(pred_label, 1)
            pred_idx = pred[0].item()
            if pred_idx == label:
                true_count += 1

            total_iou += IOU_metrics(bounding_box, pred_bb.tolist()[0])

            labels.append(label)
            preds.append(pred_idx)

    test_accuracy = true_count / total_count
    test_mean_iou = total_iou / total_count
    cm = confusion_matrix(labels, preds)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot()
    plt.title('Test accuracy: %.3f, mean IOU %.3f' % (test_accuracy, test_mean_iou))
    plt.show()


def test_bounding_box(model, test_dataset, max_plots=40):
    """Plot test images with the true and the predicted box masks overlaid.

    Each figure title shows the true class, the predicted class and the IoU
    computed on the scaled boxes.

    Args:
        model: network returning (class scores, bounding-box values).
        test_dataset: indexable yielding (image tensor, label, bounding box).
        max_plots: maximum number of samples to plot (default 40, the previous
            hard-coded limit).
    """
    # Run on the model's own device instead of hard-coding .cuda().
    device = next(model.parameters()).device
    model.eval()
    # Only the plotted samples need a forward pass; previously the model ran on
    # the whole dataset even though nothing was shown after the first 40.
    with torch.no_grad():
        for i in range(min(len(test_dataset), max_plots)):
            x, label, bounding_box = test_dataset[i]

            pred_label, pred_bb = model(x[None,].float().to(device))
            _, pred_label = torch.max(pred_label, 1)

            bounding_box = list(bounding_box)
            # Keep the prediction as floats: truncating the scaled values with
            # .int() before the IoU and before rescaling threw away most of the
            # box precision (one unit is 30 or 40 after max_bb).
            pred_bb = pred_bb.tolist()[0]
            iou = IOU_metrics(bounding_box, pred_bb)

            bounding_box = max_bb(bounding_box)
            # Round only after scaling back up, so the mask helper still receives
            # whole-number coordinates as before.
            pred_bb = np.rint(max_bb(pred_bb))
            plt.title('True: %s Pred: %s \nIOU: %.3f' % (class_labels[label], class_labels[pred_label.item()], iou))
            # image: channels-first -> height x width x channels for imshow
            plt.imshow(x.permute(1, 2, 0).cpu())
            # true mask
            img = create_mask(400, 300, bounding_box)
            plt.imshow(img[:,:,0], alpha=0.4)
            # pred mask
            img = create_mask(400, 300, pred_bb, r=0, g=255)
            plt.imshow(img[:,:,1], alpha=0.4)
            plt.show()

In [None]:
# Build the network on the GPU and put it in eval mode (eval() returns the module itself).
model = BBmodel().cuda().eval()
# Count only the parameters that receive gradients.
print('Model parameters:', sum(weight.numel() for weight in model.parameters() if weight.requires_grad))

In [None]:
# SGD with momentum over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.002, momentum=0.9)
# 10 epochs; the bounding-box loss is weighted more heavily than the classification loss.
train(
    model,
    optimizer,
    train_dataloader,
    val_dataloader,
    10,
    classification_factor=0.3,
    bounding_box_factor=1.5,
) #25

In [None]:
# Final evaluation on the held-out test split: confusion matrix first, then box overlays.
for run_test in (test_classification, test_bounding_box):
    run_test(model, test_dataset)