In [None]:
import os
import pandas as pd
import torch
import numpy as np
import math
import torchvision
from collections import defaultdict
from torchvision import models
from torchvision.ops import box_iou
from torchvision.io import read_image
from torchvision.transforms import v2
from torchmetrics.detection.mean_ap import MeanAveragePrecision
import matplotlib.patches as patches
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import clear_output, display
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
print(device)

cuda


In [None]:
class customDataset(Dataset):
    """Object-detection dataset backed by an image folder and a bounding-box CSV.

    The CSV must provide the columns ``image``, ``xmin``, ``ymin``, ``xmax`` and
    ``ymax``. All rows sharing an ``image`` value become the boxes of one sample.
    Image names are kept in the (sorted) order ``groupby`` yields them, and the
    last ``splitPercent`` percent of them form the held-out test split.

    Args:
        dataFilePath: Directory holding the image files named in the CSV.
        csvFilePath: Path of the bounding-box CSV.
        mode: ``'Train'`` keeps everything except the held-out tail, ``'Test'``
            keeps only the held-out tail. Any other value prints
            ``"Invalid Mode"`` and keeps every image.
        splitPercent: Percentage of images held out for testing. ``None`` is
            treated as 0.
        transform: Optional callable applied to the image only (never to the
            boxes).
        targetSize: Edge length, in pixels, that box coordinates are rescaled
            to. It has to match the size ``transform`` resizes images to.
    """

    def __init__(self, dataFilePath, csvFilePath, mode = None, splitPercent = None, transform = None, targetSize = 512):
        self.dataFilePath = dataFilePath
        self.mode = mode
        self.splitPercent = splitPercent
        self.transform = transform
        self.targetSize = targetSize
        self.csv = pd.read_csv(csvFilePath)

        # Select the coordinate columns per group explicitly instead of running
        # groupby.apply over the whole frame, which pandas deprecates.
        boxColumns = ['xmin', 'ymin', 'xmax', 'ymax']
        self.imgDictionary = {
            imageName: rows[boxColumns].values.tolist()
            for imageName, rows in self.csv.groupby('image')
        }
        self.dictionaryKeys = list(self.imgDictionary.keys())

        percent = 0 if self.splitPercent is None else self.splitPercent
        self.percentToNum = math.floor(len(self.dictionaryKeys) * (percent / 100))
        # Slice both splits from the same index. A negative-offset slice
        # (keys[-n:]) would hand back every image when n == 0.
        splitIndex = max(0, len(self.dictionaryKeys) - self.percentToNum)
        if self.mode == 'Train':
            self.dictionaryKeys = self.dictionaryKeys[:splitIndex]
        elif self.mode == 'Test':
            self.dictionaryKeys = self.dictionaryKeys[splitIndex:]
        else:
            print("Invalid Mode")

    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.dictionaryKeys)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, target)`` where ``target`` is a dict with ``"boxes"``
            (float32 tensor, one ``[xmin, ymin, xmax, ymax]`` row per box,
            rescaled to ``targetSize``) and ``"labels"`` (int64 tensor of
            ones, i.e. a single object class).
        """
        imageName = self.dictionaryKeys[idx]
        imagePath = os.path.join(self.dataFilePath, imageName)
        # Mirrored on the instance because earlier versions exposed them there.
        self.idxKey = imageName
        self.imgPath = imagePath

        image = read_image(imagePath)
        # Force float32 so integer CSV coordinates can be scaled in place.
        boxes = torch.tensor(self.imgDictionary[imageName], dtype = torch.float32)
        # image is indexed as (channels, height, width): x scales by width, y by height.
        boxes[:, [0, 2]] *= self.targetSize / image.shape[2]
        boxes[:, [1, 3]] *= self.targetSize / image.shape[1]
        labels = torch.ones(len(boxes), dtype = torch.int64)
        target = {"boxes":boxes, "labels":labels}
        if self.transform is not None:
            image = self.transform(image)
        return image, target

# Preprocessing applied to every image before it reaches the detector:
# resize to 512x512, wrap as a torchvision image tensor, cast to float32.
# NOTE(review): ToDtype is used without scale=True, so pixel values stay in the
# 0-255 range after the cast; plotBoxes relies on that when it casts back to
# uint8. torchvision documents a 0-1 input range for its detection models —
# confirm the current range is intended.
imgTransforms = v2.Compose(
    [
        v2.Resize(size = (512, 512)),
        v2.ToImage(),
        v2.ToDtype(dtype = torch.float32),
    ]
)

In [None]:
def collateFunction(data):
  """Hand the batch back untouched.

  Passed as ``collate_fn`` to the DataLoaders in this notebook so a batch stays
  a list of ``(image, target)`` samples.
  """
  batch = data
  return batch

In [None]:
# Location of the dataset (presumably a Google Drive mount inside Colab).
DATA_ROOT = '/content/drive/MyDrive/Datasets/Car Object Detection/data'
IMAGE_DIR = f'{DATA_ROOT}/training_images'
BOXES_CSV = f'{DATA_ROOT}/train_solution_bounding_boxes (1).csv'
SPLIT_PERCENT = 10  # percentage of images held out for evaluation
BATCH_SIZE = 4

trainData = customDataset(IMAGE_DIR, BOXES_CSV, mode = 'Train', splitPercent = SPLIT_PERCENT, transform = imgTransforms)
trainDataloader = DataLoader(trainData, batch_size = BATCH_SIZE, collate_fn = collateFunction)

testData = customDataset(IMAGE_DIR, BOXES_CSV, mode = 'Test', splitPercent = SPLIT_PERCENT, transform = imgTransforms)
# Must wrap testData (not trainData) so evaluation runs on the held-out images.
testDataloader = DataLoader(testData, batch_size = BATCH_SIZE, collate_fn = collateFunction)

  .apply(lambda x: x[['xmin','ymin','xmax','ymax']].values.tolist())
  .apply(lambda x: x[['xmin','ymin','xmax','ymax']].values.tolist())


In [None]:
# Start from the COCO-pretrained Faster R-CNN. `weights=` replaces the
# deprecated `pretrained=True` flag and selects the same checkpoint family.
model = models.detection.fasterrcnn_resnet50_fpn(
    weights = models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT
)
# Label 0 is the detector's background class; the dataset labels every box 1.
numOfClasses = 2
# Swap the COCO classification head for one sized to this dataset.
inFeatures = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = models.detection.faster_rcnn.FastRCNNPredictor(inFeatures, numOfClasses)
# To resume from saved weights instead of fine-tuning from COCO:
# model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/frcnn_weights.pth', map_location=device))
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 172MB/s]


In [None]:
def meanIoU(predBoxes, predLabels, predScores, gtBoxes, gtLabels, numOfClasses, threshold = 0.5):
  """Mean over classes of the best IoU each ground-truth box achieves.

  For every class id in ``range(numOfClasses)`` that has ground-truth boxes,
  each ground-truth box is matched with its highest-IoU prediction of the same
  class and those maxima are averaged. The per-class values are then averaged.

  Args:
    predBoxes: Predicted boxes, one ``[x1, y1, x2, y2]`` row per prediction.
    predLabels: Class id of each predicted box.
    predScores: Unused; kept so existing callers keep working.
    gtBoxes: Ground-truth boxes in the same format as ``predBoxes``.
    gtLabels: Class id of each ground-truth box.
    numOfClasses: Number of class ids to iterate over (starting at 0).
    threshold: Unused; kept so existing callers keep working.

  Returns:
    A 0-dim tensor. It is zero when no class has ground-truth boxes, so callers
    can always treat the result as a tensor.
  """
  iousByClass = []
  for cls in range(numOfClasses):
    gtClsBoxes = gtBoxes[gtLabels == cls]
    if len(gtClsBoxes) == 0:
      # Nothing to match for this class.
      continue

    predClsBoxes = predBoxes[predLabels == cls]
    if len(predClsBoxes) == 0:
      # Ground truth exists but was missed entirely: that is an IoU of 0,
      # not a class to drop from the average.
      iousByClass.append(gtBoxes.new_zeros((), dtype = torch.float32))
      continue

    ious = box_iou(gtClsBoxes, predClsBoxes)
    # Best-overlapping prediction for each ground-truth box.
    iousByClass.append(ious.max(dim=1).values.mean())

  if len(iousByClass) == 0:
    return gtBoxes.new_zeros((), dtype = torch.float32)
  return sum(iousByClass)/len(iousByClass)

In [None]:
def test(dataloader, model, numOfClasses, iou_threshold):
  model.eval()
  metric = MeanAveragePrecision()
  size = len(dataloader.dataset)
  batchSize = len(dataloader)

  imageList = []
  groundTruthList = []
  ious = []
  for data in dataloader:
    for dt in data:
      imageList.append(dt[0].to(device))
      groundTruthList.append({'boxes' : dt[1]['boxes'].to(device), 'labels' : dt[1]['labels'].to(device)})
    break
  with torch.no_grad():
    preds = model(imageList)

  for i in range(len(preds)):
    iou = meanIoU(preds[i]['boxes'], preds[i]['labels'], preds[i]['scores'], groundTruthList[i]['boxes'], groundTruthList[i]['labels'], numOfClasses, threshold = iou_threshold)
    ious.append(iou)

  mIoU = sum(ious)/len(ious)
  metric.update(preds, groundTruthList)
  map = metric.compute()
  return map['map'], mIoU
  # print(f"mAP50-95: {map} mIoU: {}")

In [None]:
def plotGraph(numOfEpochs, epochs, maps, mious):
  """Redraw the mAP / mIoU curves for the epochs completed so far.

  Args:
    numOfEpochs: Total number of epochs planned; fixes the x-axis range.
    epochs: Epoch numbers recorded so far.
    maps: mAP value per recorded epoch.
    mious: mIoU value per recorded epoch.
  """
  # Replace the previous figure in the output area instead of stacking plots.
  clear_output(wait=True)
  fig, ax = plt.subplots(figsize = (7,5))

  ax.plot(epochs, maps, label = 'mAP', color = 'red')
  ax.plot(epochs, mious, label = 'mIoU', color = 'green')
  # Keep the axis at least one unit wide so a single-epoch run has valid limits.
  ax.set_xlim(1, max(numOfEpochs, 2))
  ax.set_ylim(0, 1)
  ax.set_xlabel('Epochs')
  ax.set_ylabel('mAP and mIoU')
  ax.set_title('mAP and mIoU over epoch')
  ax.legend()
  ax.grid(True)
  plt.show()
  # One figure is created per epoch; release it once it has been shown.
  plt.close(fig)

In [None]:
def train(dataloader, model, optimizer, logEvery = 10):
  """Run one training epoch over ``dataloader``.

  Args:
    dataloader: Iterable of batches, each a list of ``(image, target)`` pairs
      where ``target`` has ``'boxes'`` and ``'labels'`` tensors.
    model: Detection model that, in train mode, returns a dict of loss tensors
      when called with ``(images, targets)``.
    optimizer: Optimizer over the model's parameters.
    logEvery: Print the current loss every this many batches.
  """
  model.train()
  numOfBatches = len(dataloader)

  # Iterate the loader that was passed in, not a notebook-level global.
  for batchIdx, data in enumerate(dataloader, start = 1):
    imgs = [img.to(device) for img, _ in data]
    targets = [
      {'boxes' : target['boxes'].to(device), 'labels' : target['labels'].to(device)}
      for _, target in data
    ]

    optimizer.zero_grad()
    lossDict = model(imgs, targets)
    # The model reports several loss terms; optimize their sum.
    loss = sum(lossDict.values())
    loss.backward()
    optimizer.step()

    if batchIdx % logEvery == 0:
      print(f"loss: {loss.item()}  [{batchIdx}/{numOfBatches}]")

In [None]:
numOfEpochs = 30
# Left pane: scrolling training log. Right pane: live metric plot.
out1 = widgets.Output(layout = widgets.Layout(height = '512px', overflow = 'auto'))
out2 = widgets.Output()
box = widgets.HBox([out1, out2], layout = widgets.Layout(justify_content = 'space-between', gap = '5px'))
display(box)

epochs, maps, mious = [], [], []
for i in range(numOfEpochs):
  with out1:
    print(f"\nEpoch {i+1}\n-------------------------------")
    print("Training\n")
    train(trainDataloader, model, optimizer)
    print("\nTesting\n")
    mAP, mIoU = test(testDataloader, model, numOfClasses, 0.6)
    print(f'mAP50-95:: {mAP}, mIoU: {mIoU}')
  with out2:
    # float() handles tensors on any device as well as plain Python numbers,
    # so the metric history is always plottable.
    maps.append(float(mAP))
    mious.append(float(mIoU))
    epochs.append(i+1)
    plotGraph(numOfEpochs, epochs, maps, mious)

HBox(children=(Output(layout=Layout(height='512px', overflow='auto')), Output()), layout=Layout(justify_conten…

In [None]:
def filterByScore(pred, threshold, nms = True, nmsThreshold = 0.7):
  """Drop low-confidence detections and optionally apply NMS.

  Args:
    pred: List of prediction dicts, each with ``'boxes'``, ``'labels'`` and
      ``'scores'`` tensors.
    threshold: Detections with a score not above this value are removed.
    nms: When true, run non-maximum suppression on the surviving detections.
      Suppression ignores class labels.
    nmsThreshold: IoU threshold passed to ``torchvision.ops.nms``.

  Returns:
    A list with one filtered prediction dict per entry of ``pred``.
  """
  outputs = []
  for prediction in pred:
    scoreMask = prediction['scores'] > threshold
    output = {
      'boxes' : prediction['boxes'][scoreMask],
      'labels' : prediction['labels'][scoreMask],
      'scores' : prediction['scores'][scoreMask]
      }

    if nms:
      # The kept indices refer to the score-filtered tensors, so they must be
      # applied to those, not to the unfiltered predictions.
      keptIdx = torchvision.ops.nms(output['boxes'], output['scores'], nmsThreshold)
      output = {
        'boxes' : output['boxes'][keptIdx],
        'labels' : output['labels'][keptIdx],
        'scores' : output['scores'][keptIdx]
        }

    outputs.append(output)
  return outputs

In [None]:
def plotBoxes(image, pred, classNames, modelSize = 512):
  """Draw predicted boxes with class name and score on top of ``image``.

  Args:
    image: Image tensor indexed as (channels, height, width). Floating-point
      images whose values all lie at or below 1 are treated as 0-1 scaled and
      stretched to 0-255 before display; everything else is cast to uint8
      as-is.
    pred: Prediction dict with ``'boxes'``, ``'labels'`` and ``'scores'``
      tensors. It is not modified.
    classNames: Sequence or mapping from integer class id to display name.
    modelSize: Edge length of the square input the boxes refer to; boxes are
      rescaled from it to the size of ``image``.
  """
  image = image.detach().cpu()
  if image.is_floating_point() and image.numel() > 0 and image.max() <= 1:
    image = image * 255
  image = image.to(torch.uint8).permute(1,2,0)

  # Clone before scaling: on CPU, .cpu().detach() shares storage with
  # pred['boxes'], so in-place scaling would corrupt the caller's prediction.
  boxes = pred['boxes'].detach().cpu().clone()
  boxes[:, [0,2]] *= image.shape[1]/modelSize
  boxes[:, [1,3]] *= image.shape[0]/modelSize
  labels = pred['labels'].detach().cpu()
  scores = pred['scores'].detach().cpu()

  fig, ax = plt.subplots(1, figsize=(12, 8))
  ax.imshow(image)
  ax.set_xlim([0, image.shape[1]])
  # Inverted y-limits keep the image origin in the top-left corner.
  ax.set_ylim([image.shape[0], 0])

  for box, label, score in zip(boxes, labels, scores):
    x1, y1, x2, y2 = box.tolist()
    width, height = x2 - x1, y2 - y1

    rect = patches.Rectangle((x1,y1), width, height, linewidth=2, edgecolor='r', facecolor='none')
    ax.add_patch(rect)
    # int() so the lookup works for lists and for dicts keyed by plain ints.
    className = classNames[int(label)]
    text = f'{className} : {score.item():.3f}'
    ax.text(x1, y1-5, text, fontsize = 12, color = 'white', backgroundcolor = 'red')

  ax.axis('off')
  plt.show()