In [1]:
import os
import torch
import torchvision
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import cv2
from matplotlib import pyplot as plt
import torchvision.transforms as transforms
from datetime import datetime
import numpy as np
import os
import random
import argparse
from PIL import Image
import csv

In [2]:
# dataset definition
class myDataset(Dataset):
    # load the dataset
    def __init__(self,root,transforms=None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(os.path.join(root, "imgs"))))


    # get a row at an index
    def __getitem__(self, idx):
        idx = idx -1
        img_path = os.path.join(self.root, "imgs", self.imgs[idx])
        img = Image.open(img_path).convert("RGB")
        
        self.data = open(os.path.join(self.root, "annotations.csv"))
        data = csv.reader(self.data)
        
        boxes = []
        row = data.__next__()
        
        for x in range(idx+1):
            row = data.__next__()
        
        x1 = int(row[1])
        y1 = int(row[2])
        x2 = int(row[3])
        y2 = int(row[4])
        label = int(row[5])
        boxes.append([x1, y1, x2, y2])
        
        self.data.close()
        
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.tensor([label], dtype=torch.int64)
        
        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        
        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target
    
    def __len__(self):
        return len(self.imgs)

In [3]:
import detection.transforms as T

def get_transform(train):
    transforms = []
    transforms.append(T.PILToTensor())
    transforms.append(T.Normalize())
    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)

In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [5]:
num_classes = 9

In [6]:
dataset = myDataset('db_lisa_tiny', get_transform(train=True))
dataset_test = myDataset('db_lisa_tiny', get_transform(train=False))

In [7]:
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

In [8]:
import detection.utils as utils
data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=1,collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=1,collate_fn=utils.collate_fn)

In [9]:
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn()

In [10]:
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (0): ConvNormActivation(
        (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): FrozenBatchNorm2d(16, eps=1e-05)
        (2): Hardswish()
      )
      (1): InvertedResidual(
        (block): Sequential(
          (0): ConvNormActivation(
            (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
            (2): ReLU(inplace=True)
          )
          (1): ConvNormActivation(
            (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d(16, eps=1e-05)
          )
        )
      )
      (2): InvertedResidual(
        (block): Seque

In [11]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,momentum=0.9, weight_decay=0.0005)

In [12]:
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,step_size=3,gamma=0.1)

In [13]:
num_epochs = 8

In [14]:
from detection.engine import train_one_epoch, evaluate

for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the test dataset
        evaluate(model, data_loader_test, device=device)



Epoch: [0]  [  0/425]  eta: 1:15:34  lr: 0.000017  loss: 5.1365 (5.1365)  loss_classifier: 4.4335 (4.4335)  loss_box_reg: 0.0003 (0.0003)  loss_objectness: 0.6969 (0.6969)  loss_rpn_box_reg: 0.0059 (0.0059)  time: 10.6688  data: 0.3325
Epoch: [0]  [ 10/425]  eta: 0:53:07  lr: 0.000135  loss: 5.1181 (5.0966)  loss_classifier: 4.4210 (4.3962)  loss_box_reg: 0.0002 (0.0007)  loss_objectness: 0.6939 (0.6943)  loss_rpn_box_reg: 0.0041 (0.0054)  time: 7.6797  data: 0.0383


KeyboardInterrupt: 

In [None]:
torch.save(model,"model_real_fasterrcnn_mobilenet_v3_large_fpn")

In [None]:
trainedModel = torch.load("model_real_fasterrcnn_mobilenet_v3_large_fpn")

In [None]:
trainedModel.eval()

In [None]:
img, targets = next(iter(data_loader_test))

In [None]:
trainedModel(img)

In [None]:
imgs = img[3]
imgs = np.transpose(imgs, (1,2,0)) #tt = np.transpose(tt,(1,2,0))
plt.figure()
plt.imshow(imgs)

plt.show()