In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#import library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import numpy as np
import torch
from PIL import Image
import re


In [3]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

COCO is a large image dataset designed for object detection, segmentation, person keypoints detection, stuff segmentation, and caption generation. This package provides Matlab, Python, and Lua APIs that assists in loading, parsing, and visualizing the annotations in COCO.

In [4]:
%%shell

# Install pycocotools
git clone https://github.com/cocodataset/cocoapi.git
cd cocoapi/PythonAPI
python setup.py build_ext install

Cloning into 'cocoapi'...
remote: Enumerating objects: 975, done.[K
remote: Total 975 (delta 0), reused 0 (delta 0), pack-reused 975[K
Receiving objects: 100% (975/975), 11.72 MiB | 26.98 MiB/s, done.
Resolving deltas: 100% (576/576), done.
running build_ext
cythoning pycocotools/_mask.pyx to pycocotools/_mask.c
  tree = Parsing.p_module(s, pxd, full_module_name)
building 'pycocotools._mask' extension
creating build
creating build/common
creating build/temp.linux-x86_64-3.7
creating build/temp.linux-x86_64-3.7/pycocotools
x86_64-linux-gnu-gcc -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fdebug-prefix-map=/build/python3.7-a56wZI/python3.7-3.7.10=. -fstack-protector-strong -Wformat -Werror=format-security -g -fdebug-prefix-map=/build/python3.7-a56wZI/python3.7-3.7.10=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -fPIC -I/usr/local/lib/python3.7/dist-packages/numpy/core/include -I../common -I/usr/include/pytho



In [5]:
%%shell

# Download TorchVision repo to use some files from
# references/detection
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

Cloning into 'vision'...
remote: Enumerating objects: 22445, done.[K
remote: Counting objects: 100% (3226/3226), done.[K
remote: Compressing objects: 100% (798/798), done.[K
remote: Total 22445 (delta 2471), reused 3044 (delta 2348), pack-reused 19219[K
Receiving objects: 100% (22445/22445), 27.08 MiB | 17.70 MiB/s, done.
Resolving deltas: 100% (16532/16532), done.
Note: checking out 'v0.3.0'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by performing another checkout.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -b with the checkout command again. Example:

  git checkout -b <new-branch-name>

HEAD is now at be376084 version check against PyTorch's CUDA version




In [6]:
from engine import train_one_epoch, evaluate
import utils
import transforms as T

from torch.utils.data import Dataset
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

Fasterrcnn expects our data in the form of an image and a dictionary corresponding to it. The dictionary should include bounding boxes and labels at the very least. 

We convert our boxes to the (xmin, ymin, xmax, ymax) format. We assign our labels to all as either 0,1 or 2, accordingly. Finally, we convert everything to PyTorch tensors.


In [7]:
class DetectionDataset(Dataset):

  def __init__(self,root,transforms = None):

    self.root = root
    self.transforms = transforms

    self.images = list(sorted(os.listdir(os.path.join(root,'Image'))))
    self.masks = list(sorted(os.listdir(os.path.join(root,'Annotation'))))

  def __len__(self):
        return len(self.images)
        

  def __getitem__(self,index):

      img_path = os.path.join(self.root,'Image',self.images[index])
      mask_path = os.path.join(self.root,'Annotation',self.masks[index])
      image = Image.open(img_path)   
      f = open(mask_path,'r')
      mask = f.readlines()
      f.close()
      coordinates_pattern = re.compile(r'(\d+)')
      label_pattern = re.compile(r'([A-Z]+)')
      boxes = []
      labels = []
      num_boxes = len(mask)
      # store the bounding box coordinates of corresponding images from annotation file
      for box in mask:
        coordinates = coordinates_pattern.findall(box)
        xmin = min(int(coordinates[0]),int(coordinates[2]))
        xmax = max(int(coordinates[0]),int(coordinates[2]))
        ymin = min(int(coordinates[4]),int(coordinates[6]))
        ymax = max(int(coordinates[4]),int(coordinates[6]))

        boxes.append([xmin, ymin, xmax, ymax])

        label = label_pattern.findall(box)
        if label[0] == 'HINDI':
          labels.append(1)
        elif label[0] == 'ENGLISH':
          labels.append(2) 
        else:
          labels.append(0)
      
      boxes = torch.as_tensor(boxes, dtype=torch.float32)
      labels = torch.as_tensor(labels, dtype=torch.int64)
      iscrowd = torch.zeros(num_boxes, dtype=torch.int64)
      image_id = torch.tensor([index])
      #calculating area of the bounding boxes
      area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
      target = {}

      target["boxes"] = boxes
      target["labels"] = labels
      target["image_id"] = image_id
      target["area"] = area
      target["iscrowd"] = iscrowd
      if self.transforms is not None:
          image, target = self.transforms(image, target)

      return image, target


In [8]:
data_path = '/content/drive/MyDrive/EE626_Project/real_Image_dataset_Detection'

In [9]:
data = DetectionDataset(data_path)


We will now define our model. We are using a ResNet50 backbone with a feature pyramid network for our Fasterrcnn. The backbone is easily customizable. The only parameter we need to pass is the number of classes in our dataset.  In train mode, the model takes images and target dictionary as input and produces a loss_dict as output.



In [10]:
def get_detection_model(num_classes):
  # load a model; pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)  # Using pytorch FasterRCNN for detection 
    return model

In [11]:
def get_transform(train):

    transforms = []
    transforms.append(T.ToTensor())

    if train:
        transforms.append(T.RandomHorizontalFlip(0.5))  # performing random flips to images, to make it train efficiently
        
    return T.Compose(transforms)

In [12]:
dataset_train = DetectionDataset(data_path, get_transform(train=True))
dataset_test = DetectionDataset(data_path, get_transform(train=False))

In [13]:
torch.manual_seed(42)
indices = torch.randperm(len(dataset_train)).tolist()
dataset_train = torch.utils.data.Subset(dataset_train, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

In [14]:
data_loader_train = torch.utils.data.DataLoader(
    dataset_train, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=2, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

  cpuset_checked))


In [15]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [16]:
num_classes = 3     #ENGLISH, HINDI, OTHER

model = get_detection_model(num_classes)
model.to(device)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


HBox(children=(FloatProgress(value=0.0, max=167502836.0), HTML(value='')))




FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [17]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005) # Optimizer for training

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [18]:
num_epochs = 10

for epoch in range(num_epochs):

    train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=5)

    lr_scheduler.step()
    
    evaluate(model, data_loader_test, device=device)

  cpuset_checked))


Epoch: [0]  [  0/189]  eta: 0:07:30  lr: 0.000032  loss: 2.1829 (2.1829)  loss_classifier: 0.9275 (0.9275)  loss_box_reg: 0.2722 (0.2722)  loss_objectness: 0.9246 (0.9246)  loss_rpn_box_reg: 0.0586 (0.0586)  time: 2.3841  data: 1.4334  max mem: 2225
Epoch: [0]  [  5/189]  eta: 0:02:39  lr: 0.000164  loss: 1.9798 (3.0540)  loss_classifier: 0.8792 (0.8900)  loss_box_reg: 0.2722 (0.2621)  loss_objectness: 0.9246 (1.6643)  loss_rpn_box_reg: 0.0542 (0.2377)  time: 0.8660  data: 0.2474  max mem: 2913
Epoch: [0]  [ 10/189]  eta: 0:02:10  lr: 0.000297  loss: 1.5073 (2.3849)  loss_classifier: 0.7622 (0.7477)  loss_box_reg: 0.2722 (0.2592)  loss_objectness: 0.5913 (1.1916)  loss_rpn_box_reg: 0.0586 (0.1864)  time: 0.7265  data: 0.1375  max mem: 2913
Epoch: [0]  [ 15/189]  eta: 0:01:55  lr: 0.000430  loss: 1.3456 (1.9595)  loss_classifier: 0.5431 (0.6441)  loss_box_reg: 0.3180 (0.3168)  loss_objectness: 0.2663 (0.8486)  loss_rpn_box_reg: 0.0508 (0.1500)  time: 0.6651  data: 0.0970  max mem: 2913


In [20]:
PATH = "/content/drive/MyDrive/EE626_Project/Detection_model/detection_faster_rcnn.pth"
torch.save(model, PATH)