# Mask-RCNN Model Implemented using PyTorch

***

**Author:** Shane Cooke

**Date:** 30 Sept 2022

**References:**
* https://medium.com/fullstackai/how-to-train-an-object-detector-with-your-own-coco-dataset-in-pytorch-319e7090da5
* https://towardsdatascience.com/train-mask-rcnn-net-for-object-detection-in-60-lines-of-code-9b6bbff292c3
* https://towardsdatascience.com/how-to-use-datasets-and-dataloader-in-pytorch-for-custom-text-data-270eed7f7c00
* https://www.kaggle.com/code/abhishek/train-your-own-mask-rcnn/notebook

***

## Imports & Data Preparation

#### Google Colab

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
cd drive/MyDrive/MastersProject/Code/

/content/drive/MyDrive/MastersProject/Code


#### Install Dependencies

In [5]:
#!pip install albumentations==0.4.6
#!pip install pycocotools --quiet

# Clone TorchVision repo and copy helper files
#!git clone https://github.com/pytorch/vision.git
#%cd vision
#!git checkout v0.3.0
#%cd ..
#!cp vision/references/detection/utils.py ./
#!cp vision/references/detection/transforms.py ./
#!cp vision/references/detection/coco_eval.py ./
#!cp vision/references/detection/engine.py ./
#!cp vision/references/detection/coco_utils.py ./

#### Imports

In [6]:
# Import Torch Libraries
import torch
import torchvision
from torchvision import models, transforms
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# Import Useful Libraries
import os
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# Import Image/File Work
from PIL import Image
import json
import numpy as np


#### Defining Input/Output Files & Directories

In [7]:
classes = ('NonOverfilled', 'Overfilled')
num_classes = 3
ds = "ASL" # ASL or FEL

#### Dataset Preparation

In [8]:
def DataPrep(image_path, annot_path):
   
    jpg_files = [pos_jpg for pos_jpg in os.listdir(image_path) if pos_jpg.endswith('.jpg')]
    
    with open(annot_path) as json_file:
        annotations = json.load(json_file)
    
    return jpg_files, annotations

In [9]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self, imSize, imDir, imNames, annots, transforms=None):
        self.height = imSize[0]
        self.width = imSize[1]
        self.imDir = imDir
        self.imNames = imNames
        self.annots = annots
        self.transform = transforms
    
    def __getitem__(self, index):
        image_name = self.imNames[index]
        image_path = f"{self.imDir}{image_name}"

        image = Image.open(image_path).convert('RGB')
        image = image.resize((self.width, self.height), resample=Image.BILINEAR)

        for item in self.annots['images']:
            if item['file_name'] == image_name:
                imageID = item['id']
                break

        boxes = []
        area = []
        iscrowd = []
        labels = []
        masks = []
        
        for annotation in self.annots['annotations']:
            if annotation['image_id'] == imageID:
                xmin = annotation['bbox'][0]
                ymin = annotation['bbox'][1]
                xmax = xmin + annotation['bbox'][2]
                ymax = ymin + annotation['bbox'][3]
                
                # Append Values to Arrays
                area.append(annotation['area'])
                iscrowd.append(annotation['iscrowd'])
                labels.append(annotation['category_id'])
                boxes.append((xmin, ymin, xmax, ymax))
                #segmen.append(annotation['segmentation'])

        # Transform data into Tensor Format
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        imageID = torch.as_tensor(imageID, dtype=torch.int64) # torch.tensor([imageID])
        area = torch.as_tensor(area, dtype=torch.float32)
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        
        targets = {}
        targets['boxes'] = boxes
        targets['labels'] = labels
        targets['image_id'] = imageID
        targets['area'] = area
        targets['iscrowd'] = iscrowd
        targets['masks'] = masks

        if self.transform is not None:
            image, targets = self.transform(image, targets)

        return image, targets

    def __len__(self):
        return len(self.imNames)


In [10]:
def get_transform():
    transforms = []
    transforms.append(T.ToTensor())
    return T.Compose(transforms)

#### Training Data Loader

In [11]:
image_path = f'./Dataset/{ds}/Train/'
annot_path = f'./Dataset/{ds}/{ds}_train_abs.json'
train_imNames, train_annotations = DataPrep(image_path, annot_path)
imSize = [480, 704]

train_data = Dataset(imSize, image_path, train_imNames, train_annotations, get_transform())
print('Length of dataset: ', len(train_data), '\n')

image, annotations = train_data[6]
print('Image shape: ', image.shape)                                                                   
print('Annotation Example: ', annotations)
print('Image type: ',image.dtype)

batchSize = 4
workers = 2
train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=batchSize, shuffle=True, num_workers=workers, collate_fn=utils.collate_fn)

Length of dataset:  1602 

Image shape:  torch.Size([3, 480, 704])
Annotation Example:  {'boxes': tensor([[316.9700, 242.5500, 464.1000, 442.1700],
        [ 39.2200, 172.6700, 169.8400, 430.9600]]), 'labels': tensor([2, 2]), 'image_id': tensor(1416), 'area': tensor([29370.1094, 33738.3789]), 'iscrowd': tensor([0, 0])}
Image type:  torch.float32


In [12]:
with open(annot_path) as json_file:
    annotations = json.load(json_file)

points = annotations['annotations'][1]['segmentation'][0]
points = tuple(points)

print(points)

(383.46, 198.1, 379.89, 216.15, 381.08, 217.74, 376.92, 235.39, 385.05, 240.55, 397.35, 241.74, 401.51, 221.11, 405.48, 221.91, 409.05, 212.78, 412.03, 211.79, 412.42, 207.43, 406.87, 203.06, 393.98, 198.1, 388.62, 197.51, 386.04, 195.92)


#### Validation Data Loader

In [None]:
image_path = f'./Dataset/{ds}/Val/'
annot_path = f'./Dataset/{ds}/{ds}_val_abs.json'
val_imNames, val_annotations = DataPrep(image_path, annot_path)
imSize = [480, 704]

val_data = Dataset(imSize, image_path, val_imNames, val_annotations, get_transform())

batchSize = 2
workers = 2
val_data_loader = torch.utils.data.DataLoader(val_data, batch_size=batchSize, shuffle=False, num_workers=workers, collate_fn=utils.collate_fn)

***
## Training

#### Define Model

In [None]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

cuda


In [None]:
model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights=models.detection.MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = MaskRCNNPredictor(in_features, num_classes)
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.02, momentum=0.9, weight_decay=0.0001)

lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[6,10], gamma=0.1)

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_v2_coco-73cbd019.pth" to /root/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_v2_coco-73cbd019.pth


  0%|          | 0.00/177M [00:00<?, ?B/s]

TypeError: ignored

#### Training Function

In [None]:
epochs = 5

for epoch in range(epochs):
    print("\n\n")
    train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=20)
    lr_scheduler.step()
    evaluate(model, val_data_loader, device=device)

torch.save(model, f'./Models/trained_{ds}_model_Epoch{epochs}.pt')

print('\n\nTraining is now finished')




Epoch: [0]  [  0/401]  eta: 1:20:11  lr: 0.000070  loss: 1.5091 (1.5091)  loss_classifier: 1.2319 (1.2319)  loss_box_reg: 0.2241 (0.2241)  loss_objectness: 0.0484 (0.0484)  loss_rpn_box_reg: 0.0046 (0.0046)  time: 11.9997  data: 1.9829  max mem: 7120
Epoch: [0]  [ 20/401]  eta: 0:13:58  lr: 0.001069  loss: 0.6239 (0.8403)  loss_classifier: 0.3855 (0.6215)  loss_box_reg: 0.1455 (0.1637)  loss_objectness: 0.0334 (0.0491)  loss_rpn_box_reg: 0.0055 (0.0061)  time: 1.7101  data: 0.3700  max mem: 7731
Epoch: [0]  [ 40/401]  eta: 0:10:47  lr: 0.002068  loss: 0.3886 (0.6321)  loss_classifier: 0.1959 (0.4151)  loss_box_reg: 0.1895 (0.1776)  loss_objectness: 0.0161 (0.0343)  loss_rpn_box_reg: 0.0037 (0.0051)  time: 1.3687  data: 0.0150  max mem: 7731
Epoch: [0]  [ 60/401]  eta: 0:09:26  lr: 0.003067  loss: 0.3096 (0.5264)  loss_classifier: 0.1041 (0.3151)  loss_box_reg: 0.1895 (0.1787)  loss_objectness: 0.0131 (0.0283)  loss_rpn_box_reg: 0.0019 (0.0043)  time: 1.3877  data: 0.0151  max mem: 7