[Reference](https://www.kaggle.com/code/benihime91/pytorch-fasterrcnn)

### Imports

In [1]:
# %load_ext autoreload
# %autoreload 2

In [2]:
import os
import numpy as np
import pandas as pd
from PIL import Image

# Sk learn
from sklearn.model_selection import train_test_split

# PyTorch
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision import transforms, datasets, models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


In [3]:
import engine
import utils

In [4]:
# Count the total number of images

img_list = list(sorted(os.listdir('./data/images/')))
len(img_list)

853

#### Split the data

In [5]:
train_img, test_img = train_test_split(img_list, test_size = 0.2, random_state=42)
print(f"No. of training images: {len(train_img)}")
print(f"No. of test images: {len(test_img)}")

No. of training images: 682
No. of test images: 171


### DataSet Class

In [6]:
from bs4 import BeautifulSoup
from torchvision import transforms as T

def collate_fn(batch):
    data_list, label_list = [], []
    for _data, _label in batch:
        data_list.append(_data)
        label_list.append(_label)
    return torch.Tensor(data_list), torch.LongTensor(label_list)


def generate_box(obj):
    xmin = int(obj.find('xmin').text)
    ymin = int(obj.find('ymin').text)
    xmax = int(obj.find('xmax').text)
    ymax = int(obj.find('ymax').text)
    return [xmin, ymin, xmax, ymax]

def generate_labels(obj):
    if obj.find('name').text == "with_mask":
        return 1
    elif obj.find('name').text == "mask_weared_incorrect":
        return 2
    elif obj.find('name').text == "without_mask":
        return 3
    return 0

def get_transform():
    transforms = []
    transforms.append(T.PILToTensor())
    transforms.append(T.ConvertImageDtype(torch.float),)
    return T.Compose(transforms)


In [7]:
class MaskDataset(Dataset):
    def __init__(self, img_dir, annot_dir, img_list, transforms):
        self.transforms = transforms
        self.imgs = img_list
        self.img_dir = img_dir
        self.annot_dir = annot_dir

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, idx):
        f_image = 'maksssksksss'+ str(idx) + '.png'
        f_label = 'maksssksksss'+ str(idx) + '.xml'
        
        img_path = os.path.join(self.img_dir, f_image)
        label_path = os.path.join(self.annot_dir, f_label)
        
        # Open the image
        img = Image.open(img_path).convert("RGB")
        
        target = self.__generate_target(idx, label_path)
        
        if self.transforms is not None:
            # img, target = self.transforms(img, target)
            img = self.transforms(img)

        return img, target
    
    
    @staticmethod
    def __generate_target(img_id, file):
        with open(file, 'r') as f:
            data = f.read()
            soup = BeautifulSoup(data, 'xml')
            objects = soup.find_all('object')
            
            num_objects = len(objects)
            
            # Bounding boxes
            boxes = []
            labels = []
            
            for i in objects:
                boxes.append(generate_box(i))
                labels.append(generate_labels(i))
        
            # Convert to tensor
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)
            
            img_id = torch.tensor(img_id)
            
            # Area is width and height
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
            iscrowd = torch.zeros((num_objects,), dtype=torch.int64)
            
            # Return a dictionary format
            target = {}
            target['boxes'] = boxes
            target['labels'] = labels
            target['image_id'] = img_id
            target['area'] = area
            target['iscrowd'] = iscrowd
            
            return target

### DataLoader Class

In [11]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [12]:
train_dataset = MaskDataset(img_dir= './data/images/',
                        annot_dir= './data/annotations/',
                        img_list= train_img,
                        transforms= get_transform())

In [13]:
test_dataset = MaskDataset(img_dir='./data/images/',
                            annot_dir= './data/annotations/',
                            img_list= test_img,
                            transforms= get_transform())

In [19]:
train_loader = DataLoader(train_dataset, batch_size = 4, shuffle = True, num_workers = 0, collate_fn=collate_fn)


In [20]:
test_loader = DataLoader(test_dataset, batch_size = 1, shuffle = False, num_workers = 0, collate_fn = collate_fn)

### Define Model

In [21]:
def model_instance_segmentation(num_classes: int, pretrained: bool = True):
    
    # Load pre-trained model
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=pretrained)
    
    
    # replace the classifier with a new one, that has
    # num_classes which is user-defined
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    return model

### Create Training Loop

In [22]:
def train_model(epochs = 10):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    
    num_classes = 4
    
    model = model_instance_segmentation(num_classes)
    model.to(device)
    
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.005)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    
    # Training loop
    for epoch in range(epochs):
        engine.train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq=10)
        lr_scheduler.step()
        engine.evaluate(model, test_loader, device=device)
        
    torch.save(model.state_dict(), "checkpoint.pth")
        

In [23]:
train_model()

Epoch: [0]  [  0/171]  eta: 0:51:34  lr: 0.000034  loss: 1.8886 (1.8886)  loss_classifier: 1.4287 (1.4287)  loss_box_reg: 0.1287 (0.1287)  loss_objectness: 0.2835 (0.2835)  loss_rpn_box_reg: 0.0477 (0.0477)  time: 18.0949  data: 0.0404
Epoch: [0]  [ 10/171]  eta: 0:36:02  lr: 0.000328  loss: 1.7827 (1.5732)  loss_classifier: 1.2121 (1.1502)  loss_box_reg: 0.1438 (0.1573)  loss_objectness: 0.1752 (0.2300)  loss_rpn_box_reg: 0.0309 (0.0357)  time: 13.4304  data: 0.0395
Epoch: [0]  [ 20/171]  eta: 0:32:31  lr: 0.000622  loss: 0.9439 (1.1671)  loss_classifier: 0.5411 (0.7826)  loss_box_reg: 0.1842 (0.1987)  loss_objectness: 0.1078 (0.1545)  loss_rpn_box_reg: 0.0193 (0.0313)  time: 12.6631  data: 0.0377
Epoch: [0]  [ 30/171]  eta: 0:30:16  lr: 0.000916  loss: 0.6890 (1.0181)  loss_classifier: 0.3553 (0.6459)  loss_box_reg: 0.2617 (0.2268)  loss_objectness: 0.0397 (0.1199)  loss_rpn_box_reg: 0.0114 (0.0255)  time: 12.5853  data: 0.0354
Epoch: [0]  [ 40/171]  eta: 0:28:37  lr: 0.001210  loss:



creating index...
index created!
Test:  [  0/171]  eta: 0:02:07  model_time: 0.7335 (0.7335)  evaluator_time: 0.0031 (0.0031)  time: 0.7430  data: 0.0064
Test:  [100/171]  eta: 0:00:54  model_time: 0.7874 (0.7585)  evaluator_time: 0.0009 (0.0016)  time: 0.7914  data: 0.0079
Test:  [170/171]  eta: 0:00:00  model_time: 0.7364 (0.7580)  evaluator_time: 0.0009 (0.0015)  time: 0.7707  data: 0.0077
Test: Total time: 0:02:11 (0.7676 s / it)
Averaged stats: model_time: 0.7364 (0.7580)  evaluator_time: 0.0009 (0.0015)
Accumulating evaluation results...
DONE (t=0.04s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.301
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.571
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.276
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.278
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.357
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:01:58  model_time: 0.6884 (0.6884)  evaluator_time: 0.0013 (0.0013)  time: 0.6957  data: 0.0061
Test:  [100/171]  eta: 0:00:51  model_time: 0.7360 (0.7176)  evaluator_time: 0.0009 (0.0014)  time: 0.7368  data: 0.0079
Test:  [170/171]  eta: 0:00:00  model_time: 0.6918 (0.7196)  evaluator_time: 0.0009 (0.0013)  time: 0.7381  data: 0.0076
Test: Total time: 0:02:04 (0.7289 s / it)
Averaged stats: model_time: 0.6918 (0.7196)  evaluator_time: 0.0009 (0.0013)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.419
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.678
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.495
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.362
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.579
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:02:00  model_time: 0.6961 (0.6961)  evaluator_time: 0.0009 (0.0009)  time: 0.7037  data: 0.0067
Test:  [100/171]  eta: 0:00:51  model_time: 0.7407 (0.7232)  evaluator_time: 0.0007 (0.0012)  time: 0.7456  data: 0.0079
Test:  [170/171]  eta: 0:00:00  model_time: 0.6611 (0.7172)  evaluator_time: 0.0007 (0.0011)  time: 0.7065  data: 0.0076
Test: Total time: 0:02:04 (0.7262 s / it)
Averaged stats: model_time: 0.6611 (0.7172)  evaluator_time: 0.0007 (0.0011)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.499
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.761
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.619
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.422
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.644
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:02:03  model_time: 0.7125 (0.7125)  evaluator_time: 0.0036 (0.0036)  time: 0.7231  data: 0.0069
Test:  [100/171]  eta: 0:00:51  model_time: 0.7450 (0.7219)  evaluator_time: 0.0007 (0.0014)  time: 0.7464  data: 0.0078
Test:  [170/171]  eta: 0:00:00  model_time: 0.6730 (0.7201)  evaluator_time: 0.0007 (0.0012)  time: 0.7251  data: 0.0076
Test: Total time: 0:02:04 (0.7293 s / it)
Averaged stats: model_time: 0.6730 (0.7201)  evaluator_time: 0.0007 (0.0012)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.609
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.887
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.733
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.526
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.749
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:02:02  model_time: 0.7092 (0.7092)  evaluator_time: 0.0009 (0.0009)  time: 0.7162  data: 0.0061
Test:  [100/171]  eta: 0:00:53  model_time: 0.7441 (0.7391)  evaluator_time: 0.0007 (0.0012)  time: 0.7483  data: 0.0079
Test:  [170/171]  eta: 0:00:00  model_time: 0.6918 (0.7311)  evaluator_time: 0.0007 (0.0011)  time: 0.7348  data: 0.0076
Test: Total time: 0:02:06 (0.7402 s / it)
Averaged stats: model_time: 0.6918 (0.7311)  evaluator_time: 0.0007 (0.0011)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.657
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.910
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.801
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.569
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.776
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:02:00  model_time: 0.6999 (0.6999)  evaluator_time: 0.0007 (0.0007)  time: 0.7066  data: 0.0059
Test:  [100/171]  eta: 0:00:53  model_time: 0.7725 (0.7378)  evaluator_time: 0.0007 (0.0013)  time: 0.7706  data: 0.0080
Test:  [170/171]  eta: 0:00:00  model_time: 0.6919 (0.7352)  evaluator_time: 0.0007 (0.0012)  time: 0.7443  data: 0.0076
Test: Total time: 0:02:07 (0.7443 s / it)
Averaged stats: model_time: 0.6919 (0.7352)  evaluator_time: 0.0007 (0.0012)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.678
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.931
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.848
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.598
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.802
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:02:03  model_time: 0.7165 (0.7165)  evaluator_time: 0.0007 (0.0007)  time: 0.7232  data: 0.0060
Test:  [100/171]  eta: 0:00:53  model_time: 0.7770 (0.7455)  evaluator_time: 0.0007 (0.0012)  time: 0.7735  data: 0.0079
Test:  [170/171]  eta: 0:00:00  model_time: 0.7246 (0.7446)  evaluator_time: 0.0007 (0.0011)  time: 0.7596  data: 0.0077
Test: Total time: 0:02:08 (0.7537 s / it)
Averaged stats: model_time: 0.7246 (0.7446)  evaluator_time: 0.0007 (0.0011)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.709
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.938
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.862
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.627
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.826
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:02:00  model_time: 0.7010 (0.7010)  evaluator_time: 0.0007 (0.0007)  time: 0.7076  data: 0.0059
Test:  [100/171]  eta: 0:00:52  model_time: 0.7431 (0.7294)  evaluator_time: 0.0007 (0.0012)  time: 0.7521  data: 0.0078
Test:  [170/171]  eta: 0:00:00  model_time: 0.6956 (0.7278)  evaluator_time: 0.0007 (0.0011)  time: 0.7361  data: 0.0076
Test: Total time: 0:02:05 (0.7368 s / it)
Averaged stats: model_time: 0.6956 (0.7278)  evaluator_time: 0.0007 (0.0011)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.722
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.939
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.868
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.636
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.830
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:02:02  model_time: 0.7082 (0.7082)  evaluator_time: 0.0007 (0.0007)  time: 0.7149  data: 0.0060
Test:  [100/171]  eta: 0:00:51  model_time: 0.7370 (0.7172)  evaluator_time: 0.0007 (0.0012)  time: 0.7392  data: 0.0079
Test:  [170/171]  eta: 0:00:00  model_time: 0.6806 (0.7149)  evaluator_time: 0.0007 (0.0010)  time: 0.7249  data: 0.0075
Test: Total time: 0:02:03 (0.7239 s / it)
Averaged stats: model_time: 0.6806 (0.7149)  evaluator_time: 0.0007 (0.0010)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.746
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.948
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.879
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.667
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.853
 Average Precisio



creating index...
index created!
Test:  [  0/171]  eta: 0:02:03  model_time: 0.7144 (0.7144)  evaluator_time: 0.0009 (0.0009)  time: 0.7216  data: 0.0063
Test:  [100/171]  eta: 0:00:52  model_time: 0.7609 (0.7370)  evaluator_time: 0.0007 (0.0012)  time: 0.7636  data: 0.0079
Test:  [170/171]  eta: 0:00:00  model_time: 0.7124 (0.7385)  evaluator_time: 0.0007 (0.0011)  time: 0.7519  data: 0.0075
Test: Total time: 0:02:07 (0.7479 s / it)
Averaged stats: model_time: 0.7124 (0.7385)  evaluator_time: 0.0007 (0.0011)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.747
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.950
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.880
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.668
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.854
 Average Precisio