
# [Download kaggle dataset](https://www.kaggle.com/datasets/andrewmvd/face-mask-detection)

In [1]:
%%capture
# Set up Kaggle dataset: download the archive with the Kaggle CLI, then extract it
# into ./face-mask-detection. %%capture suppresses the output of this cell.
# NOTE(review): later cells read from /content/face-mask-detection, so this cell
# presumably runs with /content as the working directory (Colab) — confirm.
! kaggle datasets download -d andrewmvd/face-mask-detection
! unzip face-mask-detection.zip -d face-mask-detection

# Imports

In [None]:
# Model Building
import torch
from torch import nn
import torchvision
from torchvision import transforms, datasets, models
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from sklearn.model_selection import train_test_split

# Data Extraction
from bs4 import BeautifulSoup # Parse through xml files and extract bounding boxes / labels
from PIL import Image # Image editing

# Data Visualization
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Working with files and model saves
import os
from pathlib import Path

# Functions to parse xml data and retrieve bounding boxes + labels


The following code snippet is taken from "Pytorch FasterRCNN" by Daniel and tweaked to match this project.

*   def generate_box(obj): Input "object" from xml; Output bounding box
*   def generate_label(obj): Input "object" from xml; Output label


Fine-tuning pretrained fasterrcnn_resnet50_fpn requires the following input tensors and targets:

*   Bounding-boxes of type FloatTensor[N, 4]
*   Labels of type Int64Tensor[N]











In [3]:
# Ground-truth bounding box of one annotated face
def generate_box(obj):
    """Return the box of an annotation ``<object>`` as ``[xmin, ymin, xmax, ymax]``.

    Each coordinate is read from the text of the tag with the same name
    (looked up through ``obj.find``) and converted with ``int``.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    return [int(obj.find(tag).text) for tag in corner_tags]

# Ground-truth class id of one annotated face
def generate_label(obj):
    """Map the ``<name>`` text of an annotation ``<object>`` to an integer class id.

    "with_mask" -> 1
    "mask_weared_incorrect" -> 2 (yes, this is how it is spelled in the xml files)
    anything else (i.e. without_mask) -> 0

    NOTE(review): torchvision detection models conventionally treat label 0 as
    background, so without_mask boxes may end up being trained as background —
    confirm against the torchvision docs before relying on class 0.
    """
    class_name = obj.find('name').text
    known_ids = {"with_mask": 1, "mask_weared_incorrect": 2}
    return known_ids.get(class_name, 0)

# Use BeautifulSoup to read xml data and build the target for one image
def generate_target(file):
    """Parse one annotation xml file into a Faster R-CNN target dictionary.

    Args:
        file: Path of the xml annotation file.

    Returns:
        dict with two entries:
          "boxes":  float32 tensor of shape [N, 4]; each row is
                    [xmin, ymin, xmax, ymax] (the PyTorch layout, not the
                    COCO [xmin, ymin, width, height] layout).
          "labels": int64 tensor of shape [N].
        N is the number of <object> entries in the file and may be 0.
    """
    with open(file) as f:
        data = f.read()

    # The text is fully in memory now, so parsing does not need the file handle.
    soup = BeautifulSoup(data, 'xml')
    objects = soup.find_all('object')

    boxes = [generate_box(obj) for obj in objects]
    labels = [generate_label(obj) for obj in objects]

    # reshape(-1, 4) leaves an [N, 4] tensor untouched and turns the empty case
    # into [0, 4] instead of [0], which is the shape expected for "no boxes".
    boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
    labels = torch.as_tensor(labels, dtype=torch.int64)

    # Annotation is in dictionary format
    return {"boxes": boxes, "labels": labels}

# CREATE DATALOADER

## Create Dataset() class for torch.utils.data.DataLoader(), this is a Map-style Dataset

*   implements \__getitem__() and \__len__()



In [4]:
class Dataset():
  """Map-style dataset pairing each face-mask image with its parsed annotation.

  Samples are addressed through ``indices``: item ``idx`` loads
  ``maksssksksss<indices[idx]>.png`` from ``<root>/images`` and
  ``maksssksksss<indices[idx]>.xml`` from ``<root>/annotations``.

  Args:
    indices: Sequence of file numbers that belong to this split
      (used for the train/validation split).
    root: Directory containing the ``images`` and ``annotations`` folders.
      Defaults to the location the dataset is read from in this notebook.
  """

  def __init__(self, indices, root="/content/face-mask-detection"):
    self.images_dir = os.path.join(root, "images")
    self.annotations_dir = os.path.join(root, "annotations")
    self.imgs = list(sorted(os.listdir(self.images_dir)))
    self.labels = list(sorted(os.listdir(self.annotations_dir)))
    self.indices = indices # Used for train/validation split
    # Built once here instead of on every __getitem__ call
    self.transform = transforms.Compose([transforms.ToTensor(), ])

  def __getitem__(self, idx):
    """Return ``(image_tensor, target)`` for position ``idx`` of this split."""
    # Generate file paths from idx
    stem = 'maksssksksss' + str(self.indices[idx])
    img_path = os.path.join(self.images_dir, stem + '.png')
    label_path = os.path.join(self.annotations_dir, stem + '.xml')

    # Load Image; the context manager closes the underlying file right away
    with Image.open(img_path) as raw:
      img = self.transform(raw.convert("RGB"))

    # Generate Label
    target = generate_target(label_path)

    return img, target

  def __len__(self):
    """Number of samples in this split."""
    return len(self.indices)

## Create collate function for torch.utils.data.DataLoader()

In [5]:
def collate_fn(batch):
  """Regroup a list of samples into one tuple per sample field.

  A batch ``[(img0, tgt0), (img1, tgt1), ...]`` becomes
  ``((img0, img1, ...), (tgt0, tgt1, ...))``; nothing is stacked.
  """
  columns = zip(*batch)
  return tuple(columns)

## Train/Valid Split

In [6]:
# Count the image files, then split the indices 0..total_imgs-1 into 70% train / 30% validation.
# random_state=1 is the seed I used for training this model (reproducibility purposes).
total_imgs = len(os.listdir("/content/face-mask-detection/images/"))
train_indices, valid_indices = train_test_split(
    range(total_imgs),
    test_size=0.3,
    random_state=1,
)

## Create train and validation DataLoader() objects

In [7]:
# If using cpu on colab free, 16/8 batch_size maximizes memory and training speeds
# No shuffle argument is passed, so each loader walks its split in index order.
train_dataset = Dataset(train_indices)
valid_dataset = Dataset(valid_indices)

train_dl = torch.utils.data.DataLoader(train_dataset, batch_size=32, collate_fn=collate_fn)
valid_dl = torch.utils.data.DataLoader(valid_dataset, batch_size=16, collate_fn=collate_fn)

# Make instance of pretrained Faster R-CNN

In [8]:
def get_model(num_classes):
  """Build a Faster R-CNN (ResNet-50 FPN, "DEFAULT" weights) with a new box head.

  The box predictor is replaced by a fresh ``FastRCNNPredictor`` that keeps the
  input width of the existing classification layer and outputs ``num_classes``
  classes.
  """
  detector = models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
  heads = detector.roi_heads
  heads.box_predictor = FastRCNNPredictor(
      heads.box_predictor.cls_score.in_features,
      num_classes,
  )
  return detector

In [None]:
# Instantiate the detector with three output classes
model = get_model(num_classes=3)

# Fine-tune Faster R-CNN model on our dataset

## Create optimizer

In [10]:
# For this project I did not experiment with various optimizer settings
sgd_settings = dict(lr=0.001, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.SGD(model.parameters(), **sgd_settings)

## Set Device type

In [None]:
# Use the GPU when one is visible to torch, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

## LOAD MODEL FROM GDRIVE

In [None]:
# Google drive must be mounted beforehand
# map_location remaps the stored tensors onto `device`, so a checkpoint that was
# saved on a GPU runtime can still be loaded on a CPU-only runtime.
state_dict = torch.load('/content/drive/MyDrive/{YourPathHere}.pt', map_location=device)
model.load_state_dict(state_dict)
model.to(device)

## Fine-tune

In [None]:
model.to(device) # In the event you don't load a saved model use this
num_epochs = 10
len_dataloader = len(train_dl)

for epochs in range(num_epochs):
  model.train()
  epoch_loss = 0.0
  for i, (imgs, annotations) in enumerate(train_dl, start=1):
    imgs = [img.to(device) for img in imgs]
    annotations = [{k: v.to(device) for k, v in t.items()} for t in annotations]

    # Train on the whole batch. Passing only imgs[0] / annotations[0] would
    # discard every other sample the DataLoader produced for this step.
    # If this exceeds the available memory, lower batch_size of train_dl.
    loss_dict = model(imgs, annotations)
    loss = sum(loss_dict.values())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # print(f'Iteration: {i}/{len_dataloader}, Loss: {loss}') # Show each iteration
    # .item() extracts a plain Python number, so the running total does not keep
    # a reference to every step's autograd history.
    epoch_loss += loss.item()

  print(epoch_loss)
  # Checkpoint after every epoch (Google drive must be mounted beforehand)
  torch.save(model.state_dict(),'/content/drive/MyDrive/{YourPathHere}.pt')

In [13]:
# Pull a single batch from the validation loader and move it onto the device
for imgs, annotations in valid_dl:
  imgs = [img.to(device) for img in imgs]
  annotations = [
      dict((k, v.to(device)) for k, v in t.items())
      for t in annotations
  ]
  break # Only the first batch is needed

## Enter evaluation mode and generate predictions for the first validation batch

In [None]:
# Switch the model to evaluation mode before running it on the validation batch
model.eval()
# Use with torch.no_grad() to prevent memory buildup exceeding colab's limits when running on cpu
# If still exceeding reduce validation batch_size
with torch.no_grad():
  preds = model(imgs) # Generate predictions for the images prepared in the previous cell

In [15]:
def plot_image(img_tensor, annotation):
    """Show an image with every box of ``annotation["boxes"]`` outlined in red.

    ``img_tensor`` is moved to the cpu and its axes are reordered with
    ``permute(1, 2, 0)`` before being handed to ``imshow``. Each box is
    unpacked as ``xmin, ymin, xmax, ymax`` and drawn as an unfilled rectangle.
    """
    figure, axes = plt.subplots(1)

    # Display the image
    pixels = img_tensor.cpu().data
    axes.imshow(pixels.permute(1, 2, 0))

    for corners in annotation["boxes"]:
        # .cpu() is needed when the boxes live on a cuda device
        left, top, right, bottom = corners.cpu().detach().numpy()

        # Unfilled red rectangle anchored at the top-left corner
        outline = patches.Rectangle(
            (left.item(), top.item()),
            (right - left),
            (bottom - top),
            linewidth=1,
            edgecolor='r',
            facecolor='none',
        )
        axes.add_patch(outline)

    plt.show()

In [None]:
IMG_NUM = 4 # [0, batch_size - 1]
# Draw the same image twice: first with the predicted boxes, then with the ground truth
for title, per_image_boxes in (("Prediction", preds), ("Target", annotations)):
  print(title)
  plot_image(imgs[IMG_NUM], per_image_boxes[IMG_NUM])

Prediction: Left | 
Target: Right |
Epochs Trained: 300 |
Batch_Size: 32

![Predict](Images/Predict_Facemasks.png)
![Target](Images/Target_Facemasks.png)

# SAVE MODEL

In [None]:
# Write the model's state_dict to Google Drive; replace {YourPathHere} with the target file name.
torch.save(model.state_dict(),'/content/drive/MyDrive/{YourPathHere}.pt') # Make sure GoogleDrive is mounted beforehand