# Face Mask Detection

## Required Packages

1. Install PyTorch. (If you want to use CUDA, you need to install it manually first.)

In [None]:
# Install PyTorch (CUDA): https://pytorch.org/get-started/locally/
# NOTE: the `cu124` index below serves the CUDA 12.4 builds; use the selector at the
# link above to get the command for a different CUDA version or a CPU-only install.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

2. Install required packages:

In [None]:
# Install required packages
# NOTE(review): versions are not pinned, so a fresh install may pull newer releases
# than the ones this notebook was written against.
%pip install numpy opencv-python ultralytics matplotlib pyyaml scikit-learn

## Download and Setup Dataset

Setup data and folder structure for YOLO to train with:

```
dataset
|-- original
|   |-- annotations
|   `-- images
`-- yolo
    |-- images
    |   |-- test
    |   |-- train
    |   `-- val
    `-- labels
        |-- test
        |-- train
        `-- val
```

In [None]:
import os
from pathlib import Path

# every dataset path is anchored at the directory the notebook is run from
cwd = Path.cwd()

DATASET_PATH = cwd / 'dataset'

# extracted Kaggle archive: holds the `images` and `annotations` folders
ORIGINAL_DATASET_PATH = DATASET_PATH / 'original'

ORIGINAL_IMAGES_PATH = ORIGINAL_DATASET_PATH / 'images'
ORIGINAL_ANNOTATIONS_PATH = ORIGINAL_DATASET_PATH / 'annotations'

# converted dataset, laid out as `images/<split>` and `labels/<split>`
YOLO_DATASET_PATH = DATASET_PATH / 'yolo'

TRAIN_DIR = 'train'
TRAIN_IMAGES_PATH = YOLO_DATASET_PATH / 'images' / TRAIN_DIR
TRAIN_LABELS_PATH = YOLO_DATASET_PATH / 'labels' / TRAIN_DIR

TEST_DIR = 'test'
TEST_IMAGES_PATH = YOLO_DATASET_PATH / 'images' / TEST_DIR
TEST_LABELS_PATH = YOLO_DATASET_PATH / 'labels' / TEST_DIR

VALIDATION_DIR = 'val'
VALIDATION_IMAGES_PATH = YOLO_DATASET_PATH / 'images' / VALIDATION_DIR
VALIDATION_LABELS_PATH = YOLO_DATASET_PATH / 'labels' / VALIDATION_DIR

# create directories if not exist.
# the loop variable is deliberately not called `dir`: at notebook top level that
# name would shadow the `dir()` builtin for every later cell.
for directory in [ORIGINAL_DATASET_PATH,
                  TRAIN_IMAGES_PATH, TRAIN_LABELS_PATH,
                  TEST_IMAGES_PATH, TEST_LABELS_PATH,
                  VALIDATION_IMAGES_PATH, VALIDATION_LABELS_PATH]:
    directory.mkdir(parents=True, exist_ok=True)

Download the dataset from [Kaggle's Face Mask Detection dataset](https://www.kaggle.com/datasets/andrewmvd/face-mask-detection).

In [None]:
from urllib.request import urlretrieve

DATASET_FILE_PATH = DATASET_PATH / 'archive.zip'

# https://www.kaggle.com/datasets/andrewmvd/face-mask-detection
DATASET_URL = 'https://www.kaggle.com/api/v1/datasets/download/andrewmvd/face-mask-detection'

if DATASET_FILE_PATH.is_file():
    # reuse the archive from a previous run so "Restart & Run All" stays cheap
    print('dataset already downloaded')
else:
    # download under a temporary name first: an interrupted transfer then never
    # leaves a truncated `archive.zip` that a later run would take for complete
    partial_path = DATASET_FILE_PATH.with_name(DATASET_FILE_PATH.name + '.part')
    urlretrieve(DATASET_URL, partial_path)
    partial_path.replace(DATASET_FILE_PATH)

    print('dataset downloaded')

Extract it to `dataset/original/`.

In [None]:
import zipfile

# unpack the whole archive into `ORIGINAL_DATASET_PATH`;
# it should only create the `annotations` and `images` folders there.
with zipfile.ZipFile(DATASET_FILE_PATH) as archive:
    archive.extractall(ORIGINAL_DATASET_PATH)

print('dataset extracted')

Prepare the data for YOLO by converting the annotations from Pascal VOC format to YOLO format.

Get a list of all annotation files.

In [None]:
# get all files which are XMLs in the annotations folder.
# sorted so that the processing order (and with it the class ids, which are
# assigned in order of first appearance) does not depend on the arbitrary
# order in which the file system lists the folder.
label_files = sorted(
    path
    for path in ORIGINAL_ANNOTATIONS_PATH.iterdir()
    if path.is_file() and path.suffix.lower() == '.xml'
)

Create a function to convert Pascal VOC's annotation format to a YOLO one

In [None]:
def xml_to_yolo_bbox(bbox, w, h):
    """Convert a Pascal VOC bounding box to YOLO's normalized format.

    Args:
        bbox: Box corners as `(xmin, ymin, xmax, ymax)`.
        w: Width of the image the box belongs to.
        h: Height of the image the box belongs to.

    Returns:
        `[x_center, y_center, width, height]`, where the x values are divided
        by `w` and the y values are divided by `h`.
    """
    xmin, ymin, xmax, ymax = bbox

    x_center = (xmax + xmin) / 2 / w
    y_center = (ymax + ymin) / 2 / h

    width = (xmax - xmin) / w
    # the box height is its vertical extent, so it is taken from the y corners
    height = (ymax - ymin) / h

    return [x_center, y_center, width, height]

1. Parse all Pascal VOC annotations. (which are XMLs)

2. Get all essential values. (width, height, classes, bounding boxes)

3. Convert to YOLO format with function `xml_to_yolo_bbox`.

In [None]:
import xml.etree.ElementTree as ET

# class names in order of first appearance; a class id is its index in this list
classes = []

# both are keyed by data id (the image file name without its extension)
images = dict()  # data id -> path of the original image
labels = dict()  # data id -> list of YOLO label lines for that image

# corner tags inside `<bndbox>`, in the order `xml_to_yolo_bbox` expects them
BBOX_TAGS = ('xmin', 'ymin', 'xmax', 'ymax')

for label_file in label_files:
    # parse the xml file
    root = ET.parse(label_file).getroot()

    # get `filename` and skip the annotation if its image does not exist
    filename = root.find('filename').text
    image_path = ORIGINAL_IMAGES_PATH / filename
    if not image_path.is_file():
        continue

    # get width and height of the image
    w = int(root.find('size/width').text)
    h = int(root.find('size/height').text)

    yolo_labels = []
    for obj in root.iter('object'):
        # get class and append to `classes` if not exists
        cls = obj.find('name').text
        if cls not in classes:
            classes.append(cls)
        idx = classes.index(cls)

        # read each corner by its tag name rather than by its position inside
        # `<bndbox>`, so a different child order cannot silently swap coordinates
        bndbox = obj.find('bndbox')
        bbox = [int(bndbox.find(tag).text) for tag in BBOX_TAGS]
        yolo_bbox = xml_to_yolo_bbox(bbox, w, h)

        # one label line: `<class id> <x_center> <y_center> <width> <height>`
        bbox_string = ' '.join([str(x) for x in yolo_bbox])
        yolo_labels.append(f'{idx} {bbox_string}')

    # keep only images that have at least one annotated object
    if yolo_labels:
        data_id = image_path.stem

        images[data_id] = image_path
        labels[data_id] = yolo_labels

print(f'total valid data count: {len(labels)}/{len(label_files)}')

Split the dataset into training, validation, and test sets at 70/15/15%.

In [None]:
from sklearn.model_selection import train_test_split

# fixed seed so that every run produces the same train/val/test split
SPLIT_SEED = 42

# sorted so the input order does not depend on the order the annotations were read in
data_ids = sorted(labels)

# 70% train; the remaining 30% is then halved into validation and test (15% each)
train_data, temp_data = train_test_split(
    data_ids, test_size=0.3, shuffle=True, random_state=SPLIT_SEED)
val_data, test_data = train_test_split(
    temp_data, test_size=0.5, shuffle=True, random_state=SPLIT_SEED)

Copy the training, validation, and test data chosen in the cell above into their corresponding folders: the images are copied, and the YOLO annotations are written out as label files.

In [None]:
import shutil

# delete all files in the current yolo folders so nothing from a previous split lingers
for split_path in [TRAIN_IMAGES_PATH, TRAIN_LABELS_PATH,
                   TEST_IMAGES_PATH, TEST_LABELS_PATH,
                   VALIDATION_IMAGES_PATH, VALIDATION_LABELS_PATH]:
    for entry in split_path.iterdir():
        if entry.is_file():
            entry.unlink()

# copy images and write annotations to each corresponding train/test/validation folder.
for split_ids, split_dir in [(train_data, TRAIN_DIR),
                             (val_data, VALIDATION_DIR),
                             (test_data, TEST_DIR)]:
    for data_id in split_ids:
        image_name = images[data_id].name  # image file name, extension included

        # copy image to the folder
        shutil.copy(ORIGINAL_IMAGES_PATH / image_name,
                    YOLO_DATASET_PATH / 'images' / split_dir / image_name)

        # write yolo labels to a file named after the data id, one line per object
        label_txt_path = YOLO_DATASET_PATH / 'labels' / split_dir / f'{data_id}.txt'
        with open(label_txt_path, 'w') as label_txt:
            label_txt.write('\n'.join(labels[data_id]))

Create a YAML file for YOLO to work with. It defines the dataset path, the train/test/validation paths, the class count, and the class names.

In [None]:
import yaml

DATASET_YAML = YOLO_DATASET_PATH / 'face-mask-detection.yaml'

# dataset description for YOLO: dataset path, train/test/val image folders,
# class count, and class names.
content = dict(
    path=str(YOLO_DATASET_PATH),
    train='images/train',
    test='images/test',
    val='images/val',
    nc=len(classes),
    names=classes,
)

with open(DATASET_YAML, 'w') as yaml_file:
    yaml.dump(content, yaml_file)

## Model Training

Download the YOLOv8 weight.

In [None]:

YOLO_WEIGHT_PATH = DATASET_PATH / 'yolov8n.pt'

# https://github.com/ultralytics/assets/releases
WEIGHT_URL = 'https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt'

if YOLO_WEIGHT_PATH.is_file():
    # reuse the weight file from a previous run instead of downloading it again
    print('weight already downloaded')
else:
    # download under a temporary name first: an interrupted transfer then never
    # leaves a truncated `yolov8n.pt` that a later run would take for complete
    partial_weight_path = YOLO_WEIGHT_PATH.with_name(YOLO_WEIGHT_PATH.name + '.part')
    urlretrieve(WEIGHT_URL, partial_weight_path)
    partial_weight_path.replace(YOLO_WEIGHT_PATH)

    print('weight downloaded')

Load the downloaded weights into a YOLO model.

In [None]:
from ultralytics import YOLO
# build the YOLO model object from the weight file downloaded above
model = YOLO(YOLO_WEIGHT_PATH)

Train on the dataset for 30 epochs, using the first CUDA GPU.

In [None]:
import torch

# train on the first CUDA GPU when one is usable, otherwise fall back to the CPU
# so the notebook still runs on machines without a CUDA install.
TRAIN_DEVICE = 0 if torch.cuda.is_available() else 'cpu'

train_results = model.train(
    data=str(DATASET_YAML),  # dataset description file, passed as a plain string path
    epochs=30,               # epochs
    imgsz=640,               # image size
    batch=8,                 # batch size
    device=TRAIN_DEVICE,     # device to train (cpu or gpu)
    save=True                # save the model as a weight file
)

## Model Inference

Pick a random image from validation dataset.

In [None]:
import random

import cv2

pick_id = random.choice(val_data)

# take the file name from `images` (the same name the image was copied under)
# instead of assuming that every image has a `.png` extension
pick_path = VALIDATION_IMAGES_PATH / images[pick_id].name

# pass a plain string path: not every OpenCV release accepts `pathlib.Path`
image = cv2.imread(str(pick_path))  # read with opencv
if image is None:
    # cv2.imread reports an unreadable file by returning None instead of raising
    raise FileNotFoundError(f'could not read image: {pick_path}')

Run inference on the image with the model.

In [None]:
# run the model on the image loaded above; the drawing cell below reads `results[0].boxes`
results = model.predict(image)

Draw classes and bounding boxes on the image.

In [None]:
import numpy as np

WIDTH = 2          # line thickness for both the boxes and the label text
FONT_SCALE = 0.5
LABEL_OFFSET = 10  # gap in pixels between a box's top edge and its label

# NOTE: the boxes are drawn directly onto `image`, which the cells below display and save.
for box in results[0].boxes:
    for xyxy in box.xyxy:
        # plain Python ints: some OpenCV builds reject numpy scalars as coordinates
        x1, y1, x2, y2 = (int(v) for v in xyxy.tolist())  # get bounding box
        cls_id = int(box.cls[0].item())                   # get class id
        cls_name = classes[cls_id]                        # get class name from the id

        # random a color for each bounding box in BGR.
        color = (
            random.randint(0, 255),
            random.randint(0, 255),
            random.randint(0, 255)
        )

        cv2.rectangle(image, (x1, y1), (x2, y2), color, WIDTH)  # draw a bounding box

        # write a class name above the box; clamp the position so the label
        # stays inside the image when the box touches the top edge
        label_y = max(y1 - LABEL_OFFSET, LABEL_OFFSET)
        cv2.putText(image, cls_name, (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, FONT_SCALE,
                    color, WIDTH)

Finally, display the image.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# convert opencv's bgr to rgb for displaying with matplotlib.
# the result goes into a new variable so that `image` stays in BGR: the save cell
# passes `image` to cv2.imwrite, and re-running this cell must not swap the
# channels back and forth.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

plt.imshow(image_rgb)
plt.axis('off')  # disable plot's axis
plt.show()

Save the image (optional).

In [None]:
# write `image` to `output.png` (a relative path, so it lands in the current working directory)
cv2.imwrite('output.png', image)

---