

---
## **FACE MASK DETECTION USING YOLOv8**
---
**OVERVIEW:**

This notebook implements a complete computer vision pipeline for detecting face masks
in images using the YOLOv8 object detection model. The system can identify three
mask-wearing scenarios with high accuracy: mask worn correctly (`with_mask`),
no mask (`without_mask`), and mask worn incorrectly (`mask_weared_incorrect`).




## **SECTION 1: Setup Kaggle API and Download Dataset**

In [None]:
# Create kaggle directory and setup API credentials
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets list

In [None]:
# Download the face mask detection dataset (andrewmvd/face-mask-detection) from Kaggle.
# Needs the Kaggle API credentials to be in place under ~/.kaggle first.
!kaggle datasets download andrewmvd/face-mask-detection

In [None]:
# Extract the downloaded dataset
!unzip face-mask-detection

## **SECTION 2: Data Preprocessing - XML to YOLO Format Conversion**

In [6]:
import glob
import os
import random
import shutil
import xml.etree.ElementTree as ET

In [7]:
# Define directory paths (all absolute, under /content)
annotations_path = '/content/annotations'  # XML annotation files
labels_dir       = '/content/Labels'       # Output directory for YOLO format labels
images_dir       = '/content/images'       # Original images directory
# Absolute like the paths above, so the train/val folders are always created in
# the same /content/dataset tree that the YAML dataset configuration refers to,
# independent of the kernel's current working directory.
base_dir         = '/content/dataset'      # Base directory for organized dataset

In [8]:
# Paths of the train/validation folders for images and labels under base_dir
# (the folders themselves are created in a later cell)
img_train_dir, img_val_dir = (
    os.path.join(base_dir, f'images/{split_name}') for split_name in ('train', 'val')
)
lbl_train_dir, lbl_val_dir = (
    os.path.join(base_dir, f'labels/{split_name}') for split_name in ('train', 'val')
)

In [9]:
# Make sure every output folder exists; exist_ok=True keeps the cell re-runnable
for out_dir in (labels_dir, img_train_dir, img_val_dir, lbl_train_dir, lbl_val_dir):
    os.makedirs(out_dir, exist_ok=True)

In [10]:
# Class names listed in YOLO class-ID order: the position in the tuple is the
# numeric ID (0 = mask worn correctly, 1 = no mask, 2 = mask worn incorrectly).
label_map = {
    class_name: class_id
    for class_id, class_name in enumerate(
        ("with_mask", "without_mask", "mask_weared_incorrect")
    )
}

In [None]:
# Convert XML annotations to YOLO format


def voc_box_to_yolo(xmin, ymin, xmax, ymax, img_w, img_h):
    """Convert a pixel corner box into a normalized YOLO box.

    Args:
        xmin, ymin, xmax, ymax: Box corners in pixels.
        img_w, img_h: Image width and height in pixels (must be > 0).

    Returns:
        Tuple (x_center, y_center, width, height), each divided by the image
        size, or None when the box has no area once clamped to the image.
    """
    # Clamp to the image so the normalized values always stay within [0, 1]
    xmin, xmax = (min(max(v, 0), img_w) for v in (xmin, xmax))
    ymin, ymax = (min(max(v, 0), img_h) for v in (ymin, ymax))

    # A box with zero (or negative) extent carries no usable annotation
    if xmax <= xmin or ymax <= ymin:
        return None

    return (
        ((xmin + xmax) / 2) / img_w,
        ((ymin + ymax) / 2) / img_h,
        (xmax - xmin) / img_w,
        (ymax - ymin) / img_h,
    )


print("Converting XML annotations to YOLO format...")
for xml_file in sorted(os.listdir(annotations_path)):
    if not xml_file.endswith('.xml'):
        continue

    # Parse XML file
    root = ET.parse(os.path.join(annotations_path, xml_file)).getroot()

    # Get image dimensions (via float() so values such as "512.0" are accepted)
    img_w = int(float(root.find('size/width').text))
    img_h = int(float(root.find('size/height').text))
    if img_w <= 0 or img_h <= 0:
        # Normalizing would divide by zero; report and move on
        print(f"Skipping {xml_file}: invalid image size {img_w}x{img_h}")
        continue

    yolo_lines = []

    # Process each object (bounding box) in the image
    for obj in root.findall('object'):
        class_name = obj.find('name').text.strip().lower()

        # Skip if class not in our label map
        if class_name not in label_map:
            continue

        # Get bounding box coordinates
        bbox = obj.find('bndbox')
        corners = [float(bbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]

        yolo_box = voc_box_to_yolo(*corners, img_w, img_h)
        if yolo_box is None:
            continue
        x_center, y_center, width, height = yolo_box

        # YOLO format line: class_id x_center y_center width height
        yolo_lines.append(
            f"{label_map[class_name]} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
        )

    # Save YOLO format annotation file (same base name as the XML, .txt extension)
    out_filename = os.path.splitext(xml_file)[0] + '.txt'
    with open(os.path.join(labels_dir, out_filename), 'w') as f:
        f.write('\n'.join(yolo_lines))

print("Done converting XML to YOLO format!")

## **SECTION 3: Dataset Split - Train/Validation**

In [12]:
# Get all image files. The listing is sorted and the shuffle uses a fixed seed so
# the train/validation split is identical on every run: os.listdir returns
# entries in arbitrary order, and an unseeded shuffle changes the split each time.
SPLIT_SEED = 42
image_files = sorted(
    f for f in os.listdir(images_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)
random.Random(SPLIT_SEED).shuffle(image_files)  # Randomize order, reproducibly

In [13]:
# 80/20 cut of the file list: first part for training, remainder for validation
split_idx = int(len(image_files) * 0.8)
train_files, val_files = image_files[:split_idx], image_files[split_idx:]

In [14]:
def move_split(file_list, target_img_dir, target_lbl_dir,
               src_img_dir=None, src_lbl_dir=None):
    """
    Copy images and their corresponding labels to target directories.

    Despite the name, files are copied, not moved: the sources stay in place.
    An image without a matching label file is still copied; only its label is
    skipped.

    Args:
        file_list: List of image filenames to copy
        target_img_dir: Target directory for images (created if missing)
        target_lbl_dir: Target directory for labels (created if missing)
        src_img_dir: Directory holding the source images. Defaults to the
            notebook-level ``images_dir`` at call time.
        src_lbl_dir: Directory holding the source label files. Defaults to the
            notebook-level ``labels_dir`` at call time.
    """
    # Resolve the defaults at call time; passing the directories explicitly
    # avoids depending on notebook globals that other cells may reassign.
    if src_img_dir is None:
        src_img_dir = images_dir
    if src_lbl_dir is None:
        src_lbl_dir = labels_dir

    os.makedirs(target_img_dir, exist_ok=True)
    os.makedirs(target_lbl_dir, exist_ok=True)

    for img_file in file_list:
        img_src = os.path.join(src_img_dir, img_file)
        lbl_src = os.path.join(src_lbl_dir, os.path.splitext(img_file)[0] + '.txt')

        # Copy image file
        shutil.copy(img_src, os.path.join(target_img_dir, img_file))

        # Copy corresponding label file if it exists
        if os.path.exists(lbl_src):
            shutil.copy(lbl_src, os.path.join(target_lbl_dir, os.path.basename(lbl_src)))

In [None]:
# Populate the train and validation folders, then report the split sizes
for split_files, split_img_dir, split_lbl_dir in (
    (train_files, img_train_dir, lbl_train_dir),
    (val_files, img_val_dir, lbl_val_dir),
):
    move_split(split_files, split_img_dir, split_lbl_dir)

print("Dataset split completed!")
print(f"Training images: {len(train_files)}")
print(f"Validation images: {len(val_files)}")

## **SECTION 4: Create YAML Configuration File**

In [None]:
# Dataset configuration for YOLOv8 training: dataset root, train/val image
# folders, and the class-ID -> class-name table
yaml_content = """
path: /content/dataset
train: /content/dataset/images/train
val: /content/dataset/images/val
names:
  0: with_mask
  1: without_mask
  2: mask_weared_incorrect
"""

# Write the configuration to disk without the surrounding blank lines
config_text = yaml_content.strip()
with open("/content/face_mask.yaml", "w") as config_file:
    config_file.write(config_text)

print("YAML configuration file created!")

## **SECTION 5: Install YOLOv8 and Train Model**

In [None]:
# Install ultralytics package for YOLOv8
!pip install ultralytics

In [18]:
# Import YOLO
from ultralytics import YOLO

In [None]:
# Start from the pre-trained YOLOv8 nano checkpoint (smallest and fastest)
model = YOLO('yolov8n.pt')

# Training settings gathered in one place, then handed to the trainer
train_settings = dict(
    data='/content/face_mask.yaml',    # Path to dataset configuration
    epochs=200,                        # Number of training epochs
    imgsz=416,                         # Input image size
    batch=16,                          # Batch size
    name='mask_detector',              # Name for this training run
)
model.train(**train_settings)

print("Training completed!")


## **SECTION 6: Model Evaluation**

In [None]:
# Locate the best weights of the most recent "mask_detector" training run.
# A hard-coded run folder such as "mask_detector5" only exists after that exact
# number of training runs, so the newest matching best.pt is looked up instead.
weight_candidates = glob.glob("runs/detect/mask_detector*/weights/best.pt")
if not weight_candidates:
    raise FileNotFoundError(
        "No trained weights found under runs/detect/mask_detector*/weights/ - "
        "run the training cell first."
    )
yol_path = max(weight_candidates, key=os.path.getmtime)  # newest by modification time
print(f"Using weights: {yol_path}")

# Load the trained model and evaluate on validation set
model = YOLO(yol_path)
print("Evaluating model performance...")
metrics = model.val(data='/content/face_mask.yaml')
print("Validation metrics:", metrics)

## **SECTION 7: Visualize Predictions on Validation Set**

In [23]:
import random
import os
import matplotlib.pyplot as plt
from PIL import Image
from ultralytics import YOLO

In [None]:
# Load the trained model
model = YOLO(yol_path)

# Validation images used for the visual check. A dedicated variable is used so
# the notebook-level `images_dir` (the raw image folder read by move_split) is
# not overwritten by this cell.
val_images_dir = '/content/dataset/images/val'
all_images = [f for f in os.listdir(val_images_dir) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

# Select up to 10 random images (random.sample raises ValueError when asked for
# more items than the list holds)
random_images = random.sample(all_images, min(10, len(all_images)))

# Create visualization
plt.figure(figsize=(20, 40))
print("Generating predictions on random validation images...")

for i, img_name in enumerate(random_images):
    img_path = os.path.join(val_images_dir, img_name)

    # Run inference on the image
    results = model(img_path)

    # plot() returns the annotated image in BGR channel order; reverse the
    # channel axis because matplotlib expects RGB.
    annotated_img = results[0].plot()[..., ::-1]

    # Count detections by class (all three classes, including incorrect masks)
    counts = {"with_mask": 0, "without_mask": 0, "mask_weared_incorrect": 0}
    for box in results[0].boxes:
        label = model.names[int(box.cls.item())]
        if label in counts:
            counts[label] += 1

    # Display the result
    plt.subplot(5, 2, i+1)
    plt.imshow(annotated_img)
    plt.title(
        f"{img_name}\nWith Mask: {counts['with_mask']} | Without Mask: {counts['without_mask']}"
        f" | Incorrect: {counts['mask_weared_incorrect']}",
        fontsize=14,
    )
    plt.axis('off')

plt.tight_layout()
plt.show()

## **SECTION 8: Test on External Dataset**

In [None]:
# Download an additional dataset (omkargurav/face-mask-dataset) from Kaggle to
# serve as an external test set
!kaggle datasets download omkargurav/face-mask-dataset

In [None]:
# Extract the test dataset
!unzip face-mask-dataset

In [None]:
# Organize test images into a single folder
import os
import shutil
import random

# Define source folders
folder1 = '/content/data/with_mask'      # Images with masks
folder2 = '/content/data/without_mask'   # Images without masks
final_folder = '/content/Final_test'     # Combined test folder

# Create test folder
os.makedirs(final_folder, exist_ok=True)

# Collect all images from both folders. The extension check is case-insensitive
# (so e.g. ".JPG" is picked up) and the listing is sorted for a stable order.
all_images = []
for folder in [folder1, folder2]:
    all_images.extend(
        os.path.join(folder, f)
        for f in sorted(os.listdir(folder))
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    )

# Copy into the combined folder. If both source folders contain a file with the
# same name, the later one is prefixed with its folder name instead of silently
# overwriting the earlier copy, so the reported count matches the files written.
used_names = set()
for img_path in all_images:
    filename = os.path.basename(img_path)
    if filename in used_names:
        filename = f"{os.path.basename(os.path.dirname(img_path))}_{filename}"
    used_names.add(filename)
    shutil.copy(img_path, os.path.join(final_folder, filename))

print(f"Test dataset prepared with {len(all_images)} images")

## **SECTION 9: Run Inference on Test Dataset**

In [None]:
from google.colab.patches import cv2_imshow
import cv2
import os

# Load the trained model
model = YOLO(yol_path)

# Get all test images (sorted so the processing order is the same on every run).
# A dedicated name avoids overwriting `image_files` from the train/val split.
folder_path = '/content/Final_test'
test_image_files = sorted(
    os.path.join(folder_path, f) for f in os.listdir(folder_path)
    if f.lower().endswith(('.jpg', '.png', '.jpeg'))
)

# Rendering every annotated image inline makes the notebook output very large
# when the test folder is big, so only the first `max_display` images are shown.
# Set to None to display all of them.
max_display = 20

print(f"Running inference on {len(test_image_files)} test images...")
print("Displaying results with bounding boxes and confidence scores...")

# Running total of detections per class name across the whole test folder
detections_per_class = {name: 0 for name in model.names.values()}

# Process each test image
for idx, img_path in enumerate(test_image_files):
    # Run inference (verbose=False keeps the per-image log lines out of the output)
    results = model(img_path, verbose=False)

    for cls_id in results[0].boxes.cls.tolist():
        detections_per_class[model.names[int(cls_id)]] += 1

    # Display the annotated image for the first `max_display` images only
    if max_display is None or idx < max_display:
        cv2_imshow(results[0].plot())

print("Inference completed on all test images!")
print("Detections per class:", detections_per_class)

In [None]:
# Write the model currently held in `model` to a standalone weights file
final_weights_path = 'face_mask_detector.pt'
model.save(final_weights_path)