# Unzip the Dataset



In [9]:
import zipfile

with zipfile.ZipFile("MaskedUnmaskedDataset.zip", "r") as zip_ref:
    zip_ref.extractall("MaskedUnmaskedDataset")
print("Extraction complete.")

Extraction complete.


# Build a Convolutional Neural Network (CNN) for Face Mask Classification

We will now build and implement a CNN model to classify images as masked, unmasked, or partially masked faces.

Import the required libraries

In [10]:


# Import required libraries for PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import os

Peforms Data Augmentation and prepares dataset to train the CNN Model

In [11]:
# Set up data transforms and load the dataset with advanced data augmentation
from sklearn.model_selection import train_test_split
from torchvision.datasets import ImageFolder
from torchvision import transforms

# Advanced data augmentation for training
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomResizedCrop(128, scale=(0.8, 1.0)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

# Simpler transform for validation
val_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

dataset_dir = 'MaskedUnmaskedDataset'  # Path to extracted dataset

dataset = ImageFolder(root=dataset_dir, transform=train_transform)

# Split dataset into train and val
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Apply correct transforms to train/val splits
train_dataset.dataset.transform = train_transform
val_dataset.dataset.transform = val_transform

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Show class names
display_classes = dataset.classes
print('Classes:', display_classes)

Classes: ['with_mask', 'without_mask']


# Explanation of CNN Model Parameters and Layers

**Model Architecture:**
- The model is a simple Convolutional Neural Network (CNN) designed for image classification.

**Layers and Parameters:**
- `nn.Conv2d(3, 32, kernel_size=3, padding=1)`: First convolutional layer. Takes 3-channel (RGB) images as input, outputs 32 feature maps, uses a 3x3 kernel, and padding of 1 to preserve spatial dimensions.
- `nn.ReLU()`: Activation function introducing non-linearity.
- `nn.MaxPool2d(2, 2)`: Downsamples the feature maps by a factor of 2 (2x2 window).
- `nn.Conv2d(32, 64, kernel_size=3, padding=1)`: Second convolutional layer. Takes 32 input channels, outputs 64 feature maps.
- `nn.ReLU()` and `nn.MaxPool2d(2, 2)`: As above.
- `nn.Conv2d(64, 128, kernel_size=3, padding=1)`: Third convolutional layer. Takes 64 input channels, outputs 128 feature maps.
- `nn.ReLU()` and `nn.MaxPool2d(2, 2)`: As above.
- `nn.Flatten()`: Flattens the output from the convolutional layers into a 1D vector for the fully connected layers.
- `nn.Linear(128 * 16 * 16, 128)`: Fully connected layer. Input size is 128 feature maps of size 16x16 (after pooling), output is 128 features.
- `nn.ReLU()`: Activation function.
- `nn.Dropout(0.5)`: Randomly sets 50% of the input units to 0 during training to help prevent overfitting.
- `nn.Linear(128, num_classes)`: Final fully connected layer. Outputs a vector with length equal to the number of classes (masked, unmasked, partially masked).

**Other Parameters:**
- `num_classes`: The number of output classes, determined from the dataset.

This architecture is a good starting point for image classification tasks and can be further tuned for better performance.

In [12]:
import torch.nn as nn
from torchvision import models

# Improved custom CNN model for face mask classification
class FaceMaskCNN(nn.Module):
    def __init__(self, num_classes):
        super(FaceMaskCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),

            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.25)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 16 * 16, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, num_classes)
        )
    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

num_classes = len(display_classes)
model = FaceMaskCNN(num_classes)
print(model)

FaceMaskCNN(
  (features): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Dropout(p=0.25, inplace=False)
    (8): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU()
    (11): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (15): Dropout(p=0.25, inp

# Training the CNN Model

This code trains the custom FaceMaskCNN model using CrossEntropyLoss and Adam optimizer (lr=0.0005) 
for 3 epochs. It performs training and validation in each epoch, prints metrics, and saves the model 
with the best validation accuracy to 'best_facemaskcnn.pth'. The model runs on GPU if available.









In [17]:
# Train the improved FaceMaskCNN model
import torch.optim as optim
import torch.nn as nn
from tqdm import tqdm

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

num_epochs = 3
best_val_acc = 0.0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    train_loss = running_loss / total
    train_acc = correct / total

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            val_total += labels.size(0)
            val_correct += predicted.eq(labels).sum().item()
    val_loss = val_loss / val_total
    val_acc = val_correct / val_total

    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_facemaskcnn.pth')
        print("Best model saved!")


Epoch 1/3 [Train]:   0%|          | 0/96 [00:00<?, ?it/s]

Epoch 1/3 [Train]: 100%|██████████| 96/96 [01:57<00:00,  1.22s/it]
Epoch 1/3 [Val]: 100%|██████████| 24/24 [00:15<00:00,  1.59it/s]


Epoch 1/3 | Train Loss: 0.1883 | Train Acc: 0.9410 | Val Loss: 0.1181 | Val Acc: 0.9596
Best model saved!


Epoch 2/3 [Train]: 100%|██████████| 96/96 [01:28<00:00,  1.09it/s]
Epoch 2/3 [Val]: 100%|██████████| 24/24 [00:18<00:00,  1.33it/s]


Epoch 2/3 | Train Loss: 0.1470 | Train Acc: 0.9478 | Val Loss: 0.1422 | Val Acc: 0.9544


Epoch 3/3 [Train]: 100%|██████████| 96/96 [02:08<00:00,  1.34s/it]
Epoch 3/3 [Val]: 100%|██████████| 24/24 [00:08<00:00,  2.99it/s]

Epoch 3/3 | Train Loss: 0.1392 | Train Acc: 0.9517 | Val Loss: 0.0987 | Val Acc: 0.9700
Best model saved!





# Face Detection and Mask Classification with YOLOv8-Face

This cell uses the YOLOv5-face model for accurate face detection and a ResNet18-based classifier for mask classification. Detected faces are shown with bounding boxes and predicted labels.

In [None]:
import cv2
from ultralytics import YOLO

# Load standard YOLO model
yolo_model = YOLO('yolov8-face\yolov8n-face.pt')

img = cv2.imread('images.jpeg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Run detection
results = yolo_model(img)

for result in results:
    boxes = result.boxes
    if boxes is not None:
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.imwrite('output_yolov8_face_rect.jpg', img)
print("Image with face rectangles saved as 'output_yolov8_face_rect.jpg'")


  yolo_model = YOLO('yolov8-face\yolov8n-face.pt')



0: 384x640 2 faces, 129.3ms
Speed: 4.3ms preprocess, 129.3ms inference, 39.4ms postprocess per image at shape (1, 3, 384, 640)
Image with face rectangles saved as 'output_yolov8_face_rect.jpg'
0: 384x640 2 faces, 129.3ms
Speed: 4.3ms preprocess, 129.3ms inference, 39.4ms postprocess per image at shape (1, 3, 384, 640)
Image with face rectangles saved as 'output_yolov8_face_rect.jpg'


# Masked and Unmasked Classification using YOLOV5 and custom CNN

The folloeing code block uses the yolov8-face model to recognise faces from images, the rectangular selection around the face is selected and sent to the CNN after being converted to a tensor, where it classifies the image as masked or unmasked. The result is added along with a box around the faces of people in the image.It is then saved as a seperate image.


In [None]:
# Use YOLOv8-face detections and FaceMaskCNN to label faces as masked or unmasked
import cv2
import torch
from torchvision import transforms

model = FaceMaskCNN(num_classes)
model.load_state_dict(torch.load('best_facemaskcnn.pth', map_location=torch.device('cpu')))
model.eval()

# Use the same image as before
img = cv2.imread('images.jpeg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Run YOLOv8-face detection
results = yolo_model(img_rgb)

# Use the same transform as used for training the CNN
face_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

for result in results:
    boxes = result.boxes
    if boxes is not None:
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
            face_img = img[y1:y2, x1:x2]
            if face_img.size == 0:
                continue
            try:
                face_tensor = face_transform(cv2.cvtColor(face_img, cv2.COLOR_BGR2RGB)).unsqueeze(0)
            except Exception as e:
                print("Transform error:", e)
                continue
            with torch.no_grad():
                outputs = model(face_tensor)
                pred = outputs.argmax(dim=1)
                label = display_classes[pred.item()]
            color = (0, 255, 0) if label == 'with_mask' or label == 'masked' else (0, 0, 255)
            cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
            cv2.putText(img, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 2)  

cv2.imwrite('output_yolov8_face_masked_unmasked.jpg', img)
print("Image with mask labels saved as 'output_yolov8_face_masked_unmasked.jpg'")



0: 384x640 2 faces, 101.0ms
Speed: 2.2ms preprocess, 101.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
0: 384x640 2 faces, 101.0ms
Speed: 2.2ms preprocess, 101.0ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)
Image with mask labels saved as 'output_yolov8_face_masked_unmasked.jpg'
Image with mask labels saved as 'output_yolov8_face_masked_unmasked.jpg'
