In [1]:
# --- CELL: Importing Required Libraries ---
# This cell imports all the necessary libraries and modules for building a CNN-based Face Mask Detection system.
# - TensorFlow/Keras: Deep learning framework used to build and train the CNN model.
# - Layers (Dense, Dropout, BatchNormalization, Conv2D, MaxPooling2D, Flatten, Input, GlobalAveragePooling2D): 
#   Building blocks for constructing the neural network architecture.
# - Gradio (gr): Used to create an interactive web-based UI for real-time mask detection.
# - Sequential & Model: Two ways to define Keras models (Sequential for linear stacking, Model for functional API).
# - PIL (Image): Python Imaging Library for image manipulation and preprocessing.
# - ImageDataGenerator: Utility for real-time data augmentation and loading images from directories.

import tensorflow
from tensorflow.keras.layers import Dense,Dropout,BatchNormalization,Input,GlobalAveragePooling2D,MaxPooling2D,Conv2D,Flatten
import gradio as gr
from keras.models import Sequential
from tensorflow.keras.models import Model
from PIL import Image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# OUTPUT: A SciPy version warning may appear - this is a compatibility notice between NumPy and SciPy versions
# and does NOT affect the functionality of the code. It can be safely ignored.

  from scipy.sparse import issparse  # pylint: disable=g-import-not-at-top


In [2]:
# --- CELL: Importing NumPy ---
# NumPy is imported for numerical operations such as array manipulation,
# expanding dimensions of image arrays, and normalizing pixel values.

import numpy as np

In [3]:
# --- CELL: Defining the CNN Model Architecture ---
# This cell builds a Sequential CNN model for binary classification (with mask / without mask).
#
# Architecture breakdown:
# BLOCK 1: Two Conv2D layers (32 filters, 3x3 kernel, ReLU) -> BatchNormalization -> MaxPooling2D (2x2)
#   - Extracts low-level features like edges and textures from 128x128x3 RGB input images.
# BLOCK 2: Two Conv2D layers (64 filters) -> BatchNormalization -> MaxPooling2D
#   - Captures mid-level features like shapes and patterns.
# BLOCK 3: Two Conv2D layers (128 filters) -> BatchNormalization -> MaxPooling2D
#   - Learns high-level features specific to mask/no-mask distinction.
# CLASSIFIER:
#   - Flatten: Converts 3D feature maps to 1D vector.
#   - Dense(128, relu): Fully connected layer for learning complex combinations.
#   - Dense(64, relu): Another FC layer for further abstraction.
#   - Dropout(0.5): Randomly drops 50% of neurons during training to prevent overfitting.
#   - Dense(1, sigmoid): Output layer with sigmoid activation for binary classification (0 or 1).

def _conv_stage(n_filters):
    """Return one feature-extraction stage as a list of layers.

    A stage is two 3x3 ReLU convolutions with `n_filters` filters each,
    followed by batch normalization and a 2x2 max-pooling layer.
    """
    return [
        Conv2D(n_filters, (3, 3), activation='relu'),
        Conv2D(n_filters, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
    ]


# Layers are collected in data-flow order: input, three conv stages, classifier head.
cnn_layers = [Input(shape=(128, 128, 3))]

# Filter count doubles from stage to stage: 32 -> 64 -> 128.
for n_filters in (32, 64, 128):
    cnn_layers.extend(_conv_stage(n_filters))

# Classifier head: flatten the feature maps, two dense layers, dropout,
# and a single sigmoid unit for the binary output.
cnn_layers.extend([
    Flatten(),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

cnn = Sequential(cnn_layers)

In [4]:
# --- CELL: Setting Up Data Generators (Train, Validation, Test) ---
# This cell uses ImageDataGenerator to load and augment the Face Mask dataset.
#
# TRAINING DATA AUGMENTATION (train_datagen):
#   - rescale=1./255: Normalizes pixel values from [0,255] to [0,1] for faster convergence.
#   - rotation_range=20: Randomly rotates images up to 20 degrees.
#   - zoom_range=0.2: Randomly zooms in/out by 20%.
#   - width/height_shift_range=0.1: Shifts images horizontally/vertically by 10%.
#   - horizontal_flip=True: Randomly flips images horizontally.
#   These augmentations help the model generalize better and reduce overfitting.
#
# VALIDATION & TEST DATA: Only rescaling (no augmentation) to evaluate on unmodified images.
# All images are resized to 128x128, loaded in batches of 32, with binary class mode (2 classes).

# Loading settings shared by all three splits. IMAGE_SIZE must match the
# model's Input(shape=(128,128,3)).
DATA_DIR = r"C:\Users\elwin\OneDrive\Desktop\CNN data\Face Mask Dataset"
IMAGE_SIZE = (128, 128)
BATCH_SIZE = 32


def _flow_from_split(datagen, split_name):
    """Return a directory iterator over one sub-folder of DATA_DIR.

    Args:
        datagen: the ImageDataGenerator that preprocesses/augments the images.
        split_name: name of the sub-folder of DATA_DIR to read (e.g. "Train").

    Returns:
        The iterator produced by `datagen.flow_from_directory`, yielding
        batches of BATCH_SIZE images resized to IMAGE_SIZE with binary labels.
    """
    return datagen.flow_from_directory(
        DATA_DIR + "\\" + split_name,
        target_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='binary',
    )


# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Validation and test images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1/255)
test_datagen = ImageDataGenerator(rescale=1/255)

train_genrator = _flow_from_split(train_datagen, "Train")
val_genrator = _flow_from_split(val_datagen, "Validation")

# Each split gets its own iterator. The previous chained assignment
# (test_genrator=val_genrator=...) rebound val_genrator to the test iterator,
# so both names pointed at one object with shared iteration state.
# NOTE(review): the test split still reads the "Validation" folder, so test
# metrics are not computed on held-out data. TODO: point this at a separate
# test folder if the dataset provides one.
test_genrator = _flow_from_split(test_datagen, "Validation")

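# OUTPUT: The generators found:
# - 10,000 training images belonging to 2 classes (WithMask, WithoutMask)
# - 800 validation images belonging to 2 classes
# - 800 "test" images belonging to 2 classes -- these are the same 800 files, because the test
#   generator is pointed at the Validation directory rather than at a separate test folder.
# This confirms the images load correctly, but there is no independent held-out test split here.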

Found 10000 images belonging to 2 classes.
Found 800 images belonging to 2 classes.
Found 800 images belonging to 2 classes.


In [5]:
# --- CELL: Compiling the Model and Displaying Architecture Summary ---
# Compilation configures the model for training:
#   - optimizer='adam': Adaptive learning rate optimizer, efficient for most deep learning tasks.
#   - loss='binary_crossentropy': Loss function for binary classification (mask vs no mask).
#   - metrics=['accuracy']: Tracks classification accuracy during training and evaluation.
#
# cnn.summary() prints the complete architecture with layer details.

# Configure training: Adam optimizer, binary cross-entropy loss for the single
# sigmoid output, and accuracy as the reported metric.
cnn.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer table of output shapes and parameter counts.
cnn.summary()

# OUTPUT (model.summary):
# The model has 3 convolutional blocks, each with 2 Conv2D layers, BatchNormalization, and MaxPooling2D.
# Key observations from the summary:
# - Input: 128x128x3 images -> progressively reduced spatial dimensions through pooling.
# - Conv layers increase filters: 32 -> 64 -> 128 (capturing increasingly complex features).
# - After Flatten: 18,432 features are fed to Dense layers.
# - The Dense(128) layer has the most parameters (2,359,424) due to the large flattened input.
# - Total params: 2,655,649 (10.13 MB) | Trainable: 2,655,201 | Non-trainable: 448 (from BatchNorm).
# - Non-trainable params come from BatchNormalization's moving mean and variance.

In [6]:
# --- CELL: Training the CNN Model ---
# This cell trains the model on the training data for 14 epochs with validation monitoring.
#   - train_genrator: Feeds augmented training images in batches of 32.
#   - epochs=14: The model will see the entire training dataset 14 times.
#   - validation_data=val_genrator: Evaluates on validation set after each epoch to monitor overfitting.
# The training history is stored in 'hist' for later analysis (e.g., plotting accuracy/loss curves).

# Train for 14 epochs on the training iterator, evaluating on val_genrator
# after every epoch; the returned History object is kept in `hist`.
hist = cnn.fit(
    train_genrator,
    validation_data=val_genrator,
    epochs=14,
)

# OUTPUT (Training Logs):
# The model trained for 14 epochs (313 batches/epoch = 10,000 images / 32 batch size).
# Key observations:
# - Epoch 1:  Train Acc = 90.29%, Val Acc = 67.87% (model is learning, val is unstable initially)
# - Epoch 5:  Train Acc = 97.37%, Val Acc = 98.50% (strong improvement, good generalization)
# - Epoch 13: Train Acc = 98.55%, Val Acc = 98.87% (best val accuracy so far)
# - Epoch 14: Train Acc = 98.35%, Val Acc = 99.62% (excellent final val accuracy)
# - Val accuracy fluctuates at epochs 2,8,9,12 (signs of slight overfitting but recovers).
# - Final result: ~98.35% training accuracy and ~99.62% validation accuracy.
# - The model generalizes very well for face mask detection.

Epoch 1/14
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m415s[0m 1s/step - accuracy: 0.9029 - loss: 0.4211 - val_accuracy: 0.6787 - val_loss: 1.9083
Epoch 2/14
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m314s[0m 1s/step - accuracy: 0.9446 - loss: 0.1744 - val_accuracy: 0.4963 - val_loss: 1.2252
Epoch 3/14
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m284s[0m 907ms/step - accuracy: 0.9629 - loss: 0.1070 - val_accuracy: 0.9625 - val_loss: 0.1035
Epoch 4/14
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m274s[0m 876ms/step - accuracy: 0.9673 - loss: 0.0900 - val_accuracy: 0.9737 - val_loss: 0.0850
Epoch 5/14
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m275s[0m 878ms/step - accuracy: 0.9737 - loss: 0.0789 - val_accuracy: 0.9850 - val_loss: 0.0352
Epoch 6/14
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m274s[0m 874ms/step - accuracy: 0.9746 - loss: 0.0775 - val_accuracy: 0.9775 - val_loss: 0.0736
Epoch 7/14

In [9]:
# --- CELL: Setting Up the Gradio Web Interface for Face Mask Detection ---
# This cell creates a Gradio-based interactive web app for real-time face mask prediction.
#
# Prediction function (mas_detection):
#   1. Returns a short message if no image was supplied.
#   2. Converts the uploaded numpy image to an RGB PIL Image.
#   3. Resizes to 128x128 (same as training input size).
#   4. Normalizes pixel values to [0,1] by dividing by 255.
#   5. Expands dimensions to add batch axis: (128,128,3) -> (1,128,128,3).
#   6. Runs prediction through the trained CNN model.
#   7. Thresholds the sigmoid output at 0.5 to get a class index (0 or 1) and looks up the
#      class name in the mapping taken from train_genrator.class_indices. The mapping is read
#      from the generator rather than hard-coded because Keras assigns indices by
#      alphanumeric folder order, which makes a hand-written label order easy to invert.
#   8. Reports the probability of the predicted class as the confidence.
#
# Gradio Interface:
#   - Input: Image upload widget (numpy format).
#   - Output: Textbox displaying prediction result with confidence score.

models=cnn

# Invert the generator's {class name: index} mapping into {index: class name}
# so a predicted index can be turned back into a label.
CLASS_NAMES={index: name for name, index in train_genrator.class_indices.items()}


# image preprocessing and prediction function
def mas_detection(image):
    """Classify one uploaded image and describe the result as text.

    Args:
        image: the uploaded picture as a numpy array (as delivered by
            gr.Image(type='numpy')), or None when nothing was uploaded.

    Returns:
        A string of the form '<class name> (confidence: 0.97)', where the class
        name comes from the training generator's folder names and the
        confidence is the model's probability for that predicted class.
        Returns 'no image provided' when `image` is None.
    """
    if image is None:
        return 'no image provided'

    # Force 3 channels so the array matches the model's (128,128,3) input
    # even if the upload is grayscale or has an alpha channel.
    image=Image.fromarray(image).convert('RGB')
    image=image.resize((128,128))
    image=np.array(image)/255.0
    image=np.expand_dims(image,axis=0)

    # The sigmoid output is the probability of the class with index 1.
    probability=float(models.predict(image)[0][0])
    class_index=int(probability >= 0.5)

    # Report the probability of the class that was actually predicted.
    confidence=probability if class_index == 1 else 1.0 - probability
    return f'{CLASS_NAMES[class_index]} (confidence: {confidence:.2f})'

# Create the two UI components first, then wire them to the prediction function.
image_input = gr.Image(type='numpy', label='Upload Image')
prediction_output = gr.Textbox(label='Prediction')

interface = gr.Interface(
    fn=mas_detection,
    inputs=image_input,
    outputs=prediction_output,
    title='mask image classifier',
    description='upload an image to identify the with or without mask',
)

In [10]:
# --- CELL: Launching the Gradio Web Application ---
# This launches the Gradio interface as a local web server.
# Users can upload face images and get instant mask/no-mask predictions.

# Start serving the `interface` object with Gradio's default launch settings.
interface.launch()

# OUTPUT: The Gradio app is running on http://127.0.0.1:7861
# An interactive iframe is embedded in the notebook for direct use.
# When an image is uploaded, the model processes it (1/1 batch) and returns the prediction.
# To create a publicly shareable link, use interface.launch(share=True).

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
