In [4]:
import os
import cv2
import time
from tqdm import tqdm
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Dense, MaxPooling2D, Flatten, Activation, Dropout

In [5]:
img_size = 100
datadir = r'dataset'
CATEGORIES = os.listdir(datadir)
print(CATEGORIES)

['with_mask', 'without_mask']


In [6]:
x, y = [], [] #empty list for storing the images and their labels

#PreProcess() function reads images from dataset directory, resizes them to a specified size, 
#normalizes their pixel values to be between 0 and 1 and stores them in a list x and y.

def PreProcess(): 
    for category in CATEGORIES:
        path = os.path.join(datadir, category)
        classIndex = CATEGORIES.index(category)
        print(path)
        for imgs in tqdm(os.listdir(path)):
            img_arr = cv2.imread(os.path.join(path, imgs))
            
            # resize the image
            resized_array = cv2.resize(img_arr, (img_size, img_size)) #esizes them to img_size 
            cv2.imshow("images", resized_array)
            cv2.waitKey(1)
            resized_array = resized_array/255.0  #images are normalized by dividing each pixel value by 255.0
            x.append(resized_array)
            y.append(classIndex)
            
PreProcess()
cv2.destroyAllWindows()  #close any open windows from displaying images using cv2.imshow()

dataset/with_mask


100%|███████████████████████████████████████| 2994/2994 [00:44<00:00, 66.56it/s]


dataset/without_mask


100%|███████████████████████████████████████| 2994/2994 [00:44<00:00, 66.89it/s]


In [7]:
#splitting the dataset into training and testing sets, and then converting 
#the images and labels into numPy arrays that can be used as input and output to machine learning model.

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42) 
#randomly split the dataset into training and testing set.
#The test_size parameter is set to 0.20, meaning that 20% of the data will be used for testing,
#and the remaining 80% will be used for training. 

X_train = np.array(X_train).reshape(-1, img_size, img_size, 3)
y_train = np.array(y_train)
X_test = np.array(X_test).reshape(-1, img_size, img_size, 3)
y_test = np.array(y_test)

#The training data is stored in X_train and y_train, while the testing data is stored in X_test and y_test.

In [8]:
#This code defines a convolutional neural network (CNN) model using the Keras API. 
#The model consists of four convolutional layers, each followed by an activation function, and two max pooling layers.
#The final layer is a softmax activation layer that outputs the predicted class probabilities.

batch_size = 32
epochs = 15

model = Sequential()

model.add(Conv2D(64,(3, 3), input_shape=(img_size, img_size, 3))) #layer with 64 filters of size (3, 3)
model.add(Activation('relu'))    #relu activation function
model.add(MaxPooling2D(pool_size = (2, 2))) #layer with pool size (2, 2)

model.add(Conv2D(256, (3, 3))) #layer with 256 filters of size (3, 3)
model.add(Activation('relu'))  #relu activation function
model.add(MaxPooling2D(pool_size = (2, 2)))

model.add(Conv2D(128, (3, 3))) #layer with 128 filters of size (3, 3)
model.add(Activation('relu'))  #relu activation function
model.add(Dropout(0.25)) #layer with a rate of 0.25 to prevent overfitting

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))

model.add(Dropout(0.25))
model.add(Flatten()) #to flatten the output from the convolutional layers to a 1D array
model.add(Dense(100, activation='relu')) #layer with 100 neurons and relu activation function
model.add(Dense(16, activation='relu')) #layer with 16 neurons and relu activation function.


model.add(Dense(len(CATEGORIES))) #layer with number of neurons equal to the number of categories
model.add(Activation('softmax')) #softmax activation function

model.compile(optimizer='adam', loss = 'sparse_categorical_crossentropy', metrics=['accuracy']) #compile the model
model.summary() # prints out a summary of the model architecture, including the number of parameters in each layer

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 98, 98, 64)        1792      
                                                                 
 activation (Activation)     (None, 98, 98, 64)        0         
                                                                 
 max_pooling2d (MaxPooling2D  (None, 49, 49, 64)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 47, 47, 256)       147712    
                                                                 
 activation_1 (Activation)   (None, 47, 47, 256)       0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 23, 23, 256)      0         
 2D)                                                    

In [9]:
t1 = time.time()

# raining the model using the fit() method
#trains the model for 5 epochs 
#validation split of 0.3, meaning that 30% of the training data is used for validation during training
#verbose parameter is set to 1, which means that progress updates are printed during training

model.fit(X_train, y_train, batch_size = batch_size, epochs=5, validation_split=0.3, verbose = 1)
model.save('{}.h5'.format("model")) #it is saved as an h5 file using the save method

t2 = time.time()
print('Time taken: ',t2-t1) #time taken to train the model is printed to the console

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Time taken:  796.2538619041443


face mask detection using the trained model.  

In [10]:
import cv2
import mediapipe as mp
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model

#Load the model
model = load_model('model.h5')

#Define mediapipe Face detector

face_detection = mp.solutions.face_detection.FaceDetection()

#Detection function
#This is a function detect faces in input frame, extract the bounding box coordinates of the detected face, 
#and return them as (x, y, w, h) tuple, where (x,y) represents the top-left corner of the bounding box 
#and (w,h) represent the width and height of the bounding box, respectively. 
#The function first converts the input frame from the BGR color space to the RGB color space, 
#since the mediapipe face detection model requires images in RGB format. 
#If no face is detected in the input frame, the function returns None.

def get_detection(frame):

    height, width, channel = frame.shape

    # Convert frame BGR to RGB colorspace

    imgRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    # Detect results from the frame
    
    result = face_detection.process(imgRGB)


    try:
        for count, detection in enumerate(result.detections):

            # print(detection)

        
            # Extract bounding box information 
            
            box = detection.location_data.relative_bounding_box

            x, y, w, h = int(box.xmin*width), int(box.ymin * height), int(box.width*width), int(box.height*height)
            
    # If detection is not available then pass 
    except:
        pass

    return x, y, w, h

#It captures video frames from the webcam, detects faces using the MediaPipe face detection library, 
#crops the face region, resizes it to 100x100 pixels, and feeds it to model.h5 to predict whether 
#the person is wearing a mask or not. 
#The predicted result is displayed on the video frame along with a bounding box around the detected face region.
#If the person is wearing a mask, the bounding box is green, otherwise, it is red

CATEGORIES = ['without_mask', 'with_mask']
cap = cv2.VideoCapture(0)
while True:
    _, frame = cap.read() #read from the camera and a copy of the frame is created
    img = frame.copy()
    try:
        x, y, w, h = get_detection(frame) #function is called to detect the face in the current frame
        
        crop_img = img[y:y+h, x:x+w] #the face is cropped from the original frame using the x, y, w, h values
        
        crop_img = cv2.resize(crop_img, (100, 100)) #the cropped image is resized to 100x100 pixels
        
        crop_img = np.expand_dims(crop_img, axis=0)
        
        # get the prediction from the model.
        prediction = model.predict(crop_img) # returns a prediction array of probabilities
        print(prediction)
        index = np.argmax(prediction) #index of the highest probability is determined
        res = CATEGORIES[index] #class label is assigned
        if index == 0:
            color = (0, 0, 255)
        else:
            color = (0, 255, 0)
        cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2) #colored rectangle is drawn around the face in the frame
        cv2.putText(frame, res, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX,
                                 0.8, color, 2, cv2.LINE_AA)  #label is displayed
                                 #The color of the rectangle and the text depends on the predicted class label

    except:
        pass
    
    cv2.imshow("frame", frame)
    key=cv2.waitKey(1) #waits for a key to be pressed.If no key is pressed within the specified time, it returns -1
    
    if(key==27):    #press Esc key to exit
        break
        #the program will break out of the loop and the camera will be released     
        
cap.release()
cv2.destroyAllWindows() #all windows will be destroyed


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


[[0. 1.]]
[[1. 0.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[0. 1.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1.0000000e+00 1.1170327e-15]]
[[1. 0.]]
[[1. 0.]]
[[0. 1.]]
[[0. 1.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[0. 1.]]
[[1. 0.]]
[[2.3991742e-10 1.0000000e+00]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[0. 1.]]
[[0. 1.]]
[[1. 0.]]
[[1. 0.]]
[[1.000000e+00 5.173269e-08]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[1.0000000e+00 4.6535564e-38]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[1.7608467e-05 9.9998236e-01]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1. 0.]]
[[1.000000e+00 2.055977e-33]]
[[1. 0.]]
[[0. 1.]]
[[

[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[1. 0.]]
[[1. 0.]]
[[0. 1.]]
[[1. 0.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[2.8257574e-15 1.0000000e+00]]
[[0. 1.]]
[[1. 0.]]
[[0.99461347 0.00538653]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[0. 1.]]
[[