## Setting Up the Jupyter Notebook

### Install the necessary packages.

In [None]:
!pip install tensorflow==2.4.3

### Download the necessary archive.

#### If you want to download only the archive:

In [None]:
# Two-step Google Drive download:
#   1. The inner wget quietly requests the download page for the file id, stores the session
#      cookies in ~/cookies.txt and pipes the page to sed, which extracts the `confirm=` token.
#   2. The outer wget reuses those cookies together with the token and saves the response as
#      mask_archive_v0.3.zip. The cookie file is deleted only if that download succeeds (`&&`).
# NOTE(review): --no-check-certificate turns off TLS certificate validation for the inner request.
!wget --load-cookies ~/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies ~/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1XQCXaVy5YVoLPF9V2mR9MAGQXElbh-53' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1XQCXaVy5YVoLPF9V2mR9MAGQXElbh-53" -O mask_archive_v0.3.zip && rm -rf ~/cookies.txt

#### If you want to download the archive together with the trained model:

In [None]:
# Two-step Google Drive download:
#   1. The inner wget quietly requests the download page for the file id, stores the session
#      cookies in ~/cookies.txt and pipes the page to sed, which extracts the `confirm=` token.
#   2. The outer wget reuses those cookies together with the token and saves the response as
#      mask_archive_with_model_v0.3.zip. The cookie file is deleted only if that download
#      succeeds (`&&`).
# NOTE(review): --no-check-certificate turns off TLS certificate validation for the inner request.
!wget --load-cookies ~/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies ~/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1b29DgQo821Spa4m0eplq0RlLCvQn1f74' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1b29DgQo821Spa4m0eplq0RlLCvQn1f74" -O mask_archive_with_model_v0.3.zip && rm -rf ~/cookies.txt

### Unzip the archive and change into its directory.

#### If you downloaded only the archive:

In [None]:
# Extract the downloaded archive into the current directory without printing the file list (-qq).
!unzip -qq mask_archive_v0.3.zip
# Switch the kernel's working directory to the extracted folder; the relative paths used by the
# later cells ("dataset", "face_recognizer", "assets/...") are presumably located inside it.
# NOTE(review): %cd persists across cells, so run only one of the two unzip variants, and only once.
%cd mask_archive_v0.3

#### If you downloaded the archive together with the trained model:

In [None]:
# Extract the downloaded archive into the current directory without printing the file list (-qq).
!unzip -qq mask_archive_with_model_v0.3.zip
# Switch the kernel's working directory to the extracted folder; the relative paths used by the
# later cells ("dataset", "face_recognizer", "assets/...") are presumably located inside it.
# NOTE(review): %cd persists across cells, so run only one of the two unzip variants, and only once.
%cd mask_archive_with_model_v0.3

## Main Code

### Import modules.

In [None]:
# Import modules.
import os
import cv2
import imutils
import numpy as np
import matplotlib.pyplot as plt
from imutils import paths
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

### Function to display images in Jupyter Notebooks and Google Colab.

In [None]:
def plt_imshow(title, image):
    """Show an OpenCV image inline using matplotlib.

    Args:
        title: Text placed above the figure.
        image: Image array in BGR channel order; it is reordered to RGB
            before being handed to matplotlib.
    """
    rgbImage = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Draw the picture without grid lines, label it, and render the figure.
    plt.imshow(rgbImage)
    plt.grid(False)
    plt.title(title)
    plt.show()

### Implementing face mask recognizer training script with Keras and TensorFlow.

In [None]:
def recognize_and_predict_mask(frame, faceNet, maskNet, min_confidence=None):
    """Locate faces in a frame and run the mask classifier on every face crop.

    Args:
        frame: BGR image array; its first two dimensions are read as (height, width).
        faceNet: Face detector exposing `setInput(blob)` and `forward()`. The result is
            indexed as `[0, 0, i, 2]` for the confidence of detection `i` and as
            `[0, 0, i, 3:7]` for its box, which is scaled by the frame width/height.
        maskNet: Classifier exposing `predict(faces, batch_size=...)`.
        min_confidence: Detections whose confidence is not greater than this value are
            ignored. When omitted, the notebook-level `args["confidence"]` is used.

    Returns:
        A 2-tuple `(locs, preds)`. `locs` is a list of `(startX, startY, endX, endY)`
        boxes; `preds` is the output of `maskNet.predict` for the matching face crops,
        or an empty list when no face passed the filter.

    NOTE(review): this notebook defines `recognize_and_predict_mask` twice; when the cells
    are run in order, the later definition replaces this one.
    """
    if min_confidence is None:
        # Backward-compatible default: fall back to the global notebook configuration.
        min_confidence = args["confidence"]

    # Frame dimensions are needed to scale the relative box coordinates back to pixels.
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
        (104.0, 177.0, 123.0))

    # Run the face detector on the blob.
    faceNet.setInput(blob)
    recognitions = faceNet.forward()

    # Face crops, their bounding boxes, and the classifier output.
    faces = []
    locs = []
    preds = []

    for i in range(0, recognitions.shape[2]):
        confidence = recognitions[0, 0, i, 2]

        # Drop weak detections.
        if confidence > min_confidence:
            # Scale the box to pixel coordinates and clamp it to the frame.
            box = recognitions[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

            face = frame[startY:endY, startX:endX]

            # A box lying outside the frame collapses to an empty crop after clamping;
            # converting or resizing an empty array would raise, so skip it.
            if face.size == 0:
                continue

            # BGR -> RGB, resize to the 224x224 classifier input, then preprocess.
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)

            faces.append(face)
            locs.append((startX, startY, endX, endY))

    # Classify all faces in a single batch instead of one call per face.
    if len(faces) > 0:
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)

    # Face locations and their corresponding predictions.
    return (locs, preds)

In [None]:
# Notebook stand-in for the command-line interface of the training script: rather than
# parsing options with argparse, the values are fixed in a plain dictionary.
#   dataset -- the path of the input dataset
#   plot    -- the path of the output loss/accuracy plot
#   model   -- the path to output the face mask recognizer model
args = dict(
    dataset="dataset",
    plot="loss_acc_plot.png",
    model="mask_recognizer.model",
)

In [None]:
# Training hyperparameters: initial learning rate, number of epochs, and batch size.
INIT_LR, EPOCHS, BS = 1e-4, 20, 32

In [None]:
# Collect every image below the dataset directory. The paths are sorted so that the sample
# order (and therefore the seeded train/test split) does not depend on the order in which
# the filesystem happens to list the files.
print("Loading images...")
imagePaths = sorted(paths.list_images(args["dataset"]))
if not imagePaths:
    # Fail early with a clear message instead of an obscure error in the encoding step.
    raise FileNotFoundError(
        "No images found under '{}'".format(args["dataset"]))
data = []
labels = []

# Loop over the image paths.
for imagePath in imagePaths:
    # The class label is the name of the directory that directly contains the image.
    label = imagePath.split(os.path.sep)[-2]

    # Load the image resized to 224x224 and apply the MobileNetV2 preprocessing.
    image = load_img(imagePath, target_size=(224, 224))
    image = img_to_array(image)
    image = preprocess_input(image)

    # Update the data and label lists, respectively.
    data.append(image)
    labels.append(label)

# Convert data and labels to NumPy arrays.
data = np.array(data, dtype="float32")
labels = np.array(labels)

# Encode the string labels and expand them into one-hot vectors.
# NOTE(review): this pairing assumes exactly two classes (the model head has two outputs).
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
labels = to_categorical(labels)

In [None]:
# Augmentation applied to the training batches: random rotations, zooms, shifts, shears and
# horizontal flips, with newly exposed pixels filled from the nearest existing ones.
aug = ImageDataGenerator(
    rotation_range=20, zoom_range=0.15,
    width_shift_range=0.2, height_shift_range=0.2,
    shear_range=0.15, horizontal_flip=True,
    fill_mode="nearest")

# Hold out 20% of the samples for testing and train on the remaining 80%. The split is
# stratified on the labels and seeded so that it can be repeated.
trainX, testX, trainY, testY = train_test_split(
    data, labels, test_size=0.20, stratify=labels, random_state=42)

In [None]:
# Instantiate MobileNetV2 with ImageNet weights and without its fully connected top, fed by
# a 224x224x3 input tensor.
baseModel = MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)))

# Classification head stacked on the base output: 7x7 average pooling, flatten, a 128-unit
# ReLU layer, 50% dropout, and a two-way softmax.
headModel = baseModel.output
for headLayer in (
        AveragePooling2D(pool_size=(7, 7)),
        Flatten(name="flatten"),
        Dense(128, activation="relu"),
        Dropout(0.5),
        Dense(2, activation="softmax")):
    headModel = headLayer(headModel)

# The full network (base + head) is the model that gets trained.
model = Model(inputs=baseModel.input, outputs=headModel)

# Freeze every base layer so that only the new head is updated during training.
for layer in baseModel.layers:
    layer.trainable = False

In [None]:
# Compile the model with Adam, starting at INIT_LR and decaying the rate over the epochs.
# `learning_rate` is used instead of the deprecated `lr` alias.
print("Compiling model...")
opt = Adam(learning_rate=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="binary_crossentropy", optimizer=opt,
    metrics=["accuracy"])

# Train the head of the network on augmented batches; the held-out split is used for
# validation at the end of every epoch.
print("Training head...")
H = model.fit(
    aug.flow(trainX, trainY, batch_size=BS),
    steps_per_epoch=len(trainX) // BS,
    validation_data=(testX, testY),
    validation_steps=len(testX) // BS,
    epochs=EPOCHS)

In [None]:
# Run the trained network on the held-out images and reduce every probability vector to
# the index of its most likely class.
print("Evaluating network...")
predIdxs = np.argmax(model.predict(testX, batch_size=BS), axis=1)

# Per-class precision/recall/F1, with the true class taken from the one-hot test labels.
print(classification_report(
    np.argmax(testY, axis=1), predIdxs, target_names=lb.classes_))

# Write the trained model to disk in HDF5 format.
print("Saving mask recognizer model...")
model.save(args["model"], save_format="h5")

# Plot the training/validation loss and accuracy curves against the epoch index.
N = EPOCHS
plt.style.use("ggplot")
plt.figure()
for historyKey, curveLabel in (
        ("loss", "train_loss"),
        ("val_loss", "val_loss"),
        ("accuracy", "train_acc"),
        ("val_accuracy", "val_acc")):
    plt.plot(np.arange(0, N), H.history[historyKey], label=curveLabel)
plt.title("Training Loss and Accuracy on Mask Recognition")
plt.xlabel("Epoch")
plt.ylabel("Loss / Accuracy")
plt.legend(loc="lower left")
plt.show()

### Implementing face mask recognizer for images with OpenCV.

In [None]:
# Notebook stand-in for the command-line interface of the image script: rather than
# parsing options with argparse, the values are fixed in a plain dictionary.
#   image      -- the path of the input image
#   face       -- the path of the face recognizer model directory
#   model      -- the path of the trained face mask recognizer model
#   confidence -- minimum probability to filter weak recognitions
args = dict(
    image="assets/image/mask-1.png",
    face="face_recognizer",
    model="mask_recognizer.model",
    confidence=0.5,
)

In [None]:
# Build the paths of the network definition and weights inside the face recognizer
# directory, then load the face recognizer with OpenCV's dnn module.
print("Loading face recognizer model...")
prototxtPath, weightsPath = (
    os.path.sep.join([args["face"], fileName])
    for fileName in ("deploy.prototxt",
                     "res10_300x300_ssd_iter_140000.caffemodel"))
net = cv2.dnn.readNet(prototxtPath, weightsPath)

# Restore the trained mask classifier from disk.
print("Loading face mask recognizer model...")
model = load_model(args["model"])

In [None]:
# Load the input image from disk. cv2.imread signals failure by returning None instead of
# raising, so check explicitly to get a readable error rather than an AttributeError below.
image = cv2.imread(args["image"])
if image is None:
    raise FileNotFoundError(
        "Could not read image '{}'".format(args["image"]))

# Keep an untouched copy and grab the spatial dimensions.
orig = image.copy()
(h, w) = image.shape[:2]

# Construct a 300x300 blob from the image, subtracting the per-channel mean values.
blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300),
    (104.0, 177.0, 123.0))

# Pass the blob through the network and obtain the face recognitions.
print("Computing face recognitions...")
net.setInput(blob)
recognitions = net.forward()

In [None]:
# Walk over every candidate detection returned by the face network.
for i in range(0, recognitions.shape[2]):
    # Confidence associated with the recognition.
    confidence = recognitions[0, 0, i, 2]

    # Filter out weak recognitions.
    if confidence > args["confidence"]:
        # Scale the relative box to pixel coordinates and clamp it to the image.
        box = recognitions[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")
        (startX, startY) = (max(0, startX), max(0, startY))
        (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

        face = image[startY:endY, startX:endX]

        # A box lying outside the image collapses to an empty crop after clamping;
        # converting or resizing an empty array would raise, so skip it.
        if face.size == 0:
            continue

        # BGR -> RGB, resize to the 224x224 classifier input, preprocess, add batch axis.
        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        face = cv2.resize(face, (224, 224))
        face = img_to_array(face)
        face = preprocess_input(face)
        face = np.expand_dims(face, axis=0)

        # Classify the face.
        # NOTE(review): the (mask, withoutMask) order assumes the first output column is
        # the "mask" class — confirm against the label encoding used in training.
        (mask, withoutMask) = model.predict(face)[0]

        # Green for a mask, red otherwise (colors are BGR).
        label = "MASK" if mask > withoutMask else "NO MASK"
        color = (0, 255, 0) if label == "MASK" else (0, 0, 255)

        # Include the probability in the label.
        label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)

        # Put the text above the box, or just inside it when the box touches the top
        # edge and the text would otherwise fall outside the image.
        labelY = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.putText(image, label, (startX, labelY),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
        cv2.rectangle(image, (startX, startY), (endX, endY), color, 2)

# Show the output image.
plt_imshow("MASK RECOGNITION RESULT", image)

### Implementing face mask recognizer in real-time video streams with OpenCV.

In [None]:
def recognize_and_predict_mask(frame, faceNet, maskNet, min_confidence=None):
    """Locate faces in a frame and run the mask classifier on every face crop.

    Args:
        frame: BGR image array; its first two dimensions are read as (height, width).
        faceNet: Face detector exposing `setInput(blob)` and `forward()`. The result is
            indexed as `[0, 0, i, 2]` for the confidence of detection `i` and as
            `[0, 0, i, 3:7]` for its box, which is scaled by the frame width/height.
        maskNet: Classifier exposing `predict(faces, batch_size=...)`.
        min_confidence: Detections whose confidence is not greater than this value are
            ignored. When omitted, the notebook-level `args["confidence"]` is used.

    Returns:
        A 2-tuple `(locs, preds)`. `locs` is a list of `(startX, startY, endX, endY)`
        boxes; `preds` is the output of `maskNet.predict` for the matching face crops,
        or an empty list when no face passed the filter.

    NOTE(review): an earlier cell of this notebook defines a function with the same name;
    when the cells are run in order, this definition replaces it.
    """
    if min_confidence is None:
        # Backward-compatible default: fall back to the global notebook configuration.
        min_confidence = args["confidence"]

    # Frame dimensions are needed to scale the relative box coordinates back to pixels.
    (h, w) = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300),
        (104.0, 177.0, 123.0))

    # Run the face detector on the blob.
    faceNet.setInput(blob)
    recognitions = faceNet.forward()

    # Face crops, their bounding boxes, and the classifier output.
    faces = []
    locs = []
    preds = []

    for i in range(0, recognitions.shape[2]):
        confidence = recognitions[0, 0, i, 2]

        # Drop weak detections.
        if confidence > min_confidence:
            # Scale the box to pixel coordinates and clamp it to the frame.
            box = recognitions[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            (startX, startY) = (max(0, startX), max(0, startY))
            (endX, endY) = (min(w - 1, endX), min(h - 1, endY))

            face = frame[startY:endY, startX:endX]

            # A box lying outside the frame collapses to an empty crop after clamping;
            # converting or resizing an empty array would raise, so skip it.
            if face.size == 0:
                continue

            # BGR -> RGB, resize to the 224x224 classifier input, then preprocess.
            face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            face = cv2.resize(face, (224, 224))
            face = img_to_array(face)
            face = preprocess_input(face)

            faces.append(face)
            locs.append((startX, startY, endX, endY))

    # Classify all faces in a single batch instead of one call per face.
    if len(faces) > 0:
        faces = np.array(faces, dtype="float32")
        preds = maskNet.predict(faces, batch_size=32)

    # Face locations and their corresponding predictions.
    return (locs, preds)

In [None]:
# Notebook stand-in for the command-line interface of the video script: rather than
# parsing options with argparse, the values are fixed in a plain dictionary.
#   input      -- path of the video file that is read
#   output     -- path of the annotated video file that is written
#   face       -- path to the face recognizer model directory
#   model      -- path to the trained face mask recognizer model
#   confidence -- minimum probability to filter weak recognitions
args = dict(
    input="assets/video/CDC_mask_720.mp4",
    output="mask_recognize_output.avi",
    face="face_recognizer",
    model="mask_recognizer.model",
    confidence=0.5,
)

In [None]:
# Load our serialized face recognizer model from disk.
print("Loading face recognizer model...")
prototxtPath = os.path.sep.join([args["face"], "deploy.prototxt"])
weightsPath = os.path.sep.join([args["face"],
    "res10_300x300_ssd_iter_140000.caffemodel"])
faceNet = cv2.dnn.readNet(prototxtPath, weightsPath)

# Load the face mask recognizer model from disk.
print("Loading face mask recognizer model...")
maskNet = load_model(args["model"])

# Grab a reference to the video file and initialize the pointer to the output video file.
print("Opening video file...")
vs = cv2.VideoCapture(args["input"])
if not vs.isOpened():
    # Without this check an unreadable input yields no frames and the processing loop
    # finishes silently without producing any output video.
    vs.release()
    raise IOError("Could not open video '{}'".format(args["input"]))
writer = None

In [None]:
# Process the video frame by frame. The loop is wrapped in try/finally so that the capture
# and the writer are released even if a frame raises or the cell is interrupted.
try:
    while True:
        # Grab the next frame; None means the end of the video has been reached.
        frame = vs.read()[1]
        if frame is None:
            break

        # Resize the frame to a width of 400 pixels.
        frame = imutils.resize(frame, width=400)

        # Recognize faces in the frame and classify each one as mask / no mask.
        (locs, preds) = recognize_and_predict_mask(frame, faceNet, maskNet)

        # Loop over the recognized face locations and their corresponding predictions.
        for (box, pred) in zip(locs, preds):
            (startX, startY, endX, endY) = box
            (mask, withoutMask) = pred

            # Green for a mask, red otherwise (colors are BGR).
            label = "MASK" if mask > withoutMask else "NO MASK"
            color = (0, 255, 0) if label == "MASK" else (0, 0, 255)

            # Include the probability in the label.
            label = "{}: {:.2f}%".format(label, max(mask, withoutMask) * 100)

            # Put the text above the box, or just inside it when the box touches the top
            # edge and the text would otherwise fall outside the frame.
            labelY = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.putText(frame, label, (startX, labelY),
                cv2.FONT_HERSHEY_SIMPLEX, 0.45, color, 2)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)

        # Create the writer lazily, once the size of the (resized) frame is known.
        if writer is None and args["output"] is not None:
            fourcc = cv2.VideoWriter_fourcc(*"MJPG")
            writer = cv2.VideoWriter(args["output"], fourcc, 20,
                (frame.shape[1], frame.shape[0]), True)

        # If the writer is not None, write the frame to disk.
        if writer is not None:
            writer.write(frame)
finally:
    # Release the input video and, if one was created, the output writer.
    vs.release()
    if writer is not None:
        writer.release()

If you would like to view the video within Google Colab, just execute the following code blocks.

Our output video is produced in `.avi` format. First, we need to convert it to `.mp4` format.

In [None]:
!ffmpeg -i mask_recognize_output.avi mask_recognize_output.mp4

In [None]:
# Display the converted video inline by embedding it in the page as a base64 data URL.
from IPython.display import HTML
from base64 import b64encode

# Read the file inside a context manager so the handle is closed right after reading.
with open("mask_recognize_output.mp4", "rb") as videoFile:
    mp4 = videoFile.read()
dataURL = "data:video/mp4;base64," + b64encode(mp4).decode()

# The HTML object is the last expression of the cell, so the notebook renders it.
HTML("""
<video width=400 controls>
    <source src="%s" type="video/mp4">
</video>
""" % dataURL)