# Deploying Our Model

This file deploys our trained model on live images captured with OpenCV. We use a cascade classifier to detect the face(s) in each webcam frame and feed each face image into our CNN. The CNN returns a result (mask, no mask, or incorrectly worn mask), and we display that result on the screen.

In [4]:
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [5]:
# Haar cascade used to locate frontal faces; the XML file is expected next to this notebook.
face_detector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Constructing a CascadeClassifier from a missing or invalid file does not raise;
# it yields an empty classifier that only fails later inside detectMultiScale.
# Fail here instead, with a message that names the real problem.
if face_detector.empty():
    raise IOError("Could not load cascade file 'haarcascade_frontalface_default.xml'")

## Test On Static Image
First, test the cascade classifier on a static image, since a video is just a series of static images. Running the code shows that the classifier works, but not especially well. This is not a pressing issue: even if the classifier misses a face in one frame, it may still detect it in a later frame.

In [3]:
# Sanity-check the cascade classifier on a single still image.
input_img = cv2.imread('group_img_mask.jpg')

# cv2.imread signals a missing/unreadable file by returning None rather than
# raising, which would otherwise surface as an unrelated error in cvtColor.
if input_img is None:
    raise FileNotFoundError("Could not read image 'group_img_mask.jpg'")

# The Haar cascade operates on grayscale images.
gray = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
faces = face_detector.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)

# Outline every detected face (colour is given in BGR order, so this is blue).
for (x, y, w, h) in faces:
    cv2.rectangle(input_img, (x, y), (x + w, y + h), (255, 0, 0), 2)

# Show the annotated image until any key is pressed.
cv2.imshow('image', input_img)
cv2.waitKey(0)
cv2.destroyAllWindows()

## Test On Live Video
First, we need to re-define our CNN model classes so that the saved weights can be loaded into them.

In [3]:
class Basic_CNN(nn.Module):
    """Baseline CNN producing 3 raw class scores (logits) per image.

    Architecture: two 5x5 convolutions, each followed by ReLU and 2x2 max
    pooling, then two fully connected layers. The hard-coded flattened size
    10*57*57 means the network expects 3x240x240 inputs
    (240 -> conv 236 -> pool 118 -> conv 114 -> pool 57).
    """

    def __init__(self):
        # Initialise nn.Module first so attribute registration is fully set up
        # before anything is assigned on the instance.
        super(Basic_CNN, self).__init__()
        self.name = "Basic_CNN"
        self.conv1 = nn.Conv2d(3, 5, 5)  # in_channels=3, out_channels=5, kernel_size=5
        self.pool = nn.MaxPool2d(2, 2)  # kernel_size=2, stride=2
        self.conv2 = nn.Conv2d(5, 10, 5)  # in_channels=5, out_channels=10, kernel_size=5
        self.fc1 = nn.Linear(10 * 57 * 57, 32)
        self.fc2 = nn.Linear(32, 3)

    @staticmethod
    def weights_init(m):
        """Initialise a convolution layer in place.

        Intended for ``model.apply(Basic_CNN.weights_init)``; modules that are
        not ``nn.Conv2d`` are left untouched.

        Args:
            m: Any ``nn.Module``.
        """
        if isinstance(m, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)
            # Xavier initialisation needs at least a 2-D tensor, so it cannot
            # be applied to the 1-D bias (it raises ValueError); zero it instead.
            if m.bias is not None:
                nn.init.zeros_(m.bias)

    def forward(self, img):
        """Return class logits of shape (batch, 3) for a batch of images.

        Args:
            img: Float tensor of shape (batch, 3, 240, 240).
        """
        x = self.pool(F.relu(self.conv1(img)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten per sample. Keeping the batch dimension explicit means a
        # wrongly sized input fails in fc1 instead of silently changing the
        # batch size, as view(-1, 10*57*57) could.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [6]:
# Create a neural net class
class CNN_V2(nn.Module):
    """Two-convolution classifier that returns log-probabilities per class.

    Each 3x3 convolution (stride 1, padding 1) keeps the spatial size and is
    followed by 2x2 max pooling and ReLU. The linear layer expects
    24 * 60 * 60 features, i.e. 3x240x240 inputs pooled twice (240/2/2 = 60).

    Args:
        num_classes: Number of output classes (defaults to 3).
    """

    def __init__(self, num_classes=3):
        super().__init__()
        self.name = "CNN_V2"

        # RGB input -> 12 feature maps -> 24 feature maps.
        self.conv1 = nn.Conv2d(3, 12, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(12, 24, kernel_size=3, stride=1, padding=1)

        # Halves height and width each time it is applied.
        self.pool = nn.MaxPool2d(2)

        # Channel-wise dropout with probability 0.2.
        self.drop = nn.Dropout2d(0.2)

        # 24 feature maps of 60x60 after the two pooling steps.
        self.fc = nn.Linear(24 * 60 * 60, num_classes)

    def forward(self, x):
        """Map a (batch, 3, 240, 240) tensor to (batch, num_classes) log-probabilities."""
        # Both stages share the same shape: convolve, pool, then ReLU.
        for conv in (self.conv1, self.conv2):
            x = torch.relu(self.pool(conv(x)))

        # NOTE(review): two dropouts are stacked here - the Dropout2d module
        # and then F.dropout with its default probability. Both are inactive
        # in eval mode; confirm the stacking is intended for training.
        x = self.drop(x)
        x = F.dropout(x, training=self.training)

        # Flatten to one feature vector per sample and classify.
        flat = x.view(-1, 24 * 60 * 60)
        return F.log_softmax(self.fc(flat), dim=1)

## Load Deployable Model
Here we load the deployable model from our `models` folder. This model will receive live face images captured through OpenCV.

In [8]:
# Rebuild the architecture, then restore the trained weights into it.
model = CNN_V2()
model_path = "models/model_{0}_bs{1}_lr{2}_epoch{3}".format("CNN_V2", 32, 0.0001, 18)

# map_location="cpu" lets a checkpoint saved from a GPU run load on a machine
# without CUDA; the live loop feeds CPU tensors, so the model must be on CPU.
state = torch.load(model_path, map_location="cpu")

# Switch to inference mode so the dropout layers are disabled; a freshly
# constructed module is in training mode, which would make predictions random.
# The mode does not depend on the weights, so it is set before loading to keep
# load_state_dict as the final expression of the cell.
model.eval()
model.load_state_dict(state)

<All keys matched successfully>

In [11]:
# Display text per class index (presumably the model's output index; verify
# against the label order used in training).
labels_dict = {
    0: 'INCORRECT MASK',
    1: 'MASK',
    2: 'NO MASK',
}

# Box colour per class index, in OpenCV's BGR channel order.
color_dict = {
    0: (0, 255, 255),  # yellow
    1: (0, 255, 0),    # green
    2: (0, 0, 255),    # red
}

In [12]:
# Live mask detection: grab webcam frames, find faces, classify each face
# with the CNN and draw a colour-coded box around it. Press Esc to stop.
source = cv2.VideoCapture(0)  # Change the index if you have several webcams; 0 is the default
if not source.isOpened():
    raise RuntimeError("Could not open webcam 0")

# Make sure dropout is disabled for inference (harmless if already in eval mode).
model.eval()

try:
    while True:
        # Read from webcam; stop cleanly if no frame could be grabbed
        # (img is None in that case and cvtColor would crash).
        ret, img = source.read()
        if not ret:
            break

        # The Haar cascade works on grayscale images.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_detector.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5)

        # OpenCV delivers BGR frames; the CNN is fed RGB.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Iterate through all faces detected
        for (x, y, w, h) in faces:
            # Crop the face: rows span the box height (h), columns its width (w).
            rgb_face = rgb[y:y + h, x:x + w]

            # Resize to the 240x240 network input and scale pixels to [0, 1].
            resized_face_img = cv2.resize(rgb_face, (240, 240)) / 255.0

            # HWC image -> CHW float tensor with a leading batch dimension of 1.
            model_input = (
                torch.tensor(resized_face_img, dtype=torch.float32)
                .permute(2, 0, 1)
                .unsqueeze(0)
            )

            # Inference only: no gradients need to be tracked.
            with torch.no_grad():
                output = model(model_input)
            label = output.argmax(dim=1).item()

            # Draw the box on the original BGR frame in the class colour.
            cv2.rectangle(img, (x, y), (x + w, y + h), color_dict[label], 2)

        cv2.imshow('Detecting Masks... Press esc to exit', img)

        # Exit - Esc key
        if cv2.waitKey(1) == 27:
            break
finally:
    # Always free the camera and close the window, even on an error.
    # No blocking waitKey(0) here: Esc alone should end the session.
    source.release()
    cv2.destroyAllWindows()