# Test case of the model

# Actions

> Test the model with real-time video data

> Confirm its accuracy on the given metrics

In [1]:
from __future__ import print_function, division
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
import torch
import torch.nn.functional as F
from torchvision.models.resnet import ResNet, BasicBlock
from torchvision.transforms import transforms
from torch.utils.data import DataLoader,Dataset
from torchvision import models
from torch.autograd import Variable
from random import randint
from tqdm.autonotebook import tqdm
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import copy
plt.ion()   # interactive mode
import warnings
warnings.filterwarnings("ignore")
import cv2

In [2]:
class Net(nn.Module):
    """Small CNN that maps a single-channel face crop to 7 class probabilities.

    Architecture: three convolutions (the first two followed by 3x3/stride-2
    max pooling), dropout, then two fully connected layers and a softmax.

    The flatten step hard-codes ``128 * 5 * 5`` features per sample, which is
    what the convolution/pooling stack yields for a 48x48 input; the expected
    input is therefore a float tensor of shape ``(batch, 1, 48, 48)``.

    Call ``model.eval()`` before inference: dropout follows ``self.training``
    and is only disabled in evaluation mode.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, 5)
        self.max1 = nn.MaxPool2d(3, stride=2)
        self.conv2 = nn.Conv2d(64, 64, 5)
        self.max2 = nn.MaxPool2d(3, stride=2)
        self.conv3 = nn.Conv2d(64, 128, 4)
        self.fc1 = nn.Linear(128 * 5 * 5, 3072)
        self.fc2 = nn.Linear(3072, 7)
        # Explicit class axis: same result as the implicit choice for the 2-D
        # tensor produced in forward(), but without the deprecation warning.
        # Softmax holds no parameters, so saved state_dicts still load.
        self.fc3 = nn.Softmax(dim=1)

    def forward(self, x):
        """Return per-class probabilities of shape ``(batch, 7)`` for input ``x``."""
        x = self.max1(F.relu(self.conv1(x)))
        x = self.max2(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        # F.dropout defaults to training=True; tie it to the module mode so
        # that model.eval() really switches dropout off at inference time.
        x = F.dropout(x, training=self.training)
        x = x.view(-1, 128 * 5 * 5)
        x = F.relu(self.fc1(x))
        # ReLU on the final linear layer is kept as-is: it is part of the
        # architecture the saved weights were trained with.
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

In [3]:
# Build the network (weights are loaded from disk in a later cell) and print
# its layer summary.
model = Net()
print(model)

Net(
  (conv1): Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1))
  (max1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(64, 64, kernel_size=(5, 5), stride=(1, 1))
  (max2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(64, 128, kernel_size=(4, 4), stride=(1, 1))
  (fc1): Linear(in_features=3200, out_features=3072, bias=True)
  (fc2): Linear(in_features=3072, out_features=7, bias=True)
  (fc3): Softmax(dim=None)
)


In [4]:
# Define the loss and optimizer.
# NOTE(review): neither object is used by the inference cells visible in this
# notebook; they look like leftovers from training -- confirm before removing.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adadelta(model.parameters())

In [5]:
# Keep the model on the CPU; the webcam loop below feeds it CPU tensors.
model = model.cpu()

In [6]:
# Load the trained weights, remapping any saved tensors onto the CPU.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (newer PyTorch versions offer `weights_only=True`).
device = torch.device('cpu')
model.load_state_dict(torch.load('model_save_state.pt', map_location=device))

<All keys matched successfully>

# How it works
> Use OpenCV to access the webcam, capture the input video, and convert it into a series of image frames.

> For each frame, we run the Haar cascade model from OpenCV to locate faces and crop them out of the frame.

> Pass the cropped-out face images into our trained model to find the relevant facial features.

> Display the bounding box and all the detected features on the frame while the script is running.

> Optionally save the video stream.

<img src="images/image.png" width="800" height="600"/>

In [None]:
# Real-time demo: read webcam frames, detect faces with a Haar cascade,
# classify every detected face with `model`, and draw the box and predicted
# label on the frame. Press 'q' in the video window to stop.
faceCascade = cv2.CascadeClassifier("haarcascade_frontalface_alt2.xml")
if faceCascade.empty():
    # A missing/unreadable XML otherwise only fails later, inside
    # detectMultiScale, with a much less helpful error.
    raise IOError("Could not load 'haarcascade_frontalface_alt2.xml'")

video_capture = cv2.VideoCapture(0)
target = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']
font = cv2.FONT_HERSHEY_SIMPLEX

# Switch to inference mode so that layers which honour `model.training`
# behave deterministically.
model.eval()

try:
    if not video_capture.isOpened():
        raise IOError("Could not open webcam (device 0)")

    while True:
        # Capture frame-by-frame; stop cleanly when no frame is delivered
        # (camera unplugged, stream ended) instead of crashing on `None`.
        ret, frame = video_capture.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = faceCascade.detectMultiScale(gray, scaleFactor=1.1)

        for (x, y, w, h) in faces:
            # Crop from the untouched grayscale image *before* drawing, so the
            # green bounding box never leaks into the classifier input.
            face_crop = cv2.resize(gray[y:y + h, x:x + w], (48, 48))
            face_crop = face_crop.astype('float32') / 255
            # Shape (batch=1, channels=1, 48, 48), as fed to the model.
            face_crop = face_crop.reshape(1, 1, face_crop.shape[0], face_crop.shape[1])

            # No gradients are needed for prediction.
            with torch.no_grad():
                output = model(torch.from_numpy(face_crop))
            result = target[torch.argmax(output, dim=1).item()]

            # Draw a rectangle around the face and label it.
            # NOTE(review): the trailing 5 is passed as lineType and is not one
            # of the named cv2.LINE_* constants -- kept unchanged, confirm intent.
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2, 5)
            cv2.putText(frame, result, (x, y), font, 1, (200, 0, 0), 3, cv2.LINE_AA)

        # Display the resulting frame
        cv2.imshow('Video', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even after an exception or
    # a kernel interrupt; otherwise the device stays locked.
    video_capture.release()
    cv2.destroyAllWindows()