In [13]:
import numpy as np
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
import os

In [10]:
class CNN(nn.Module):
    """Small convolutional classifier: 1-channel 48x48 input, 7 output scores.

    All convolutions are 3x3 without padding and the pooling is 2x2, so a
    48x48 input shrinks as
    48 -> conv1 -> 46 -> conv2 -> 44 -> pool -> 22 -> conv3 -> 20 -> pool -> 10,
    which leaves 64 channels of 10x10 (6400 values) for the first dense layer.
    """

    def __init__(self):
        super().__init__()
        # Attribute names (and their creation order) determine the state_dict
        # keys, so they are kept exactly as the saved checkpoint expects.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 10 * 10, 512)
        self.fc2 = nn.Linear(512, 144)
        self.fc3 = nn.Linear(144, 7)
        self.drop = nn.Dropout(p=0.5)

    def forward(self, x):
        """Map a (batch, 1, 48, 48) tensor to (batch, 7) unnormalized scores."""
        features = F.relu(self.conv1(x))

        # The second and third conv stages are each followed by max-pooling.
        for conv in (self.conv2, self.conv3):
            features = self.pool(F.relu(conv(features)))

        features = torch.flatten(features, start_dim=1)

        # Two hidden dense layers, each followed by dropout.
        for dense in (self.fc1, self.fc2):
            features = self.drop(F.relu(dense(features)))

        return self.fc3(features)


net = CNN()

In [16]:
# Lookup from class index (0-6) to the emotion name shown to the user.
# The position of each name in the tuple is its class index.
classes = dict(enumerate((
    'Angry',
    'Disgust',
    'Fear',
    'Happy',
    'Sad',
    'Surprise',
    'Neutral',
)))

In [17]:
def predict(img, model):
    """Return the emotion label the model assigns to a single image.

    Args:
        img: Tensor holding one image without a batch dimension; a batch
            axis of size 1 is prepended before the forward pass.
        model: Callable that maps the batched tensor to per-class scores
            with the class axis at dim 1.

    Returns:
        The entry of the module-level ``classes`` mapping for the
        highest-scoring class index.

    Raises:
        KeyError: If the winning index is not a key of ``classes``.
    """
    # Inference only: disabling autograd avoids building a graph (and holding
    # on to activations) for every face in every frame.
    with torch.no_grad():
        scores = model(img.unsqueeze(0))
    best = torch.max(scores, dim=1).indices
    return classes[best[0].item()]

In [11]:
PATH = './emo_net.pth'
# Deserialize the weights onto the CPU so the checkpoint loads even on a
# machine without the GPU it may have been saved from; the model is moved to
# the selected device later, right before the video loop.
net.load_state_dict(torch.load(PATH, map_location='cpu'))
# Switch to evaluation mode so the dropout layers are inactive at inference.
net.eval()

CNN(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=6400, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=144, bias=True)
  (fc3): Linear(in_features=144, out_features=7, bias=True)
  (drop): Dropout(p=0.5, inplace=False)
)

In [12]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

In [14]:
# Directory the notebook is running from; the input video is looked up here.
filepath = os.getcwd()

In [18]:
# Annotate every frame of the test video with the detected faces and their
# predicted emotion, show it live, and save the annotated stream to demo.avi.
FRAME_SIZE = (1440, 720)  # (width, height) used for both display and output
WINDOW_NAME = 'Emotion Detection'

net.to(device)

# Build the Haar cascade once; it is identical for every frame.
face_detector = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

vid = cv2.VideoCapture(os.path.join(filepath, "Test Video.mp4"))
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('demo.avi', fourcc, 20.0, FRAME_SIZE)

# Create and size the same window that imshow() draws into below.
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.resizeWindow(WINDOW_NAME, *FRAME_SIZE)

try:
    while True:
        ret, frame = vid.read()
        # Check the read result before touching the frame: when the stream
        # ends (or could not be opened) frame is None and resize would raise.
        if not ret:
            break
        frame = cv2.resize(frame, FRAME_SIZE)
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        num_faces = face_detector.detectMultiScale(gray_frame, scaleFactor=1.3, minNeighbors=4)

        for (x, y, w, h) in num_faces:
            # Box is drawn larger than the detection to leave room for the label.
            cv2.rectangle(frame, (x, y-50), (x+w+20, y+h+20), (0, 255, 0), 4)

            # Crop the face from the grayscale frame and shape it as a
            # (1, 48, 48) float tensor; predict() adds the batch axis.
            # NOTE(review): pixel values are passed unscaled (0-255) — confirm
            # this matches the preprocessing used when the model was trained.
            roi_gray_frame = gray_frame[y:y + h, x:x + w]
            cropped_img = cv2.resize(roi_gray_frame, (48, 48))
            cropped_img = np.expand_dims(cropped_img, 0)
            cropped_img = torch.from_numpy(cropped_img).float()
            cropped_img = cropped_img.to(device)

            emotion_prediction = predict(cropped_img, net)
            cv2.putText(frame, emotion_prediction, (x+5, y-20), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2, cv2.LINE_AA)

        cv2.imshow(WINDOW_NAME, frame)
        # Write before polling the keyboard so every displayed frame is saved,
        # including the one on which the user presses 'q'.
        out.write(frame)
        if cv2.waitKey(100) & 0xFF == ord('q'):
            break
finally:
    # Always finalize the output file and free the capture/window resources,
    # even if processing a frame raised.
    out.release()
    vid.release()
    cv2.destroyAllWindows()

In [None]:
# Run Above