In [1]:
import cv2
import torch

import torch
import torch.nn as nn
import numpy as np
import torchvision
from PIL import Image
from matplotlib import cm


In [2]:
## fer map
emotion_dict = {0: "Anger", 1: "Disgust", 2: "Fear", 3: "Happiness", 4: "Sadness", 5: "Surprise", 6: "Neutral"}


class Config:
    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

config = Config(
    num_classes = 7,
    width = 224,
    height = 224,
    num_epochs = 30,
    batch_size = 32,
    feat_dim = 7,
    lr_cent = 0.5,
    closs_weight = 0.5,
    ckp = True,
    fer = False
)

In [None]:
# Model Architecture

class ConvBlock(nn.Module):
    def __init__(self, C_in, C_out, kernel_size, stride):
        super(ConvBlock, self).__init__()
        self.block = nn.Sequential(
                          nn.Conv2d(in_channels=C_in, out_channels=C_out, kernel_size=kernel_size, stride=stride, padding=(1,1)),
                          nn.BatchNorm2d(C_out),
                          nn.ReLU(),
                          nn.Conv2d(in_channels=C_out, out_channels=C_out, kernel_size=kernel_size, stride=stride, padding=(1,1)),
                          nn.BatchNorm2d(C_out),
                          nn.ReLU(),
                          nn.MaxPool2d(2))
        
    def forward(self, x):
        return self.block(x)
    
class LinearBlock(nn.Module):
    def __init__(self, insize, outsize):
        super(LinearBlock, self).__init__()
        self.linblock = nn.Sequential(
                          nn.Linear(insize, outsize),
                          nn.BatchNorm1d(outsize),
                          nn.ReLU())
        
    def forward(self, x):
        return self.linblock(x)
    
class Flatten(nn.Module):
    def forward(self, input):
        return input.view(input.size(0), -1)
    
class BaselineModel(nn.Module):
    def __init__(self, num_blocks):
        super(BaselineModel, self).__init__()
        layers = []
        num_classes = 7
        channels = [1, 256, 128, 64] # this needs to be modified according to num_blocks
        linear_size = [64*6*6, 512, 256, 128]
        
        for i in range(num_blocks):
            layers.append(ConvBlock(C_in=channels[i], C_out=channels[i+1], kernel_size=3, stride=1))
        
        layers.append(Flatten())
        
        for i in range(num_blocks):
            layers.append(LinearBlock(linear_size[i], linear_size[i+1]))
        
        layers.append(nn.Linear(linear_size[i+1], config.num_classes))
        
        self.net = nn.Sequential(*layers)
        
    def forward(self, x):
        return self.net(x)


    
model = BaselineModel(num_blocks=3)
model = torch.load('models/fer_adam_cent_20.pth', map_location=torch.device('cpu'))
model.eval()


  "type " + container_type.__name__ + ". It won't be checked "
  "type " + container_type.__name__ + ". It won't be checked "
  if original_source != current_source:
  "type " + container_type.__name__ + ". It won't be checked "
  "type " + container_type.__name__ + ". It won't be checked "


BaselineModel(
  (net): Sequential(
    (0): ConvBlock(
      (block): Sequential(
        (0): Conv2d(1, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): ReLU()
        (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      )
    )
    (1): ConvBlock(
      (block): Sequential(
        (0): Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
        (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5

In [None]:
# Load the cascade
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')


# Read the input image
frame = cv2.imread('imgs/test6.png')


# Convert into grayscale
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
print("frame shape: ", frame.shape)

# Detect faces
faces = face_cascade.detectMultiScale(gray, 1.1, 4)


# Draw the rectangle around each face
for (x, y, w, h) in faces:
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 1)
    roi_gray = gray[y:y + h, x:x + w]
    
    
    cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (224, 224)), 0), 0)
    cv2.normalize(cropped_img, cropped_img, alpha=0, beta=1, norm_type=cv2.NORM_L2, dtype=cv2.CV_32F)
    
#     print("img shape: ", cropped_img.shape)
#     print(type(cropped_img))
    
#     cropped_img = torch.from_numpy(cropped_img)
#     cropped_img = cropped_img.float()

    img = Image.fromarray(frame)
    img_pil = torchvision.transforms.Resize((48,48))(img)
    img = torchvision.transforms.ToTensor()(img_pil)
    img = img/255
    
    if img.shape[0] == 3:
        img = torchvision.transforms.Grayscale(num_output_channels=1)(img_pil)
        img = torchvision.transforms.ToTensor()(img)
        
        
    img = img.unsqueeze(dim=0)
    prediction = model(img)
        
    print(prediction)
    print(torch.argmax(prediction).item())
    cv2.putText(frame, emotion_dict[int(torch.argmax(prediction).item())], (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 1, cv2.LINE_AA)


    
# Display
cv2.imshow('frame', frame)
cv2.waitKey(0)
cv2.destroyWindow('frame')

('frame shape: ', (490, 640, 3))
tensor([[-0.0992, -2.0119, -0.0718,  0.4170,  1.8168, -0.7470, -0.1515]],
       grad_fn=<AddmmBackward>)
4


In [None]:
# # # To capture video from webcam. 
# cap = cv2.VideoCapture(0)
# # # To use a video file as input 
# # cap = cv2.VideoCapture('filename.mp4')

# # # Load the cascade
# face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')


# while True:
#     # Read the frame
#     ret, frame = cap.read()
    
#     # Convert to grayscale
#     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
#     # Detect the faces
#     faces = face_cascade.detectMultiScale(gray, 1.3, 5)
    
#     # Draw the rectangle around each face
#     for (x, y, w, h) in faces:
#         cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 1)
#         roi_gray = gray[y:y + h, x:x + w]
# #         cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (48, 48)), -1), 0)
#         cropped_img = np.expand_dims(np.expand_dims(cv2.resize(roi_gray, (224, 224)), 0), 0)

#         cv2.normalize(cropped_img, cropped_img, alpha=0, beta=1, norm_type=cv2.NORM_L2, dtype=cv2.CV_32F)

#         cropped_img = torch.from_numpy(cropped_img)
#         cropped_img = cropped_img.float()
#         prediction = model(cropped_img)
        
#         print(prediction)
#         cv2.putText(frame, emotion_dict[int(torch.argmax(prediction).item())], (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 1, cv2.LINE_AA)

#     # Display
#     cv2.imshow('frame', frame)
    
#     # Stop if q key is pressed
#     if cv2.waitKey(1) & 0xFF == ord('q'):
#         break
        
# # Release the VideoCapture object
# cap.release()
# cv2.destroyAllWindows()