In [1]:
# import necessary resources
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import cv2

import torch
import torch.nn as nn

from models import Net

net = Net()

# Load the trained weights. map_location='cpu' lets the checkpoint be opened on
# a machine without the GPU it may have been saved from.
state_dict = torch.load('saved_models/keypoints_model_1.pt', map_location='cpu')

# strict=False tolerates key mismatches between checkpoint and model, which
# would otherwise leave layers randomly initialised without any notice (e.g.
# a checkpoint saved from an nn.DataParallel-wrapped model has "module."-
# prefixed keys). Report mismatches instead of swallowing them.
load_result = net.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print('WARNING: checkpoint does not fully match the model -',
          'missing keys:', load_result.missing_keys,
          '| unexpected keys:', load_result.unexpected_keys)

# Switch to inference mode (disables dropout). As the last expression of the
# cell this also displays the network architecture.
net.eval()

Net(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
  (conv4): Conv2d(128, 256, kernel_size=(5, 5), stride=(1, 1))
  (conv5): Conv2d(256, 512, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=4608, out_features=1360, bias=True)
  (fc2): Linear(in_features=1360, out_features=680, bias=True)
  (drop): Dropout(p=0.8, inplace=False)
  (fc3): Linear(in_features=680, out_features=136, bias=True)
)

In [2]:
# sunglasses = cv2.imread('images/sunglasses.png', cv2.IMREAD_UNCHANGED)

In [3]:
import cv2
import time

# Haar cascade used to find frontal faces in each frame.
FACE_CASCADE_PATH = 'detector_architectures/haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(FACE_CASCADE_PATH)

# CascadeClassifier does not raise when the XML file is missing or unreadable;
# it just stays empty and fails later inside detectMultiScale. Fail here with
# a clear message instead.
if face_cascade.empty():
    raise IOError('Could not load face cascade from ' + FACE_CASCADE_PATH)

def detect_facial_keypoints(image, input_shape):
    """Detect faces in ``image`` and predict facial keypoints for each of them.

    Every face found by the module-level ``face_cascade`` is cropped from the
    grayscale image, resized to ``input_shape``, scaled to [0, 1] and passed
    through the module-level ``net`` in a single batch.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image; it is converted to grayscale before detection.
    input_shape : tuple of int
        ``(width, height)`` each face crop is resized to before inference.
        It has to be a size ``net`` accepts.

    Returns
    -------
    list of (face, keypoints)
        ``face`` is the ``(x, y, w, h)`` detection box and ``keypoints`` a list
        of ``(x, y)`` pairs mapped back into image coordinates. The list is
        empty when no face is detected.
    """
    # NOTE(review): frames coming from cv2.VideoCapture are BGR, so
    # COLOR_BGR2GRAY may be the intended conversion - confirm against how the
    # network was trained before changing it.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    def extract_patch(face):
        """Crop one detection from the grayscale image and normalise it."""
        x, y, w, h = face
        patch = gray[y:y+h, x:x+w]
        patch = cv2.resize(patch, input_shape)
        return patch / 255

    def denormalize_keypoints(arg):
        """Map one flat network output back into image coordinates."""
        face, keypoints = arg
        x, y, w, h = face

        # The flat output interleaves coordinates: x0, y0, x1, y1, ...
        keypoints_x = keypoints[0::2]
        keypoints_y = keypoints[1::2]

        # Shift/scale the network output into patch pixel coordinates.
        keypoints_x = (keypoints_x + 1) * (input_shape[0]//2)
        keypoints_y = (keypoints_y + 1) * (input_shape[1]//2)

        # Rescale from patch size to the detection box and move to its origin.
        # NOTE(review): 0.85 looks like an empirical shrink factor - confirm.
        keypoints_x = (keypoints_x * int(w*0.85) / input_shape[0]) + x
        keypoints_y = (keypoints_y * int(h*0.85) / input_shape[1]) + y

        return (face, list(zip(keypoints_x, keypoints_y)))

    faces = face_cascade.detectMultiScale(gray, 1.25, 6)

    # Nothing detected: there is nothing to feed to the network.
    if len(faces) == 0:
        return []

    # Build one (N, 1, H, W) float batch covering every detected face, instead
    # of only handling the single-face case.
    inputs = np.asarray([extract_patch(face) for face in faces])
    inputs = torch.from_numpy(inputs).type(torch.FloatTensor).unsqueeze(1)

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        predictions = net(inputs)

    return [denormalize_keypoints(pair) for pair in zip(faces, predictions)]

In [4]:
def applyOverlay(background, overlay_rgba):
    """Alpha-blend a 4-channel overlay onto a same-sized background patch.

    Parameters
    ----------
    background : numpy.ndarray
        ``(H, W, 3)`` patch with values in the 0-255 range.
    overlay_rgba : numpy.ndarray
        ``(H, W, 4)`` overlay; the first three channels are colour and the
        fourth is the opacity (0 = transparent, 255 = opaque).

    Returns
    -------
    numpy.ndarray
        ``(H, W, 3)`` ``uint8`` blend
        ``background * (1 - alpha) + overlay * alpha``.
    """
    overlay_rgb = overlay_rgba[:, :, :3].astype(np.float64)

    # Keep the trailing axis so the (H, W, 1) alpha broadcasts over the colour
    # channels.
    alpha = overlay_rgba[:, :, 3:].astype(np.float64) / 255.0

    blended = background * (1.0 - alpha) + overlay_rgb * alpha

    # Round instead of truncating so values are not biased downwards, and clip
    # to the valid 8-bit range before casting.
    return np.uint8(np.clip(np.rint(blended), 0, 255))

def applySunglasses(image, facial_keypoints, sunglasses):
    """Draw the sunglasses overlay on every face described by ``facial_keypoints``.

    Parameters
    ----------
    image : numpy.ndarray
        Frame to decorate; it is copied, the input is left untouched.
    facial_keypoints : iterable of (face, keypoints)
        As returned by ``detect_facial_keypoints``; ``keypoints`` is indexable
        and each entry is an ``(x, y)`` pair of scalars (tensors or numbers).
    sunglasses : numpy.ndarray
        Overlay image including an opacity channel as its fourth channel
        (``applyOverlay`` reads it).

    Returns
    -------
    numpy.ndarray
        Copy of ``image`` with the sunglasses blended in.
    """
    image = np.copy(image)
    image_height, image_width = image.shape[:2]

    # Sizing the glasses exactly from the keypoints makes them look too small,
    # so both dimensions are enlarged by 30%.
    scale_ratio = 0.3

    for (_face, keypoints) in facial_keypoints:

        # Keypoints 17 and 26 give the horizontal extent, 27 and 34 the
        # vertical extent of the glasses.
        # NOTE(review): in the usual 68-point layout 17/26 are the outer
        # eyebrow ends and 27/34 lie on the nose - confirm against the model.
        # float() accepts 0-d tensors as well as plain numbers.
        width = abs(float(keypoints[17][0]) - float(keypoints[26][0]))
        height = abs(float(keypoints[27][1]) - float(keypoints[34][1]))

        width = int(width * (1 + scale_ratio))
        height = int(height * (1 + scale_ratio))

        # Degenerate keypoints would make cv2.resize fail; skip such faces.
        if width <= 0 or height <= 0:
            continue

        offset_x = -width * scale_ratio / 2
        offset_y = -height * scale_ratio / 2

        x = int(float(keypoints[17][0]) + offset_x)
        y = int(float(keypoints[17][1]) + offset_y)

        # Clip the target rectangle to the frame so glasses that stick out of
        # the image are cropped instead of raising a shape mismatch.
        left, top = max(x, 0), max(y, 0)
        right = min(x + width, image_width)
        bottom = min(y + height, image_height)
        if left >= right or top >= bottom:
            continue

        # Always resize from the original overlay; re-using the previous
        # face's resized copy would degrade it with every additional face.
        resized = cv2.resize(sunglasses, (width, height))
        visible = resized[top - y:bottom - y, left - x:right - x]

        eyes_patch = image[top:bottom, left:right]
        eyes_patch = applyOverlay(eyes_patch, visible)
        image[top:bottom, left:right] = eyes_patch[:, :, :3]

    return image

In [5]:
def laptop_camera_go():
    """Show the default camera's stream with sunglasses drawn on detected faces.

    Runs until any key is pressed or the camera stops delivering frames. The
    capture device and the display window are always released on exit.

    Raises
    ------
    FileNotFoundError
        If the sunglasses image cannot be read.
    """
    window_name = "face detection activated"

    # Load the overlay once; it does not change between frames.
    sunglasses = cv2.imread('images/sunglasses.png', cv2.IMREAD_UNCHANGED)
    if sunglasses is None:  # cv2.imread signals failure by returning None
        raise FileNotFoundError("Could not read 'images/sunglasses.png'")

    # Create instance of video capturer
    cv2.namedWindow(window_name)
    vc = cv2.VideoCapture(0)

    try:
        # Try to get the first frame
        if vc.isOpened():
            rval, frame = vc.read()
        else:
            rval = False

        # keep video stream open
        while rval:
            facial_keypoints = detect_facial_keypoints(frame, input_shape=(224, 224))
            frame = applySunglasses(frame, facial_keypoints, sunglasses)

            # plot image from camera with detections marked
            cv2.imshow(window_name, frame)

            # exit functionality - press any key to exit laptop video
            if cv2.waitKey(20) > 0:
                break

            # throttle the loop before reading the next frame
            time.sleep(0.05)
            rval, frame = vc.read()
    finally:
        # Free the camera and close the window on every exit path.
        vc.release()
        cv2.destroyAllWindows()

        # hack from stack overflow for making sure window closes on osx --> https://stackoverflow.com/questions/6116564/destroywindow-does-not-close-window-on-mac-using-python-and-opencv
        for _ in range(1, 5):
            cv2.waitKey(1)

In [6]:
# Start the live demo; press any key in the video window to stop it.
laptop_camera_go()