# Face and emotion detection live
This notebook will go access and run code on images and video taken using webcam.  

In [None]:
# import dependencies
from IPython.display import display, Javascript, Image
import matplotlib.pyplot as plt
from google.colab.output import eval_js
from base64 import b64decode, b64encode
import cv2
import numpy as np
import PIL
import io
import html
import time

## Helper Functions
Below are a few helper function to make converting between different image data types and formats. 

In [None]:
# function to convert the JavaScript object into an OpenCV image
def js_to_image(js_reply):
  """
  Params:
          js_reply: JavaScript object containing image from webcam
  Returns:
          img: OpenCV BGR image
  """
  # decode base64 image
  image_bytes = b64decode(js_reply.split(',')[1])
  # convert bytes to numpy array
  jpg_as_np = np.frombuffer(image_bytes, dtype=np.uint8)
  # decode numpy array into OpenCV BGR image
  img = cv2.imdecode(jpg_as_np, flags=1)

  return img

# function to convert OpenCV Rectangle bounding box image into base64 byte string to be overlayed on video stream
def bbox_to_bytes(bbox_array):
  """
  Params:
          bbox_array: Numpy array (pixels) containing rectangle to overlay on video stream.
  Returns:
        bytes: Base64 image byte string
  """
  # convert array into PIL image
  bbox_PIL = PIL.Image.fromarray(bbox_array, 'RGBA')
  iobuf = io.BytesIO()
  # format bbox into png for return
  bbox_PIL.save(iobuf, format='png')
  # format return string
  bbox_bytes = 'data:image/png;base64,{}'.format((str(b64encode(iobuf.getvalue()), 'utf-8')))

  return bbox_bytes

## Haar Cascade Classifier
For this tutorial we will run a simple object detection algorithm called Haar Cascade on our images and video fetched from our webcam. OpenCV has a pre-trained Haar Cascade face detection model. 

In [None]:
# initialize the Haar Cascade face detection model
face_cascade = cv2.CascadeClassifier(cv2.samples.findFile(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'))

## Webcam Images
Running code on images taken from webcam is fairly straight-forward. We will utilize code within Google Colab's **Code Snippets** that has a variety of useful code functions to perform various tasks. (this code exists within colab)
We will be using the code snippet for **Camera Capture** to utilize your computer's webcam.

In [None]:
def take_photo(filename='photo.jpg', quality=0.8):
  js = Javascript('''
    async function takePhoto(quality) {
      const div = document.createElement('div');
      const capture = document.createElement('button');
      capture.textContent = 'Capture';
      div.appendChild(capture);

      const video = document.createElement('video');
      video.style.display = 'block';
      const stream = await navigator.mediaDevices.getUserMedia({video: true});

      document.body.appendChild(div);
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // Wait for Capture to be clicked.
      await new Promise((resolve) => capture.onclick = resolve);

      const canvas = document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);
      stream.getVideoTracks()[0].stop();
      div.remove();
      return canvas.toDataURL('image/jpeg', quality);
    }
    ''')
  display(js)

  # get photo data
  data = eval_js('takePhoto({})'.format(quality))
  # get OpenCV format image
  img = js_to_image(data) 
  # grayscale img
  gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
  print(gray.shape)
  # get face bounding box coordinates using Haar Cascade
  faces = face_cascade.detectMultiScale(gray)
  # draw face bounding box on image
  for (x,y,w,h) in faces:
      img = cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)
  # save image
  cv2.imwrite(filename, img)

  return filename

In [None]:
try:
  filename = take_photo('photo.jpg')
  print('Saved to {}'.format(filename))
  
  # Show the image which was just taken.
  display(Image(filename))
except Exception as err:
  # Errors will be thrown if the user does not have a webcam or if they do not
  # grant the page permission to access it.
  print(str(err))

'NoneType' object has no attribute 'split'


## Webcam Videos
Running code on webcam video is a little more complex than images. We need to start a video stream using our webcam as input.

In [None]:
# JavaScript to properly create our live video stream using our webcam as input
def video_stream():
  js = Javascript('''
    var video;
    var div = null;
    var stream;
    var captureCanvas;
    var imgElement;
    var labelElement;
    
    var pendingResolve = null;
    var shutdown = false;
    
    function removeDom() {
       stream.getVideoTracks()[0].stop();
       video.remove();
       div.remove();
       video = null;
       div = null;
       stream = null;
       imgElement = null;
       captureCanvas = null;
       labelElement = null;
    }
    
    function onAnimationFrame() {
      if (!shutdown) {
        window.requestAnimationFrame(onAnimationFrame);
      }
      if (pendingResolve) {
        var result = "";
        if (!shutdown) {
          captureCanvas.getContext('2d').drawImage(video, 0, 0, 640, 480);
          result = captureCanvas.toDataURL('image/jpeg', 0.8)
        }
        var lp = pendingResolve;
        pendingResolve = null;
        lp(result);
      }
    }
    
    async function createDom() {
      if (div !== null) {
        return stream;
      }

      div = document.createElement('div');
      div.style.border = '2px solid black';
      div.style.padding = '3px';
      div.style.width = '100%';
      div.style.maxWidth = '600px';
      document.body.appendChild(div);
      
      const modelOut = document.createElement('div');
      modelOut.innerHTML = "<span>Status:</span>";
      labelElement = document.createElement('span');
      labelElement.innerText = 'No data';
      labelElement.style.fontWeight = 'bold';
      modelOut.appendChild(labelElement);
      div.appendChild(modelOut);
           
      video = document.createElement('video');
      video.style.display = 'block';
      video.width = div.clientWidth - 6;
      video.setAttribute('playsinline', '');
      video.onclick = () => { shutdown = true; };
      stream = await navigator.mediaDevices.getUserMedia(
          {video: { facingMode: "environment"}});
      div.appendChild(video);

      imgElement = document.createElement('img');
      imgElement.style.position = 'absolute';
      imgElement.style.zIndex = 1;
      imgElement.onclick = () => { shutdown = true; };
      div.appendChild(imgElement);
      
      const instruction = document.createElement('div');
      instruction.innerHTML = 
          '<span style="color: red; font-weight: bold;">' +
          'When finished, click here or on the video to stop</span>';
      div.appendChild(instruction);
      instruction.onclick = () => { shutdown = true; };
      
      video.srcObject = stream;
      await video.play();

      captureCanvas = document.createElement('canvas');
      captureCanvas.width = 640; //video.videoWidth;
      captureCanvas.height = 480; //video.videoHeight;
      window.requestAnimationFrame(onAnimationFrame);
      
      return stream;
    }
    async function stream_frame(label, imgData) {
      if (shutdown) {
        removeDom();
        shutdown = false;
        return '';
      }

      var preCreate = Date.now();
      stream = await createDom();
      
      var preShow = Date.now();
      if (label != "") {
        labelElement.innerHTML = label;
      }
            
      if (imgData != "") {
        var videoRect = video.getClientRects()[0];
        imgElement.style.top = videoRect.top + "px";
        imgElement.style.left = videoRect.left + "px";
        imgElement.style.width = videoRect.width + "px";
        imgElement.style.height = videoRect.height + "px";
        imgElement.src = imgData;
      }
      
      var preCapture = Date.now();
      var result = await new Promise(function(resolve, reject) {
        pendingResolve = resolve;
      });
      shutdown = false;
      
      return {'create': preShow - preCreate, 
              'show': preCapture - preShow, 
              'capture': Date.now() - preCapture,
              'img': result};
    }
    ''')

  display(js)
  
def video_frame(label, bbox):
  data = eval_js('stream_frame("{}", "{}")'.format(label, bbox))
  return data

the actual detection is here, so we can add here our skin detection preprocessing before face detecting, and then take the output into emotion processing  

In [None]:
import tensorflow as tf
from tensorflow import keras
model = tf.keras.Model
#model.load('/content/anlz_mdl.h5')
model = keras.models.load_model('/content/anlz_mdl2.h5')

Face detection neural network

In [None]:
face_model = tf.keras.Model
#model.load('/content/anlz_mdl.h5')
face_model = keras.models.load_model('/content/dtct_mdl2.h5')

In [None]:
#for time measurement 
import statistics
import time
times=[]
start = time.time()
end = time.time()
print(end - start)

4.410743713378906e-05


In [23]:
from numpy.core.fromnumeric import size
images=[]
# start streaming video from webcam
video_stream()
# label for video
label_html = 'Capturing...'
# initialze bounding box to empty
bbox = ''
count = 0
while True: #loop for detecting faces 
    js_reply = video_frame(label_html, bbox)
    if not js_reply:
        break
    start = time.time()
    # convert JS response to OpenCV Image
    img = js_to_image(js_reply["img"])

    # create transparent overlay for bounding box
    bbox_array = np.zeros([480,640,4], dtype=np.uint8)

    # skin detection
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    f_img = img.copy()

    img[:, :, 0] = cv2.multiply(img[:, :, 0], 0.5)
    img[:, :, 1] = cv2.multiply(img[:, :, 1], 0.5)
    img[:, :, 2] = cv2.multiply(img[:, :, 2], 0.5)

    min_YCrCb1 = np.array([0, 133, 77], np.uint8)
    max_YCrCb1 = np.array([235, 173, 127], np.uint8)

    min_YCrCb2 = np.array([20, 129, 117], np.uint8)
    max_YCrCb2 = np.array([40, 143, 127], np.uint8)

    imageYCrCb = cv2.cvtColor(img, cv2.COLOR_RGB2YCR_CB)
    skinRegionYCrCb1 = cv2.inRange(imageYCrCb, min_YCrCb1, max_YCrCb1)
    skinRegionYCrCb2 = cv2.inRange(imageYCrCb, min_YCrCb2, max_YCrCb2)

    skinRegionYCrCb1 = cv2.medianBlur(skinRegionYCrCb1, 5)
    skinRegionYCrCb2 = cv2.medianBlur(skinRegionYCrCb2, 5)

    skinRegionYCrCb = skinRegionYCrCb1 + skinRegionYCrCb1
    skinYCrCb = cv2.bitwise_and(img, img, mask = skinRegionYCrCb) # second img for how we want to save the pixel values, like the original
    skinYCrCb = cv2.blur(skinYCrCb, (5, 5))

    kernel = np.ones((5, 5), np.uint8)

    erosion_1 = cv2.erode(skinYCrCb, kernel, iterations = 4)
    erosion = cv2.medianBlur(erosion_1, 5)
    dilation_1 = cv2.dilate(erosion, kernel, iterations = 11)
    erosion_2 = cv2.erode(dilation_1, kernel, iterations = 1)
    # to make connected components
    ers_2 = cv2.cvtColor(erosion_2, cv2.COLOR_RGB2GRAY)

    connectivity = 8

    n_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(ers_2, connectivity, cv2.CV_32S)

    check_set = []
    #check_Set is RGB
    for i in range(n_labels - 1):
        x = int(stats[i + 1, cv2.CC_STAT_TOP])
        y = int(stats[i + 1, cv2.CC_STAT_LEFT])
        h = int(stats[i + 1, cv2.CC_STAT_HEIGHT])
        w = int(stats[i + 1, cv2.CC_STAT_WIDTH])

        check_set.append(f_img[x : x + h, y : y + w])

    check_set = np.array(check_set)
    
    # get face region coordinates
    for i in range(len(check_set)):
      if face_model.predict(np.array([cv2.resize(check_set[i],(48,48),interpolation=cv2.INTER_CUBIC) / 255]))<0.95 and check_set[i].shape[0] / img.shape[0] >= 0.05:
        x_skin=(stats[i + 1, cv2.CC_STAT_TOP])
        y_skin=(stats[i + 1, cv2.CC_STAT_LEFT])
        faces = face_cascade.detectMultiScale(check_set[i])
        # get face bounding box for overlay
        for (x,y,w,h) in faces:
          bbox_array = cv2.rectangle(bbox_array,(y+y_skin,x+x_skin),(y+y_skin+h,x+x_skin+w),(1,50,255),2)
          image=np.array(img[y+y_skin:y+y_skin+h,x+x_skin:x+x_skin+w])
          #images with 3 grayscale channels
          if size(image)>0:
            
            image=cv2.cvtColor(image,cv2.COLOR_RGB2GRAY)
            image=cv2.cvtColor(image,cv2.COLOR_GRAY2RGB)

            #input of the neural network preprocessed and converted
            res = np.argmax(model.predict(np.array([cv2.resize(image,(48,48),interpolation=cv2.INTER_CUBIC) / 255])))
            dic={0:'angry',1:'contempt',2:'disgust',3:'fear',4:'happy',5:'sadness',6:'surprised'}
            #putting the text on the box
            cv2.putText(bbox_array,dic[res],(y+y_skin-10,x+x_skin-10),cv2.FONT_HERSHEY_SIMPLEX,1,(1,50,255),2)
            #images.append(image)

          bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255
        # convert overlay of bbox into bytes
          bbox_bytes = bbox_to_bytes(bbox_array)
          # update bbox so next frame gets new overlay
          bbox = bbox_bytes
        
      if face_model.predict(np.array([cv2.resize(check_set[i],(48,48),interpolation=cv2.INTER_CUBIC) / 255]))>=0.95 and check_set[i].shape[0] / img.shape[0] >= 0.05:
        x = stats[i + 1, cv2.CC_STAT_TOP]
        y = stats[i + 1, cv2.CC_STAT_LEFT]
        h =stats[i + 1, cv2.CC_STAT_HEIGHT]
        w =stats[i + 1, cv2.CC_STAT_WIDTH]
        image=cv2.cvtColor(check_set[i],cv2.COLOR_RGB2GRAY)
        image=cv2.cvtColor(image,cv2.COLOR_GRAY2RGB)

        #input of the neural network preprocessed and converted
        res = np.argmax(model.predict(np.array([cv2.resize(image,(48,48),interpolation=cv2.INTER_CUBIC) / 255])))
        dic={0:'angry',1:'contempt',2:'disgust',3:'fear',4:'happy',5:'sadness',6:'surprised'}
        #putting the text on the box
        cv2.putText(bbox_array,dic[res],(y-10,x-10),cv2.FONT_HERSHEY_SIMPLEX,1,(1,50,255),2)
        
        cv2.rectangle(bbox_array,(y,x),(y+w,x+h),(1,50,255),2)

        bbox_array[:,:,3] = (bbox_array.max(axis = 2) > 0 ).astype(int) * 255
        # convert overlay of bbox into bytes
        bbox_bytes = bbox_to_bytes(bbox_array)
        # update bbox so next frame gets new overlay
        bbox = bbox_bytes
      #calculate time of execution
      end = time.time()
      times.append(end - start)


MessageError: ignored

In [None]:
print(statistics.mean(times))

0.5089147164046399


Thank you