In [2]:
from IPython.display import display, Javascript
from google.colab.output import eval_js
from base64 import b64decode, b64encode

import numpy as np
from PIL import Image

import io
import cv2 # OpenCV library

## Javascript code to capture the webcam feed

In [3]:
from google.colab.output import eval_js

def VideoCapture():
  """Inject the JavaScript webcam helpers into the notebook output.

  Displays a script defining three JavaScript functions, which the rest of the
  notebook invokes through `eval_js`:

  - `create()`: appends a <video> element to the page, requests a camera
    stream (facingMode "environment"), starts playback, creates a canvas
    sized to the video frame and appends an <img> element used for previews.
  - `capture()`: draws the current video frame on the canvas and resolves
    with it as a JPEG data URL (quality 0.20).
  - `showimg(imgb64)`: displays the given base64 string in the preview <img>
    by prefixing it with a `data:image/jpg;base64,` header.

  The JavaScript variables (`video`, `canvas`, `img`, ...) are assigned
  without declarations, so `capture()` and `showimg()` only work once
  `create()` has been run.

  Returns:
    None. The function only displays the script.
  """
  js = Javascript('''
    async function create(){
      div = document.createElement('div');
      document.body.appendChild(div);

      video = document.createElement('video');
      video.setAttribute('playsinline', '');

      div.appendChild(video);
      stream = await navigator.mediaDevices.getUserMedia({video: {facingMode: "environment"}});
      video.srcObject = stream;

      await video.play();

      canvas =  document.createElement('canvas');
      canvas.width = video.videoWidth;
      canvas.height = video.videoHeight;
      canvas.getContext('2d').drawImage(video, 0, 0);

      div_out = document.createElement('div');
      document.body.appendChild(div_out);
      img = document.createElement('img');
      div_out.appendChild(img);
    }

    async function capture(){
        return await new Promise(function(resolve, reject){
            pendingResolve = resolve;
            canvas.getContext('2d').drawImage(video, 0, 0);
            result = canvas.toDataURL('image/jpeg', 0.20);

            pendingResolve(result);
        })
    }

    function showimg(imgb64){
        img.src = "data:image/jpg;base64," + imgb64;
    }

  ''')
  # Displaying the Javascript object is what makes the functions above
  # available to later eval_js calls.
  display(js)

## Conversions between data formats

In [4]:
def b64_to_bytes(byte):
  """Decode a base64-encoded image into a NumPy array.

  Despite its name, the function returns a decoded image array, not bytes.

  Args:
    byte: Either a data URL (`data:image/jpeg;base64,<payload>`) or the bare
      base64 payload as a string.

  Returns:
    The decoded image as a NumPy array, in whatever mode PIL decodes it to.
  """
  # In a data URL the payload is everything after the first comma. A string
  # without a header is used as-is (`split(',')[1]` raised IndexError on it).
  header, separator, payload = byte.partition(',')
  if not separator:
    payload = header
  jpeg = b64decode(payload)
  im = Image.open(io.BytesIO(jpeg))
  return np.array(im)

def bytes_to_b64(image):
  """Encode an image array as a base64 JPEG string.

  Args:
    image: Array accepted by `PIL.Image.fromarray`.

  Returns:
    The JPEG-compressed image, base64-encoded and decoded to a UTF-8 `str`
    (payload only, without any `data:` header).
  """
  jpeg_buffer = io.BytesIO()
  Image.fromarray(image).save(jpeg_buffer, 'jpeg')
  # getvalue() returns the whole buffer regardless of the stream position.
  return b64encode(jpeg_buffer.getvalue()).decode('utf-8')

## Loading Cascades and mounting drive

In [5]:
# Load the Haar cascade from the working directory (upload the XML file to your colab env).
face_cascades = cv2.CascadeClassifier("haarcascade_frontalface_default.xml")
if face_cascades.empty():
  # CascadeClassifier does not raise when the file is missing or unreadable: it
  # just returns an empty classifier. Fall back to the copy shipped with opencv-python.
  face_cascades = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
if face_cascades.empty():
  # Fail here with a clear message rather than later inside detectMultiScale.
  raise FileNotFoundError("Could not load haarcascade_frontalface_default.xml: upload it to the Colab working directory.")

Note: mounting the drive allows for easier writing of the images below - that being said, the PATH variable needs to be updated to fit your own drive structure if you want to save images with my code.

In [20]:
# Mount Google Drive at /content/drive; the PATH used when saving hand images
# points inside this mount, so run this cell before the saving section.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Utility functions

In [6]:
def scale_contour(pts, scale_x, scale_y):
  """Scale a contour about its centroid.

  Args:
    pts: Array of (x, y) points, as accepted by `cv2.moments`.
    scale_x: Factor applied to the x offsets from the centroid.
    scale_y: Factor applied to the y offsets from the centroid.

  Returns:
    The scaled points as an `np.int32` array, or `pts` itself (unchanged)
    when the zeroth moment `m00` is 0 and no centroid can be computed.
  """
  moments = cv2.moments(pts)
  m00 = moments['m00']

  # Degenerate contour: the centroid would require a division by zero.
  if m00 == 0:
    return pts

  # The centroid is truncated to integer pixel coordinates.
  centroid = [int(moments['m10'] / m00), int(moments['m01'] / m00)]
  factors = np.array([scale_x, scale_y])

  # Move to centroid-relative coordinates, stretch, then move back.
  rescaled = (pts - centroid) * factors + centroid
  return rescaled.astype(np.int32)

In [7]:
def crop_hand(pts, im_width, im_height):
  """Return the axis-aligned bounding box of `pts`, clamped to the image.

  Every coordinate is clamped to `[0, im_width]` (x) or `[0, im_height]` (y).
  The original only clamped the top-left corner from below and the
  bottom-right corner from above, so a box lying off-screen produced a
  negative bottom-right (which wraps around when used as a slice bound) or a
  top-left beyond the image.

  Args:
    pts: 2-D array whose columns 0 and 1 hold the x and y coordinates.
    im_width: Image width in pixels.
    im_height: Image height in pixels.

  Returns:
    `((x_tl, y_tl), (x_br, y_br))` as plain Python ints, with
    `0 <= x_tl <= x_br <= im_width` and `0 <= y_tl <= y_br <= im_height`.
    Non-integer coordinates are truncated by `int()`.
  """
  def _clamp(value, upper):
    # Plain ints are safe to use both as slice bounds and as OpenCV points.
    return int(min(max(value, 0), upper))

  xs, ys = pts[:, 0], pts[:, 1]
  top_left = (_clamp(min(xs), im_width), _clamp(min(ys), im_height))
  bottom_right = (_clamp(max(xs), im_width), _clamp(max(ys), im_height))

  return top_left, bottom_right

In [8]:
def detect_faces(img, cascades, scale_factor=1.3, min_neighbors=4):
  """Run a cascade classifier on an image and return its detections.

  Args:
    img: Image array. A 3-D array is converted to grayscale with
      `cv2.COLOR_BGR2GRAY`; a 2-D array is treated as already grayscale.
    cascades: Object exposing `detectMultiScale` (e.g. `cv2.CascadeClassifier`).
    scale_factor: Second positional argument of `detectMultiScale`
      (defaults to the previously hard-coded 1.3).
    min_neighbors: Third positional argument of `detectMultiScale`
      (defaults to the previously hard-coded 4).

  Returns:
    Whatever `cascades.detectMultiScale` returns.
  """
  # NOTE(review): arrays decoded through PIL are presumably RGB, in which case
  # this conversion swaps the red/blue weights - confirm before changing it.
  if img.ndim == 2:
    # Already single-channel: cvtColor would reject it.
    gray = img
  else:
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  return cascades.detectMultiScale(gray, scale_factor, min_neighbors)

# Selection of the initial face for the histogram and later CamShift usage

In [None]:
VideoCapture()
eval_js('create()')

# Keep capturing frames until the user accepts one containing exactly one face.
while True:
  # The typed text is ignored: the prompt only lets the user decide when the
  # frame is grabbed.
  response = input("Type anything when ready!:")
  byte = eval_js('capture()')
  im = b64_to_bytes(byte)
  im_copy = im.copy()
  faces = detect_faces(im, face_cascades)

  if len(faces) != 1:
    # Tell the user why nothing is shown instead of silently prompting again.
    print("Found {} face(s); exactly one is needed. Try again.".format(len(faces)))
    continue

  face = faces[0]
  # Plain ints for the (x, y, w, h) box, usable for drawing, slicing and tracking.
  x, y, w, h = (int(v) for v in face)
  # Draw on the copy so the frame cropped below stays free of the rectangle.
  cv2.rectangle(im_copy, (x, y), (x + w, y + h), (0, 255, 0), 2)

  eval_js('showimg("{}")'.format(bytes_to_b64(im_copy)))
  response = input("Use this bounding box? [y or n]:")
  if response.strip().lower() == 'y':
    frame = im[y:y + h, x:x + w]
    eval_js('showimg("{}")'.format(bytes_to_b64(frame)))
    # cv2.CamShift takes its search window as an (x, y, w, h) rectangle.
    tracking_window_face = (x, y, w, h)
    break

### Computing and cleaning of the face histogram

In [14]:
# Work in HSV so that the hue channel can be histogrammed on its own
frame_hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# inRange bounds: any hue, saturation limited to [64., 200.] and brightness to [32., 200.]
hsv_lower = np.array([0., 64., 32.])
hsv_upper = np.array([180., 200., 200.])
mask = cv2.inRange(frame_hsv, hsv_lower, hsv_upper)
# Histogram of the HUE channel only (channel 0). See `https://bit.ly/3pdVUEd`
# The mask restricts it to pixels which are neither too bright nor too dark
# 16 bins are used over the hue range ([0, 180])
frame_hist = cv2.calcHist([frame_hsv], [0], mask, [16], [0, 180])
# MinMax-normalize (`cv2.NORM_MINMAX`) between 0 (lowest intensity) and 255 (highest intensity),
# see `https://bit.ly/3jMGhCj`, then reshape the result into a 1-D array
frame_hist = cv2.normalize(frame_hist, frame_hist, 0, 255, cv2.NORM_MINMAX).reshape(-1)

# Saving Hand Images

Note: Obviously, the path below is specific to my gdrive.

Note 2: the imwrite will not work if the directory in which we try to save doesn't exist.

In [38]:
# Letter currently being recorded; it names both the output folder and the saved files.
letter = 'C'
# Output folder for this letter (must already exist on the mounted drive).
PATH = '/content/drive/MyDrive/Colab Notebooks/Computer Vision/dataset/{}/'.format(letter)

In [None]:
VideoCapture()
eval_js('create()')

# (type, max_iter, epsilon): stop each mean-shift run after 10 iterations, or
# earlier once the window moves by less than 1 between two iterations.
term_crit = ( cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 1 )

# Grab one frame up front just to learn the image dimensions.
byte = eval_js('capture()')
im = b64_to_bytes(byte)
im_width, im_height = im.shape[1], im.shape[0]

# The hand search starts from the whole image.
tracking_window_hand = (0, 0, im_width, im_height)
cpt = -1

# cpt counts processed frames; frames 6..105 are saved (100 images).
while cpt < 105:
  byte = eval_js('capture()') # Take a capture
  im = b64_to_bytes(byte)

  # Convert the capture in HSV
  # NOTE(review): frames decoded through PIL are presumably RGB; the BGR code is
  # kept because it must match the conversion used to build frame_hist.
  hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
  # Compute an inRange mask as before with the frame
  mask = cv2.inRange(hsv, np.array([0., 64., 32.]), np.array([180., 200., 200.]))
  # Back project the frame histogram into the hsv image. Use only channel 0 (Hue), range of [0,180] and scale of 1
  prob = cv2.calcBackProject([hsv], [0], frame_hist, [0, 180], scale=1)
  # Bitwise and the back projection and the mask in order to remove very bright or very dark pixels
  prob = prob & mask

  # Track the face first...
  bbox, tracking_window_face = cv2.CamShift(prob, tracking_window_face, term_crit)

  # np.int was removed from NumPy (1.24); int32 is also the point type OpenCV expects.
  pts = cv2.boxPoints(bbox).astype(np.int32)
  scaled_pts = scale_contour(pts, 1.5, 1.3)
  # ...then erase an enlarged face region from the probability map so that the
  # second CamShift run locks onto the remaining skin-coloured area (the hand).
  cv2.fillPoly(prob, [scaled_pts], 0)
  #Do not draw the polylines to avoid them being saved in the dataset
  #cv2.polylines(im, [pts], True, (255, 255 , 255), 2)
  #cv2.polylines(im, [scaled_pts], True, (255, 255 , 0), 2)

  bbox, tracking_window_hand = cv2.CamShift(prob, tracking_window_hand, term_crit)

  pts = cv2.boxPoints(bbox).astype(np.int32)
  pts = scale_contour(pts, 1.8, 1.5)
  cropped_hand_bbox = crop_hand(pts, im_width, im_height)
  (x_tl, y_tl), (x_br, y_br) = cropped_hand_bbox

  cpt += 1
  # Crop BEFORE drawing the rectangle, otherwise its border ends up in the saved image.
  # An empty or inverted box (hand not found / off-screen) is skipped instead of
  # crashing cvtColor/resize.
  if 0 <= x_tl < x_br and 0 <= y_tl < y_br:
    cropped_hand = cv2.cvtColor(im[y_tl:y_br, x_tl:x_br], cv2.COLOR_BGR2GRAY)
    cropped_hand = cv2.resize(cropped_hand, (224, 224))
    if cpt>5: #the first few frames we don't have the hand properly detected yet
      # imwrite reports failure through its return value only (e.g. missing directory).
      if not cv2.imwrite(PATH + letter + str(cpt) + '.jpg', cropped_hand):
        raise IOError("cv2.imwrite could not write to {} (does the directory exist?)".format(PATH))

  # The rectangle is only for the live preview.
  cv2.rectangle(im, cropped_hand_bbox[0], cropped_hand_bbox[1], (255, 0, 0), 2)
  eval_js('showimg("{}")'.format(bytes_to_b64(im)))