# Face Detection on Videos

In [None]:
import cv2
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt
import numpy as np
import time

## Reading the video file

In [None]:
# Path of the input video inside the Colab runtime.
video_file = '/content/video01.mp4'
cap = cv2.VideoCapture(video_file)
# Fail early with a clear message when the file cannot be opened, instead of
# hitting an attribute error on the first frame further down.
if not cap.isOpened():
  raise IOError('Could not open video file: {}'.format(video_file))

In [None]:
# Read one frame to probe the stream: `connected` is the success flag and
# `video` holds the frame itself (despite its name).
# NOTE(review): this consumes a frame from `cap`, so the processing loop
# further down appears to start from the following frame - confirm intended.
connected, video = cap.read()
print(connected)

True


In [None]:
# Shape of the probed frame; index 0 is used as the height and index 1 as the
# width in the next cell.
video.shape

(1080, 1920, 3)

In [None]:
# Original frame size, taken from the probed frame's shape (rows = height,
# columns = width). These may be overwritten by the optional resize step.
video_width = video.shape[1]
video_height = video.shape[0]
print(video_width, video_height)

1920 1080


## Video resize (optional)  

In [None]:
# Maximum width of the processed frames. Set to None to skip resizing: both
# the resize cell and the processing loops check `max_width is not None`.
max_width = 900

In [None]:
def resize_video(width, height, max_width = 600):
  """Compute the output frame size, capping the width at `max_width`.

  Args:
    width: original frame width in pixels.
    height: original frame height in pixels.
    max_width: widest allowed output; frames at or below it are left as-is.

  Returns:
    A `(video_width, video_height)` tuple. When the width is reduced, the
    height is scaled by the same aspect ratio and truncated to an int.
  """
  # Frames that already fit keep their original size.
  if width <= max_width:
    return width, height

  aspect_ratio = width / height
  return max_width, int(max_width / aspect_ratio)

In [None]:
# Replace the original dimensions with the capped ones; these values are
# later used for the VideoWriter frame size and for resizing each frame.
if max_width is not None:
  video_width, video_height = resize_video(video_width, video_height, max_width)
print(video_width, video_height)

900 506


## Video Configurations

In [None]:
# Name of the annotated output video; the ffmpeg conversion cells read this
# same file name.
result_file = 'result.avi'

> Codec of the file

*FourCC is a 4-character code identifying the codec used to compress the frames (examples of other codecs: MP4V, MJPG, DIVX, X264...). More options can be seen here: fourcc.org*


In [None]:
# Four-character codec code handed to the VideoWriter when the output file is
# created.
fourcc = cv2.VideoWriter_fourcc(*'XVID')

> FPS (frames per second)

If you want to make the video a little slower you can decrease the number of frames per second (to 20 for example)

In [None]:
# Frame rate of the output video.
# NOTE(review): hard-coded; the source frame rate is not read from `cap`, so
# playback speed may differ from the input - confirm this is intended.
fps = 24

In [None]:
# Writer for the annotated video, created with the (possibly resized) frame
# dimensions computed above.
# NOTE(review): frames passed to write() presumably need to match this size;
# the processing loops resize to the same (video_width, video_height).
video_output = cv2.VideoWriter(result_file, fourcc, fps, (video_width, video_height))

## Face detection with Haarcascade - Configuration

In [None]:
# Haar cascade for frontal faces, loaded from an XML file in the Colab runtime.
# NOTE(review): no check that the cascade file was actually found and loaded -
# consider verifying before running detection.
face_detector = cv2.CascadeClassifier('/content/haarcascade_frontalface_default.xml')

In [None]:
def detect_face_haarcascade(face_detector, image):
  """Mark every face found by a Haar cascade on `image`.

  Args:
    face_detector: object exposing `detectMultiScale` (a loaded cascade).
    image: BGR frame; it is converted to grayscale for detection and the
      boxes are drawn directly onto it (the input is modified in place).

  Returns:
    The same `image` object, with a (0, 255, 255) rectangle of thickness 3
    around each detection.
  """
  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  faces = face_detector.detectMultiScale(
      gray, scaleFactor=1.15, minNeighbors=5, minSize=(50, 50))

  # Each detection is an (x, y, width, height) box.
  for left, top, box_w, box_h in faces:
    top_left = (left, top)
    bottom_right = (left + box_w, top + box_h)
    cv2.rectangle(image, top_left, bottom_right, (0, 255, 255), 3)

  return image

### Processing the video

In [None]:
# Loop settings shared by the processing cells:
frames_show = 20    # only the first `frames_show` frames are displayed inline
current_frame = 1   # 1-based counter of the frame being processed
max_frames = -1     # stop after this many frames; -1 disables the limit

In [None]:
# Run Haar cascade detection on every remaining frame of `cap`, stamp the
# per-frame processing time on it and append it to `video_output`.
# The try/finally guarantees the writer is released (and the output file
# finalised) even if the loop is interrupted or a frame raises.
try:
  while cv2.waitKey(1) < 0:
    connected, frame = cap.read()
    if not connected:
      # No more frames (or the stream failed): stop processing.
      break
    if max_frames > -1 and current_frame > max_frames:
      break

    t = time.time()
    if max_width is not None:
      frame = cv2.resize(frame, (video_width, video_height))

    # The detector draws on `frame` in place and returns that same array.
    processed_frame = detect_face_haarcascade(face_detector, frame)
    cv2.putText(frame, " frame processed in {:.2f} seconds".format(time.time() - t), (20, video_height - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (250,250,250), 0, lineType = cv2.LINE_AA)
    video_output.write(processed_frame)

    # Preview only the first few frames to keep the notebook output small.
    if current_frame <= frames_show:
      cv2_imshow(cv2.resize(processed_frame, (0,0), fx=0.75, fy=0.75))
    current_frame += 1
finally:
  video_output.release()
  cv2.destroyAllWindows()

print('Finished')

Output hidden; open in https://colab.research.google.com to view.

In [None]:
# Convert the AVI result to MP4 so it can be embedded with a video/mp4 source
# by show_video.
!ffmpeg -y -loglevel panic -i result.avi result.mp4

In [None]:
def show_video(name_file, width=700, height=480):
  """Embed a video file in the notebook as an inline HTML5 player.

  The whole file is read into memory and inlined as a base64 data URI with
  the `video/mp4` MIME type.

  Args:
    name_file: path of the video file to embed.
    width: value of the player's `width` attribute.
    height: value of the player's `height` attribute.

  Returns:
    An `IPython.display.HTML` object containing the `<video>` element.
  """
  import base64
  from IPython.display import HTML
  # Use a context manager so the file handle is closed right after reading.
  with open(name_file, 'rb') as video_stream:
    video_encoded = base64.b64encode(video_stream.read())
  return HTML(data='''<video width="{0}" height="{1}" alt="Video" controls>
                        <source src="data:video/mp4;base64,{2}" type="video/mp4" />
                      </video>'''.format(width, height, video_encoded.decode('ascii')))

In [None]:
# Display the converted Haar cascade result inline.
show_video('result.mp4')

Output hidden; open in https://colab.research.google.com to view.

## Face detection with Deep Learning - Configuration

In [None]:
# Load the Caffe face detector (network definition + trained weights).
# NOTE(review): these paths are relative to the working directory, unlike the
# absolute /content/... paths used elsewhere - confirm the files are there.
network = cv2.dnn.readNetFromCaffe("deploy.prototxt.txt", "res10_300x300_ssd_iter_140000.caffemodel")

In [None]:
def detect_face(network, image, show_conf=True, size=300, conf_min = 0.7):
  """Detect faces with a DNN and draw the detections on `image`.

  Args:
    network: network object exposing `setInput` and `forward`.
    image: frame to analyse; boxes and labels are drawn onto it in place.
    show_conf: when True, write the confidence (as a percentage) next to
      each box.
    size: side length, in pixels, of the square input fed to the network.
    conf_min: detections with confidence at or below this value are ignored.

  Returns:
    The same `image` object with a green rectangle (and optional label)
    for every accepted detection.
  """
  (h, w) = image.shape[:2]
  blob = cv2.dnn.blobFromImage(cv2.resize(image, (size, size)), 1.0, (size, size), (104.0, 117.0, 123.0))
  network.setInput(blob)
  detections = network.forward()

  for i in range(0, detections.shape[2]):
    confidence = detections[0, 0, i, 2]
    if confidence > conf_min:
      # Box coordinates are scaled by the frame size (presumably they come out
      # of the network normalised to [0, 1]).
      bbox = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
      # Convert to plain Python ints for the drawing calls.
      start_x, start_y, end_x, end_y = (int(v) for v in bbox.astype("int"))

      cv2.rectangle(image, (start_x, start_y), (end_x, end_y), (0, 255, 0), 2)
      if show_conf:
        text = "{:.2f}%".format(confidence * 100)
        # Keep the label visible: when the box touches the top of the frame,
        # draw the text just inside the box instead of above it.
        text_y = start_y - 10 if start_y - 10 > 10 else start_y + 15
        cv2.putText(image, text, (start_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

  return image

### Processing the video

In [None]:
# Start this pass from a clean state. The capture was already read to the end
# and the writer already released by the Haar cascade loop earlier in the
# notebook, so reusing them would process zero frames and leave result.avi
# holding the previous result. Re-open the input, recreate the writer and
# reset the frame counter instead.
cap.release()
cap = cv2.VideoCapture(video_file)
video_output = cv2.VideoWriter(result_file, fourcc, fps, (video_width, video_height))
current_frame = 1

try:
  while (cv2.waitKey(1) < 0):
    connected, frame = cap.read()

    if not connected:
      # No more frames (or the stream failed): stop processing.
      break
    if max_frames > -1 and current_frame > max_frames:
      break

    # Explicit check instead of a bare `except:` around the frame access.
    if frame is None:
      print("Error => ")
      continue

    t = time.time()

    if max_width is not None:
      frame = cv2.resize(frame, (video_width, video_height))

    # The detector draws on `frame` in place and returns that same array.
    processed_frame = detect_face(network, frame, True, 500)

    cv2.putText(frame, " frame processed in {:.2f} seconds".format(time.time() - t), (20, video_height-20), cv2.FONT_HERSHEY_SIMPLEX, 0.4, (250, 250, 250), 0, lineType=cv2.LINE_AA)

    video_output.write(processed_frame)

    # Preview only the first few frames to keep the notebook output small.
    if current_frame <= frames_show:
      cv2_imshow(cv2.resize(processed_frame, (0,0), fx=0.75, fy=0.75))

    current_frame = current_frame + 1
finally:
  # Always finalise the output file and free the capture, even on interruption.
  video_output.release()
  cap.release()
  cv2.destroyAllWindows()

print("Finished")

Finished


In [None]:
# Convert the AVI written by the loop above to MP4 (result2.mp4) so it can be
# embedded with a video/mp4 source by show_video.
!ffmpeg -y -loglevel panic -i result.avi result2.mp4

In [None]:
# Display the converted deep-learning result inline.
show_video("result2.mp4")

Output hidden; open in https://colab.research.google.com to view.