<a href="https://colab.research.google.com/github/26medias/TF-Face-Angle-Translation/blob/master/Face_Position_Dataset_Builder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Face Angle Dataset Generator

We're going to download movie trailers from https://www.davestrailerpage.co.uk/

The frames from the video files will be extracted and saved to disk as JPEG images.

## Downloading videos, extracting the frames

In [18]:
import requests
import ntpath
import cv2
import os, sys

# The variables
DIR_VIDEOS = "videos"  # Where the downloaded videos are stored
DIR_IMAGES = "images"  # Where the extracted frames are stored
CAPTURE_FTP  = 10 # We'll extract 10 images per second of video (name kept as-is; it holds an FPS value)

# Create the working directories if they are missing.
# The mode has to be the octal literal 0o755: the decimal number 755 is
# 0o1363, which produces a directory with unintended permission bits.
os.makedirs(DIR_VIDEOS, mode=0o755, exist_ok=True)
os.makedirs(DIR_IMAGES, mode=0o755, exist_ok=True)

# The methods
# Download a video from a url
def downloadFile(url):
  """Download `url` into DIR_VIDEOS and return the path of the saved file.

  The local file name is the last path component of the URL.

  Raises requests.HTTPError when the server answers with an error status,
  so that an error page is never saved as if it were a video.
  """
  response = requests.get(url)
  response.raise_for_status()
  filename = DIR_VIDEOS+"/"+ntpath.basename(url)
  # The context manager closes the file even if the write fails
  with open(filename, 'wb') as videoFile:
    videoFile.write(response.content)
  return filename

# Export the frames out of a video at a specific fps
def videoToImages(filename, capture_fps=1):
  """Save frames of the video `filename` as JPEG files, about `capture_fps` per second.

  The frames are written to DIR_IMAGES/<video name without extension>/ and
  are named after their timestamp in the video, e.g. "1.25sec.jpg".

  Raises ValueError if `capture_fps` is not positive.
  """
  if capture_fps <= 0:
    raise ValueError("capture_fps must be positive")
  basename = os.path.splitext(ntpath.basename(filename))[0]
  print("basename:", basename)
  # One output directory per video; it must exist before cv2.imwrite is called
  outputDir = DIR_IMAGES+"/"+basename
  os.makedirs(outputDir, mode=0o755, exist_ok=True)
  cap = cv2.VideoCapture(filename)
  try:
    # Get the video's FPS
    fps = cap.get(cv2.CAP_PROP_FPS)
    # How many frames between captures? Never less than 1, so that the modulo
    # below stays defined when capture_fps exceeds the video's own frame rate.
    skipFrame = max(1, round(fps/capture_fps))
    print(basename, ": fps: ",fps," / skipFrame: ", skipFrame)
    frameIndex = 0
    while cap.isOpened():
      ret, frame = cap.read()
      if not ret:
        break
      # Keep one frame out of every `skipFrame`, starting with the first one
      if frameIndex % skipFrame == 0:
        cv2.imwrite(outputDir+'/'+str(round(frameIndex/fps,2))+'sec.jpg',frame)
      frameIndex += 1
  finally:
    # Release the capture even if reading or writing a frame fails
    cap.release()
  cv2.destroyAllWindows()
  print(basename, " processed.")

# Fetch a remote video, then dump its frames to disk
def remoteVideoToImages(url):
  """Download the video at `url` and extract its frames at CAPTURE_FTP images per second."""
  videoToImages(downloadFile(url), CAPTURE_FTP)

# Run the pipeline on a sample trailer: download it, then extract its frames
remoteVideoToImages("http://trailers.apple.com/movies/fox_searchlight/lucy-in-the-sky/lucy-in-the-sky-trailer-1_h480p.mov")

basename: lucy-in-the-sky-trailer-1_h480p
lucy-in-the-sky-trailer-1_h480p : fps:  23.976023976023978  / skipFrame:  2
lucy-in-the-sky-trailer-1_h480p  processed.


## Find & extract the faces from the video frames

Import the dependencies

In [19]:
!pip install git+https://github.com/rcmalli/keras-vggface.git
!pip show keras-vggface
!pip install matplotlib
!pip install mtcnn

Collecting git+https://github.com/rcmalli/keras-vggface.git
  Cloning https://github.com/rcmalli/keras-vggface.git to /tmp/pip-req-build-s9sk1ryg
  Running command git clone -q https://github.com/rcmalli/keras-vggface.git /tmp/pip-req-build-s9sk1ryg
Building wheels for collected packages: keras-vggface
  Building wheel for keras-vggface (setup.py) ... [?25l[?25hdone
  Created wheel for keras-vggface: filename=keras_vggface-0.6-cp36-none-any.whl size=8311 sha256=9ca6aa9c36728746714cd2e9a3b3583c2e05418f4b95e66781f4925bd6b10764
  Stored in directory: /tmp/pip-ephem-wheel-cache-4mehs8sa/wheels/36/07/46/06c25ce8e9cd396dabe151ea1d8a2bc28dafcb11321c1f3a6d
Successfully built keras-vggface
Name: keras-vggface
Version: 0.6
Summary: VGGFace implementation with Keras framework
Home-page: https://github.com/rcmalli/keras-vggface
Author: Refik Can MALLI
Author-email: mallir@itu.edu.tr
License: MIT
Location: /usr/local/lib/python3.6/dist-packages
Requires: pyyaml, pillow, six, numpy, h5py, keras, s

We're going to use the MTCNN face detector (installed above alongside keras-vggface) to find the faces and crop them out of each frame.

In [0]:
import matplotlib.pyplot as pyplot
import glob
import keras_vggface
import mtcnn
from PIL import Image
from numpy import asarray
from mtcnn.mtcnn import MTCNN
from pathlib import Path

# The variables
DIR_FACES = "faces"  # Where the cropped face images are written

# Create the output directory if it is missing.
# The mode has to be the octal literal 0o755: the decimal number 755 is
# 0o1363, which produces a directory with unintended permission bits.
os.makedirs(DIR_FACES, mode=0o755, exist_ok=True)

# The methods
# Name of the directory that directly contains a file
def getDir(filename):
  """Return the second-to-last component of the path `filename`."""
  components = Path(filename).parts
  parentIndex = len(components) - 2
  return components[parentIndex]

# Detect all the faces in an image
_faceDetector = None  # MTCNN instance shared by every findFaces() call

def _getFaceDetector():
  """Return the shared MTCNN detector, creating it on first use."""
  global _faceDetector
  if _faceDetector is None:
    # create the detector, using default weights
    _faceDetector = MTCNN()
  return _faceDetector

def findFaces(filename):
  """Load the image `filename` and detect the faces it contains.

  Returns a (pixels, detections) tuple: the image as read by pyplot.imread
  and the value returned by MTCNN.detect_faces for that image.
  """
  # load image from file
  pixels = pyplot.imread(filename)
  # detect faces in the image, reusing one detector instead of building a new one per image
  return (pixels, _getFaceDetector().detect_faces(pixels))

def extractFaceFromImage(filename, required_size=(224, 224)):
  """Return one array per face detected in `filename`, each resized to `required_size`.

  Detections whose box does not overlap the image are skipped, so the result
  may be an empty list.
  """
  (pixels, results) = findFaces(filename)
  imageHeight, imageWidth = pixels.shape[:2]
  faces = []
  for faceData in results:
    x1, y1, width, height = faceData['box']
    x2, y2 = x1 + width, y1 + height
    # The detector may report a box that extends past the image border.
    # A negative start index would wrap around in the slice below and give
    # an empty or wrong crop, so clamp the box to the image first.
    x1, y1 = max(0, x1), max(0, y1)
    x2, y2 = min(imageWidth, x2), min(imageHeight, y2)
    if x2 <= x1 or y2 <= y1:
      # Nothing left of the box inside the image
      continue
    # extract the face
    face = pixels[y1:y2, x1:x2]
    # resize pixels to the model size
    image = Image.fromarray(face)
    image = image.resize(required_size)
    faces.append(asarray(image))
  return faces


# Crop the faces out of every .jpg found one directory level below `directory`
def extractFacesFromDirectory(directory, outputDirectory):
  """Save each face found in directory/*/*.jpg as its own image in `outputDirectory`.

  Output files are named <sub-directory>_<image name>-<face index>.jpg.
  """
  for framePath in glob.glob(directory+'/*/*.jpg'):
    videoName = getDir(framePath)
    frameName = os.path.splitext(ntpath.basename(framePath))[0]
    faces = extractFaceFromImage(framePath)
    print(framePath, "Faces: ", len(faces))
    for faceIndex, face in enumerate(faces):
      target = outputDirectory+'/'+videoName+'_'+frameName+'-'+str(faceIndex)+'.jpg'
      Image.fromarray(face).save(target)

# Extract the faces from the frames stored under DIR_IMAGES and save them into DIR_FACES
extractFacesFromDirectory(DIR_IMAGES, DIR_FACES)


## Cluster the faces

We want to group the faces of each actor into its own directory.