### Recognizes faces using Facenet
FaceNet embeddings are used to train an SVM that recognizes the people in the custom dataset; the classifier is then applied to an image that contains multiple faces and to the input video.
#### Imports

In [0]:
import pandas as pd
from keras.models import load_model
import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.svm import SVC
import matplotlib.patches as patches
import pickle
import time

#### Load Face Recognition model

In [0]:
# Location of the pre-trained Keras FaceNet weights
# (presumably a mounted Google Drive folder - confirm for your environment).
model_path = '/content/drive/My Drive/ML/facenet_keras.h5'
# Loaded once at module level; embed_dataset() and pipeline() read this global `model`.
model = load_model(model_path)

#### Preprocess dataset (crop face, resize)

In [0]:
dataset_path = '/content/drive/My Drive/ML/Face_dataset/'

def preprocess_data(df):
  ''' Crops the annotated face out of every image and resizes it to 160x160.

  df: DataFrame with the columns file_name, x_min, y_min, x_max, y_max, label
      (one row per annotated face).

  Returns (X, y): X is an array of the RGB face crops (each 160x160) and y an
  array with the matching labels, both in row order.

  Raises FileNotFoundError when an image cannot be read and ValueError when a
  bounding box selects no pixels.
  '''
  X = []
  y = []
  for row in df.itertuples(index=False):
    img = cv2.imread(row.file_name)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
      raise FileNotFoundError(f'Could not read image: {row.file_name}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # Slice bounds must be integers even if the CSV column was parsed as float.
    x1, y1, x2, y2 = (int(v) for v in (row.x_min, row.y_min, row.x_max, row.y_max))
    face = img[y1:y2, x1:x2]
    if face.size == 0:
      raise ValueError(
          f'Empty face crop for {row.file_name}: box=({x1}, {y1}, {x2}, {y2})')
    X.append(cv2.resize(face, (160, 160)))
    y.append(row.label)
  return np.array(X), np.array(y)

# Read the face-box annotations of both splits (train first, then test).
df_train, df_test = (
  pd.read_csv(dataset_path + csv_name)
  for csv_name in ('train_boxes.csv', 'test_boxes.csv')
)
# Crop and resize the annotated faces of each split.
(X_train, y_train), (X_test, y_test) = (
  preprocess_data(frame) for frame in (df_train, df_test)
)

#### Normalize Dataset and embed samples

In [0]:
def get_embedding(model, face_pixels):
  ''' Standardizes one face image and returns the model's embedding for it.

  model:       object exposing predict(batch), e.g. the loaded FaceNet model.
  face_pixels: array holding a single face image.

  Returns the first (and only) row of model.predict for the one-sample batch.
  '''
  # work in float32 so the standardization below is not done on integer pixels
  face_pixels = face_pixels.astype('float32')
  # standardize pixel values across all channels (global mean / std)
  mean, std = face_pixels.mean(), face_pixels.std()
  # a constant image has std == 0; avoid dividing by zero and feeding NaNs to the model
  if std == 0:
    std = 1.0
  face_pixels = (face_pixels - mean) / std
  # the model expects a batch, so wrap the face into a batch of one sample
  samples = np.expand_dims(face_pixels, axis=0)
  # make prediction to get embedding
  yhat = model.predict(samples)
  return yhat[0]

def embed_dataset(X_train, y_train, X_test, y_test):
  ''' Embeds both splits with the module-level `model` and encodes the labels.

  Every face is passed through get_embedding and the resulting vectors are
  scaled to unit L2 norm. The identity labels are mapped to integer codes by
  a LabelEncoder that is fitted on the training labels only.

  Returns (train_embeddings, encoded_y_train, test_embeddings, encoded_y_test,
  out_encoder); out_encoder can map the codes back to the original labels.
  '''
  l2_scaler = Normalizer(norm='l2')

  def embed_split(faces):
    # One forward pass per face, then row-wise L2 normalization of the stack.
    vectors = [get_embedding(model, pixels) for pixels in faces]
    return l2_scaler.transform(np.array(vectors))

  train_embeddings = embed_split(X_train)
  test_embeddings = embed_split(X_test)

  # Integer-encode the identities (fit on the training labels only)
  out_encoder = LabelEncoder().fit(y_train)
  encoded_y_train = out_encoder.transform(y_train)
  encoded_y_test = out_encoder.transform(y_test)
  return train_embeddings, encoded_y_train, test_embeddings, encoded_y_test, out_encoder

# NOTE: rebinds X_* / y_* from the face crops and raw labels to the normalized
# embeddings and encoded labels; out_encoder is kept to decode predictions later.
X_train, y_train, X_test, y_test, out_encoder = embed_dataset(X_train, y_train, X_test, y_test)

#### Fit SVM to the embeddings

In [0]:
def fit_svm(X_train, y_train, X_test, y_test):
  ''' Fits a linear-kernel SVC (with probability estimates) on the training set.

  Prints the predictions for the test set followed by the train and test
  accuracy in percent, and returns the fitted classifier.
  '''
  classifier = SVC(kernel='linear', probability=True)
  classifier.fit(X_train, y_train)

  train_pred = classifier.predict(X_train)
  test_pred = classifier.predict(X_test)

  # accuracy_score returns a fraction; scale it to a percentage for the report
  accuracies = (
    accuracy_score(y_train, train_pred) * 100,
    accuracy_score(y_test, test_pred) * 100,
  )
  print(test_pred)
  print('Accuracy: train=%.3f, test=%.3f' % accuracies)
  return classifier
# Train the classifier on the embeddings; `svc` is the fitted SVC used by the cells below.
svc = fit_svm(X_train, y_train, X_test, y_test)

[0 0 0 0 0 2 0 2 2 2 1 1 1 1 1 3 3 3 3 3]
Accuracy: train=100.000, test=95.000


In [0]:
def pipeline(face, svc):
  ''' Recognizes one face crop: resize, embed, L2-normalize, classify.

  face: image array of the cropped face (resized to 160x160 here).
  svc:  fitted classifier exposing predict, predict_proba and classes_.

  Returns (y, prob): y is the array svc.predict returns for the single sample
  and prob is the probability svc.predict_proba assigns to that same label.
  '''
  face = cv2.resize(face, (160, 160))
  embedding = get_embedding(model, face).reshape((1, -1))
  in_encoder = Normalizer(norm='l2')
  embedding = in_encoder.transform(embedding)
  y = svc.predict(embedding)
  y_prob = svc.predict_proba(embedding)[0]
  # With probability=True an SVC's predict() and the argmax of predict_proba()
  # may disagree, so report the probability of the label that is actually
  # returned instead of the overall maximum (which may belong to another class).
  class_index = np.flatnonzero(svc.classes_ == y[0])[0]
  return y, y_prob[class_index]

### Custom Test
Uses a personal photo that contains 3 faces. It can be found in the outputs folder.

In [0]:
folder_path = '/content/drive/My Drive/ML/Face_dataset/all_faces/'
# Paths of the images, stored as a NumPy array
file_names = np.load(f'{folder_path}arrays/file_names.npy')
# Face boundaries, indexed like file_names (loaded with pickling enabled)
boxes = np.load(f'{folder_path}arrays/boxes.npy', allow_pickle=True)

#### Input the image, apply the pipeline to each face, and create a plot that has the face bounds labeled with the person's name

In [0]:
# Index of the photo (within file_names / boxes) to annotate
idx = 3
img = cv2.imread(file_names[idx])
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# All face boxes detected in that photo
box = boxes[idx]

# A single figure/axes pair. The previous extra plt.figure(...) call only
# produced a second, empty figure and detached `fig` from `ax`.
fig, ax = plt.subplots(figsize=(20, 10))
ax.imshow(img)
for b in box:
  # Face bounds (cast to int so they are valid slice indices)
  x1, y1, x2, y2 = (int(v) for v in b[:4])
  face = img[y1:y2, x1:x2]
  # predict the face
  y, prob = pipeline(face, svc)
  # Label text: decoded identity on the first line, probability on the second
  title = out_encoder.inverse_transform(y)[0] + '\n{:.2f}'.format(prob)
  # Create rectangle around the face and plot it
  rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=1,
                           edgecolor='r', facecolor='none', label=title)
  ax.add_patch(rect)
  ax.text(x1, y1, title, fontsize=8, color='r',
          bbox=dict(facecolor='black'))

### Video testing
Read the frames along with the face bounds, recognize the face and draw the face rectangles

In [0]:
def read_frame(img, row):
  ''' Converts a BGR frame to RGB and returns the region bounded by the row's box.

  row must provide 'x_min', 'y_min', 'x_max' and 'y_max'.
  '''
  rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
  left, top, right, bottom = (
    row[key] for key in ('x_min', 'y_min', 'x_max', 'y_max')
  )
  return rgb[top:bottom, left:right]

def recognize_frames(path, df_path,
                     output_dir='/content/drive/My Drive/ML/Face_dataset/video_frames/'):
  ''' Labels the face in every video frame and saves each annotated frame as a PNG.

  path:       video file to read.
  df_path:    CSV with one row per frame holding x_min, y_min, x_max, y_max.
  output_dir: directory prefix (with trailing slash) for the '<index>.png' files.

  Returns the list of per-frame recognition times in seconds (only frames in
  which a face was recognized contribute an entry).
  '''
  timer = []
  # CSV file that contains the face bounds in each frame
  df = pd.read_csv(df_path)
  print('Number of frames', df.values.shape[0])

  vidcap = cv2.VideoCapture(path)
  try:
    for index, row in df.iterrows():
      ret, image = vidcap.read()
      # Stop cleanly when the video has fewer frames than the CSV has rows.
      if not ret:
        print('No more frames to read at index', index)
        break

      # Face bounds (if any)
      x1, y1, x2, y2 = row['x_min'], row['y_min'], row['x_max'], row['y_max']
      image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
      fig, ax = plt.subplots()
      ax.axis('off')
      ax.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
      t0 = time.time()
      face = read_frame(image, row)

      # Recognize face (if any); an empty crop in either dimension means no face
      if face.size > 0:
        y, prob = pipeline(face, svc)
        timer.append(time.time() - t0)
        title = out_encoder.inverse_transform(y)[0] + '\n{:.2f}'.format(prob)
        # Draw the face bounds
        rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=1,
                                 edgecolor='r', facecolor='none', label=title)
        ax.add_patch(rect)
        ax.text(x1, y1, title, fontsize=8, color='r',
                bbox=dict(facecolor='black'))

      # Save the frame with the labeled bounding box
      fig.savefig(f'{output_dir}{index}.png')
      # Close the figure; otherwise one open figure per frame accumulates in memory.
      plt.close(fig)
  finally:
    vidcap.release()
  return timer

# Input video and the CSV with the face bounds of each of its frames
path = 'drive/My Drive/ML/IMG_5947.MOV'
df_path = '/content/drive/My Drive/ML/Face_dataset/video_boxes.csv'
# Annotate every frame and collect the per-frame recognition times
timer = recognize_frames(path, df_path)
print('Recognition time on average for frame', np.mean(timer))

Finally, append all the saved frames to create the video

In [0]:
# Source video properties. They are not used for the writer: the PNG frames were
# saved by matplotlib, so their size need not match the source video.
vidcap = cv2.VideoCapture('drive/My Drive/ML/IMG_5947.MOV')
width  = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
vidcap.release()

fourcc = cv2.VideoWriter_fourcc(*"XVID")
# The writer is opened lazily so its frame size comes from the first saved frame
# instead of a hard-coded (432, 288).
video = None
frame_size = None
try:
  for i in range(398):
    img = cv2.imread(f'drive/My Drive/ML/Face_dataset/video_frames/{i}.png')
    # cv2.imread returns None for a missing or unreadable file; skip such frames.
    if img is None:
      print('Skipping unreadable frame', i)
      continue
    if video is None:
      frame_size = (img.shape[1], img.shape[0])  # (width, height)
      video = cv2.VideoWriter('drive/My Drive/ML/recognized_faces.avi', fourcc, 30, frame_size)
    elif (img.shape[1], img.shape[0]) != frame_size:
      # Every written frame must match the size the writer was opened with.
      img = cv2.resize(img, frame_size)
    video.write(img)
finally:
  # Finalize the file even if a frame fails part-way through.
  if video is not None:
    video.release()