### Test Facenet on Surveillance data
Loads 20 identities from the QMUL-SurvFace dataset and recognizes them using FaceNet embeddings and an SVM classifier.

In [0]:
import os
import cv2
from tqdm import tqdm
import numpy as np
from keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

#### Preprocessing

In [0]:
def normalize(img):
  ''' Standardizes an image to zero mean and unit variance.

  The statistics are computed over the whole array (all pixels and
  channels together), not per channel.

  Args:
    img: NumPy array holding the image; it is cast to float32 first.

  Returns:
    A float32 array of the same shape as `img`. A constant image
    (zero standard deviation) yields an all-zero array instead of NaNs.
  '''
  img = img.astype('float32')
  mean, std = img.mean(), img.std()
  # A perfectly uniform image has std == 0; dividing by it would turn every
  # pixel into NaN and poison everything computed from it afterwards.
  if std == 0:
    std = 1.0
  return (img - mean) / std

In [7]:
# Root folder of the face dataset; read_dataset treats each sub-directory as one identity.
dir_name = 'drive/My Drive/ML/surv_dataset'
def read_dataset(dir_name, num_classes=20,
                 x_out_path='drive/My Drive/ML/X_surv.npy',
                 y_out_path='drive/My Drive/ML/y_surv.npy'):
  ''' Reads up to `num_classes` identities, preprocesses their images
   and saves them as NumPy arrays (the operation is time costly).

  Every sub-directory of `dir_name` is treated as one identity. Each readable
  image in it is resized to 160x160, normalized and stored with an integer
  label (0, 1, 2, ... in sorted directory order).

  Args:
    dir_name: Folder containing one sub-directory per identity.
    num_classes: Maximum number of identities to load.
    x_out_path: Destination .npy file for the image array.
    y_out_path: Destination .npy file for the label array.

  Returns:
    None. The arrays are written to `x_out_path` and `y_out_path`.
  '''
  X = []
  y = []
  label = 0
  # os.listdir returns entries in arbitrary order; sorting makes the chosen
  # identities and their labels reproducible across runs and machines.
  for c in sorted(os.listdir(dir_name)):
    # only `num_classes` identities are needed
    if label >= num_classes:
      break
    class_path = os.path.join(dir_name, c)
    if not os.path.isdir(class_path):
      continue
    loaded = 0
    for file_name in sorted(os.listdir(class_path)):
      file_path = os.path.join(class_path, file_name)
      img = cv2.imread(file_path)
      if img is None:
        # cv2.imread signals an unreadable / non-image file by returning
        # None rather than raising; skip it instead of crashing in resize.
        continue
      # Preprocessing
      # NOTE(review): cv2.imread yields BGR channel order — confirm that is
      # what the embedding model expects.
      img = cv2.resize(img, (160, 160))
      img = normalize(img)
      X.append(img)
      y.append(label)
      loaded += 1
    # Only consume a label when the identity contributed at least one image,
    # so labels stay contiguous.
    if loaded > 0:
      label += 1
  # Save the preprocessed faces
  np.save(x_out_path, np.array(X))
  np.save(y_out_path, np.array(y))

# Preprocess the dataset and write the X / y .npy files to disk.
read_dataset(dir_name)

 35%|███▌      | 62/176 [00:43<01:30,  1.25it/s]

#### Load Facenet

In [4]:
# Keras model file used as the face embedder.
# NOTE(review): this path starts with '/', while every other path in this
# notebook is relative ('drive/My Drive/...') — confirm which form is intended.
model_path = '/drive/My Drive/ML/facenet_keras.h5'
model = load_model(model_path)



#### Embed Dataset

In [25]:
# Labels written next to the preprocessed faces.
y = np.load('drive/My Drive/ML/y_surv.npy')
# Feed the preprocessed faces through the model; X holds its predictions
# (the embeddings) rather than the images from here on.
X = model.predict(np.load('drive/My Drive/ML/X_surv.npy'))
X.shape

(4934, 128)

#### Split into train and test sets

In [26]:
# Hold out 20% of the embeddings for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train.shape, X_test.shape

((3947, 128), (987, 128))

#### Conduct a Grid search with cross validation on the SVM hyperparameters and report the final metrics

In [0]:
def fit_svm(X_train, y_train, X_test, y_test):
  ''' Grid search on the SVM hyperparameters.

  Runs a cross-validated grid search over the SVC kernel ('linear', 'rbf')
  and C (1, 10, 100, 1000) on the training data, prints the score of every
  candidate, then prints a classification report for the test data.

  Args:
    X_train: Training feature matrix.
    y_train: Training labels.
    X_test: Test feature matrix.
    y_test: Test labels.

  Returns:
    The fitted GridSearchCV object, so the caller can reuse the tuned
    classifier (e.g. `best_estimator_`, `predict`).
  '''

  parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10, 100, 1000]}
  print("# Tuning hyper-parameters ")
  clf = GridSearchCV(
      SVC(), parameters
  )
  clf.fit(X_train, y_train)
  print("Best parameters set found on development set:")
  print(clf.best_params_)
  print("Grid scores on development set:")
  means = clf.cv_results_['mean_test_score']
  stds = clf.cv_results_['std_test_score']
  for mean, std, params in zip(means, stds, clf.cv_results_['params']):
      # Report each candidate as mean +/- two standard deviations across folds.
      print("%0.3f (+/-%0.03f) for %r"
            % (mean, std * 2, params))
  print("Detailed classification report:")
  print("The model is trained on the full development set.")
  print("The scores are computed on the full evaluation set.")
  y_true, y_pred = y_test, clf.predict(X_test)
  print(classification_report(y_true, y_pred))
  # Hand the fitted search back; previously the function returned None, so
  # the caller's `svc = fit_svm(...)` never held a usable model.
  return clf

# Tune the SVM on the training embeddings and print the report for the held-out test embeddings.
svc = fit_svm(X_train, y_train, X_test, y_test)