In [49]:
import tensorflow as tf
from tensorflow.python.platform import gfile
from tensorflow.python.framework import ops
from sklearn.svm import SVC
import numpy as np
import cv2
import os
import pickle

import warnings
warnings.filterwarnings("ignore")

In [50]:
class ImageClass():
    """Container pairing a class (person) name with the paths of its images."""

    def __init__(self, name, image_paths):
        # Both values are stored as given; nothing is copied or validated.
        self.image_paths = image_paths
        self.name = name

    def __len__(self):
        """Number of image paths held for this class."""
        return len(self.image_paths)

    def __str__(self):
        """Human-readable summary in the form '<name>, <count> images'."""
        image_count = len(self.image_paths)
        return ', '.join([self.name, str(image_count) + ' images'])

def load_dataset(path, train = True):
    """Load image paths from ``path``.

    Args:
        path: Root directory of the dataset.
        train: When true, every sub-directory of ``path`` is treated as one
            class and the result is a list of ``ImageClass`` objects ordered
            by directory name. When false, ``path`` is treated as a flat
            folder and the result is a one-element list holding the list of
            paths found in it.

    Returns:
        list: ``[ImageClass, ...]`` in train mode, ``[[path, ...]]`` otherwise.

    Raises:
        OSError: If ``path`` cannot be listed (e.g. it does not exist).
    """
    dataset = []

    # Listing up front also validates that ``path`` exists in both modes.
    entries = sorted(os.listdir(path))

    if train:
        for class_name in entries:
            face_dir = os.path.join(path, class_name)
            # Stray files (.DS_Store, Thumbs.db, README, ...) are not classes.
            # Turning them into empty classes would shift every later label
            # index and add class names that have no training samples.
            if not os.path.isdir(face_dir):
                continue
            dataset.append(ImageClass(class_name, get_image_paths(face_dir)))
    else:
        dataset.append(get_image_paths(path))

    return dataset

def load_train_dataset():
    """Load the training set (one class per sub-directory) from './dataset/train'."""
    return load_dataset('./dataset/train')

def load_test_dataset():
    """Load the flat (unlabelled) test folder './dataset/test' via load_dataset(train=False)."""
    return load_dataset('./dataset/test', train=False)

def get_image_paths(dir_path):
    """Return the paths of the files directly inside ``dir_path``.

    Args:
        dir_path: Directory to scan (not recursive).

    Returns:
        list[str]: ``dir_path`` joined with each file name, sorted by name.
        Empty when ``dir_path`` is not a directory.
    """
    if not os.path.isdir(dir_path):
        return []

    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # reproducible between runs and machines.
    candidates = (os.path.join(dir_path, entry) for entry in sorted(os.listdir(dir_path)))
    # Sub-directories cannot be read as images, so keep regular files only.
    return [candidate for candidate in candidates if os.path.isfile(candidate)]

def get_image_paths_and_labels(dataset):
    """Flatten a list of per-class containers into parallel path/label lists.

    Each element of ``dataset`` must expose ``image_paths``. The label given to
    a path is the position of its class within ``dataset``.

    Returns:
        tuple: ``(image_paths_flat, labels_flat)``, two lists of equal length.
    """
    all_paths, all_labels = [], []
    for class_index, image_class in enumerate(dataset):
        class_paths = image_class.image_paths
        all_paths.extend(class_paths)
        all_labels.extend([class_index] * len(class_paths))
    return all_paths, all_labels

In [51]:
import math
import cv2

class facenet_utils:
    """Compute FaceNet embeddings and train/load an SVM classifier on top of them.

    The frozen graph is imported into TensorFlow's default graph and queried
    through the tensors named ``input:0``, ``embeddings:0`` and
    ``phase_train:0``.

    Attributes:
        dataset: The dataset passed to the constructor (or None).
        paths: Flat list of image paths, or None until derived from ``dataset``.
        labels: Class index for every entry of ``paths``, or None.
        emb_array: Most recently computed embeddings, or None.
    """

    # Default artefact locations; every method that uses one accepts an override.
    MODEL_FILENAME = './models/20180402-114759.pb'
    CLASSIFIER_FILENAME = './models/my_classifier.pkl'

    def __init__(self, dataset = None, train = None):
        """Store ``dataset``; when ``train`` is not None, flatten it immediately.

        Note that any non-None ``train`` value (including False) triggers the
        flattening. Otherwise paths/labels are derived lazily the first time a
        training step needs them.
        """
        self.dataset = dataset
        self.paths = None
        self.labels = None
        self.emb_array = None

        if train is not None:
            self.paths, self.labels = get_image_paths_and_labels(dataset)

    def _ensure_paths_and_labels(self):
        """Derive ``paths``/``labels`` from ``dataset`` if they are not set yet."""
        if self.paths is not None and self.labels is not None:
            return
        if self.dataset is None:
            raise ValueError('No dataset available: construct facenet_utils with dataset=...')
        self.paths, self.labels = get_image_paths_and_labels(self.dataset)

    def _get_io_tensors(self):
        """Look up the input, embeddings and phase-train tensors in the default graph."""
        graph = tf.compat.v1.get_default_graph()
        return (
            graph.get_tensor_by_name("input:0"),
            graph.get_tensor_by_name("embeddings:0"),
            graph.get_tensor_by_name("phase_train:0"),
        )

    def load_model(self, model_filename = None):
        """Import the frozen graph into the default TensorFlow graph.

        Args:
            model_filename: Path to the ``.pb`` file; defaults to
                ``MODEL_FILENAME``.

        Raises:
            FileNotFoundError: If the file does not exist. Without a graph the
                tensor lookups in the embedding methods cannot succeed, so
                failing here is clearer than failing there.
        """
        if model_filename is None:
            model_filename = self.MODEL_FILENAME
        model_filename = os.path.expanduser(model_filename)

        if not os.path.isfile(model_filename):
            print('File not found!')
            raise FileNotFoundError(f'Model file not found: {model_filename}')

        print(f'Model filename: {model_filename}')
        with tf.io.gfile.GFile(model_filename, 'rb') as f:
            graph_def = tf.compat.v1.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')

    def load_classifier(self, classifier_filename = None):
        """Load the pickled ``(model, class_names)`` pair.

        Args:
            classifier_filename: Path to the pickle; defaults to
                ``CLASSIFIER_FILENAME``.

        Returns:
            tuple: ``(model, class_names)`` as stored by ``train_classifier``.
        """
        if classifier_filename is None:
            classifier_filename = self.CLASSIFIER_FILENAME
        classifier_filename_exp = os.path.expanduser(classifier_filename)
        # SECURITY: pickle.load can execute arbitrary code. Only load classifier
        # files that were produced by a trusted run of train_classifier().
        with open(classifier_filename_exp, 'rb') as infile:
            (model, class_names) = pickle.load(infile)
        print(f'loaded classifier file->{classifier_filename_exp}')
        return model, class_names

    def prewhiten(self, x):
        """Normalise ``x`` to zero mean and (floored) unit standard deviation."""
        mean = np.mean(x)
        std = np.std(x)
        # Floor the std so a constant image does not cause a division by zero.
        std_adj = np.maximum(std, 1.0/np.sqrt(x.size))
        y = np.multiply(np.subtract(x, mean), 1/std_adj)
        return y

    def to_rgb(self, img):
        """Replicate a 2-D grayscale array into three identical uint8 channels."""
        rows, cols = img.shape
        ret = np.empty((rows, cols, 3), dtype=np.uint8)
        ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img
        return ret

    def _preprocess(self, img, image_size, do_prewhiten = True):
        """Convert to 3 channels if needed, resize to a square, optionally prewhiten."""
        if img.ndim == 2:
            img = self.to_rgb(img)
        img = cv2.resize(img, (image_size, image_size))
        if do_prewhiten:
            img = self.prewhiten(img)
        return img

    def load_images(self, image_paths, image_size,  do_prewhiten=True):
        """Read and preprocess the given image files.

        Args:
            image_paths: Paths of the images to read.
            image_size: Side length every image is resized to.
            do_prewhiten: Whether to apply ``prewhiten`` to each image.

        Returns:
            numpy.ndarray: Array of shape ``(len(image_paths), image_size, image_size, 3)``.

        Raises:
            ValueError: If an image cannot be read. Skipping it instead would
                misalign the embeddings with their labels.
        """
        images = np.zeros((len(image_paths), image_size, image_size, 3))
        print('Loading images...')
        for i, image_path in enumerate(image_paths):
            # NOTE(review): cv2.imread yields BGR channel order; presumably the
            # network was trained on RGB -- confirm before changing, since any
            # saved classifier was fitted on embeddings produced this way.
            img = cv2.imread(image_path)
            if img is None:
                raise ValueError(f'Could not read image: {image_path}')
            images[i, :, :, :] = self._preprocess(img, image_size, do_prewhiten)
        print('Images loaded')
        return images

    def calculate_embeddings(self, session, batch_size = 2000, image_size = 160):
        """Compute embeddings for every path of this instance into ``emb_array``.

        Args:
            session: TensorFlow session used to run the forward pass.
            batch_size: Number of images fed per ``session.run`` call.
            image_size: Side length images are resized to before being fed.

        Raises:
            ValueError: If ``batch_size`` is not positive or no dataset is set.
        """
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')
        self._ensure_paths_and_labels()
        paths = self.paths

        images_placeholder, embeddings, phase_train_placeholder = self._get_io_tensors()
        embedding_size = embeddings.get_shape()[1]

        # Run forward pass to calculate embeddings
        print('Calculating features for images')
        nrof_images = len(paths)
        nrof_batches_per_epoch = int(math.ceil(nrof_images / batch_size))
        self.emb_array = np.zeros((nrof_images, embedding_size))
        for i in range(nrof_batches_per_epoch):
            print(f'Batch {i} of {nrof_batches_per_epoch}')
            start_index = i * batch_size
            end_index = min(start_index + batch_size, nrof_images)
            images = self.load_images(paths[start_index:end_index], image_size)
            feed_dict = {images_placeholder: images, phase_train_placeholder: False}
            print('Calculating array of features')
            self.emb_array[start_index:end_index, :] = session.run(embeddings, feed_dict=feed_dict)
            print('Done')

    def train_classifier(self, classifier_filename = None):
        """Fit an RBF SVM on ``emb_array`` and pickle it with the class names.

        Args:
            classifier_filename: Output path; defaults to ``CLASSIFIER_FILENAME``.

        Raises:
            ValueError: If no embeddings have been computed or no dataset is set.
        """
        if self.emb_array is None:
            raise ValueError('No embeddings available: call calculate_embeddings() first')
        self._ensure_paths_and_labels()

        if classifier_filename is None:
            classifier_filename = self.CLASSIFIER_FILENAME
        classifier_filename_exp = os.path.expanduser(classifier_filename)

        # Train classifier
        print('Training classifier')
        model = SVC(kernel='rbf', probability=True)
        model.fit(self.emb_array, self.labels)

        # Create a list of class names
        class_names = [cls.name.replace('_', ' ') for cls in self.dataset]
        print(class_names)

        # Saving classifier model
        with open(classifier_filename_exp, 'wb') as outfile:
            pickle.dump((model, class_names), outfile)
        print(f'Saved classifier model to file {classifier_filename_exp}')

    def train_model(self):
        """Load the graph, embed the whole dataset and train the classifier."""
        with tf.compat.v1.Session() as sess:
            self.load_model()
            self.calculate_embeddings(sess)
            self.train_classifier()

    def get_img_embeddings(self, session, img, image_size = 160):
        """Compute the embedding of one already-decoded image.

        Args:
            session: TensorFlow session used to run the forward pass.
            img: Image array, either 2-D (grayscale) or 3-channel.
            image_size: Side length the image is resized to before being fed.

        Returns:
            numpy.ndarray: Array of shape ``(1, embedding_size)``. It is also
            stored in ``self.emb_array``, replacing whatever was there.

        Raises:
            ValueError: If ``img`` is None (e.g. a failed ``cv2.imread``).
        """
        if img is None:
            raise ValueError('img is None: the image could not be read')

        images_placeholder, embeddings, phase_train_placeholder = self._get_io_tensors()
        embedding_size = embeddings.get_shape()[1]

        self.emb_array = np.zeros((1, embedding_size))

        image = np.zeros((1, image_size, image_size, 3))
        image[0, :, :, :] = self._preprocess(img, image_size)

        feed_dict = {images_placeholder: image, phase_train_placeholder: False}
        self.emb_array[0, :] = session.run(embeddings, feed_dict=feed_dict)

        return self.emb_array

    


In [52]:
# Build the labelled training set: one ImageClass per sub-directory.
dataset = load_train_dataset()
# `paths` / `labels` are also kept at notebook scope for interactive inspection.
paths, labels = get_image_paths_and_labels(dataset)

# A non-None `train` makes the constructor flatten `dataset` into the
# instance's own `paths` / `labels` instead of leaving them unset.
facenet = facenet_utils(dataset=dataset, train=True)
facenet.train_model()


Model filename: ./models/20180402-114759.pb
Calculating features for images
Batch 0 of 7
Loading images...
Images loaded
Calculating array of features
Done
Batch 1 of 7
Loading images...
Images loaded
Calculating array of features


In [None]:
from sklearn.metrics import accuracy_score

# NOTE: the evaluation images come from the *training* folder, so the number
# printed at the end is training accuracy, not held-out accuracy.
test_dataset = load_train_dataset()
# Flatten the dataset loaded in this cell rather than a variable left over
# from an earlier cell.
paths, labels = get_image_paths_and_labels(test_dataset)

facenet_test = facenet_utils()

with tf.compat.v1.Session() as sess:
    facenet_test.load_model()
    model, class_names = facenet_test.load_classifier()
    print(class_names)

    predicted = []
    # Ground-truth labels of the images that were actually scored; kept in
    # lock-step with `predicted` so skipped images cannot misalign the two.
    evaluated_labels = []

    for img_path, true_label in zip(paths, labels):
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Skipping unreadable image: {img_path}")
            continue
        embeddings = facenet_test.get_img_embeddings(sess, img)

        predictions = model.predict_proba(embeddings)
        best_class_indices = np.argmax(predictions, axis=1)
        print(best_class_indices)
        best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices]

        # predict_proba columns follow model.classes_, so map the column index
        # back to the label the classifier was trained with.
        predicted_label = int(model.classes_[best_class_indices[0]])
        result = class_names[predicted_label]
        predicted.append(predicted_label)
        evaluated_labels.append(true_label)
        print(f"Img id: {img_path} Person: {result}")

    # accuracy_score expects (y_true, y_pred).
    accuracy = accuracy_score(evaluated_labels, predicted)
    print(evaluated_labels)
    print("\n" * 3)
    print(predicted)
    print(accuracy)


Model filename: ./models/20180402-114759.pb
loaded classifier file->./models/my_classifier.pkl
['Adam Sandler', 'Alec Baldwin', 'Angelina Jolie', 'Anna Kournikova', 'Ashton Kutcher', 'Avril Lavigne', 'Barack Obama', 'Ben Affleck', 'Beyonce Knowles', 'Brad Pitt', 'Cameron Diaz', 'Cate Blanchett', 'Charlize Theron', 'Christina Ricci', 'Claudia Schiffer', 'Clive Owen', 'Colin Farrell', 'Colin Powell', 'Cristiano Ronaldo', 'Daniel Craig', 'Daniel Radcliffe', 'David Beckham', 'David Duchovny', 'Denise Richards', 'Drew Barrymore', 'Dustin Hoffman', 'Ehud Olmert', 'Eva Mendes', 'Faith Hill', 'George Clooney', 'Gordon Brown', 'Gwyneth Paltrow', 'Halle Berry', 'Harrison Ford', 'Hugh Jackman', 'Hugh Laurie', 'Jack Nicholson', 'Jennifer Aniston', 'Jennifer Lopez', 'Jennifer Love Hewitt', 'Jessica Alba', 'Jessica Simpson', 'Joaquin Phoenix', 'John Travolta', 'Julia Roberts', 'Julia Stiles', 'Kate Moss', 'Kate Winslet', 'Katherine Heigl', 'Keira Knightley', 'Kiefer Sutherland', 'Leonardo DiCaprio',