# 1: Loading the dataset 

## First: importing the libraries 

In [2]:
from os import listdir
from os.path import isdir
from PIL import Image
from numpy import savez_compressed
from numpy import asarray
from numpy import load 
from mtcnn.mtcnn import MTCNN
from numpy import expand_dims
from keras.models import load_model

### os: for operatiton system 
### PIL: (Pillow)  the library dealling with images 
### numpy: the library dealing with the arrays 
### mtcnn: the face detector 
# --------------------------------------------------------------------------------------------

# Define some functions to exract faces and load images and dataset

## extract the face from the picture

In [3]:

def extract(filename, size=(160, 160)):
    #load image 
    image = Image.open(filename)
    image = image.convert('RGB')
    pixels = asarray(image)
    detector = MTCNN()
    results = detector.detect_faces(pixels)

    # extract the bounding box
    x1, y1, width, height = results[0]['box']
    #fix the value in case the library return negative index so we take the abslute value 
    x1, y1 = abs(x1), abs(y1)
    x2, y2 = x1 + width, y1 + height
    # extract the face
    face = pixels[y1:y2, x1:x2]
    # resize pixels to the model size
    image = Image.fromarray(face)
    image = image.resize(size)
    face_array = asarray(image)
    return face_array

In [4]:
## 

In [5]:
# load images and extract faces for all images in a directory
def load_faces(directory):
    faces = list()
    # enumerate files
    for filename in listdir(directory):
        # path
        path = directory + filename
        # get face
        face = extract(path)
        # store
        faces.append(face)
    return faces

In [6]:
def load_dataset(directory):
    X, y = list(), list()
    # enumerate folders, on per class
    for subdir in listdir(directory):
        # path
        path = directory + subdir + '/'
        # skip any files that might be in the dir
        if not isdir(path):
            continue
        # load all faces in the subdirectory
        faces = load_faces(path)
        # create labels
        labels = [subdir for _ in range(len(faces))]
        # summarize progress
        print('>loaded %d examples for class: %s' % (len(faces), subdir))
        # store
        X.extend(faces)
        y.extend(labels)
    return asarray(X), asarray(y)

In [7]:
# load train dataset
dataset, labels = load_dataset('datasets/')
print(dataset.shape, labels.shape)

>loaded 21 examples for class: Adrian Broody
>loaded 23 examples for class: Adriana Barazza
>loaded 22 examples for class: Ali Latter
>loaded 16 examples for class: Amena Khan
>loaded 19 examples for class: Andrew Lincolen
>loaded 24 examples for class: Angella Basset
>loaded 16 examples for class: Anna_Hathaway
>loaded 20 examples for class: Anthony Hopkins
>loaded 23 examples for class: arnold_schwarzenegger
>loaded 25 examples for class: Aron Judge
>loaded 23 examples for class: Aron Paul
>loaded 19 examples for class: Barabara Palvin
>loaded 21 examples for class: Barbra Streisand
>loaded 23 examples for class: Barry Pepper
>loaded 23 examples for class: Bella Hadid
>loaded 32 examples for class: bella thorne
>loaded 19 examples for class: ben_afflek
>loaded 20 examples for class: Beyonce Knowles
>loaded 16 examples for class: Buffon
>loaded 16 examples for class: De_Niro
>loaded 16 examples for class: Donnie Yen
>loaded 20 examples for class: dwayne_johnson
>loaded 22 examples for

In [8]:
from sklearn.model_selection import train_test_split

trainX, testX, trainy, testy = train_test_split(dataset, labels, test_size = 0.3)

savez_compressed('faces.npz', x_train, y_train, x_test, y_test)
print(trainX.shape, testX.shape, trainy.shape, testy.shape)

NameError: name 'x_train' is not defined

# 2: Embedding the dataset

In [None]:
def face_embedding(model, face_pixels):
    # scale pixel values
    face_pixels = face_pixels.astype('float32')
    mean, std = face_pixels.mean(), face_pixels.std()
    face_pixels = (face_pixels - mean) / std
    samples = expand_dims(face_pixels, axis=0)
    #We can then use the model to make a prediction and extract the embedding vector.
    yhat = model.predict(samples)
    return yhat[0]

In [None]:
# load the facenet model
model = load_model('facenet_keras.h5')
print('Loaded Model')

In [None]:
# convert each face in the train set to an embedding
train_x = []
for face_pixels in trainX:
    embedding = face_embedding(model, face_pixels)
    train_x.append(embedding)
train_x = asarray(train_x)
print(train_x.shape)

In [None]:
# convert each face in the test set to an embedding
test_x = []
for face_pixels in testX:
    embedding = face_embedding(model, face_pixels)
    test_x.append(embedding)
test_x = asarray(test_x)
print(newTestX.shape)

In [None]:
# save arrays to one file in compressed format
savez_compressed('face_embeddings.npz', newTrainX, trainy, newTestX, testy)
