# Import libraries

In [1]:
import os
from PIL import Image
from matplotlib import pyplot
import numpy as np
from mtcnn.mtcnn import MTCNN
import tensorflow as tf   
tf.get_logger().setLevel('ERROR')

# Create function that extracts faces

In [2]:
def extract_face_mtcnn(filepath, required_size=(160, 160), detector=None):
    """Detect the first face in an image file and return it cropped and resized.

    Parameters
    ----------
    filepath : str
        Path of the image file to open.
    required_size : tuple of int, optional
        ``(width, height)`` passed to ``Image.resize``. Defaults to
        ``(160, 160)``.
    detector : object, optional
        Object exposing ``detect_faces(pixels)``. When omitted, one ``MTCNN()``
        instance is created on first use and reused by later calls.

    Returns
    -------
    numpy.ndarray or str
        Pixel array of the resized RGB face crop, or the string ``"no face"``
        when the detector returns no detections or the reported box does not
        overlap the image.

    Notes
    -----
    Only the first detection (``result[0]``) is used.
    """
    print(filepath)

    # Read the pixels inside a context manager so the file handle is released
    # as soon as the RGB copy has been materialised.
    with Image.open(filepath) as image:
        image_pixels = np.asarray(image.convert('RGB'))

    # Building an MTCNN detector is expensive; build it once and keep it on the
    # function object instead of constructing a new one for every image.
    if detector is None:
        detector = getattr(extract_face_mtcnn, '_detector', None)
        if detector is None:
            detector = MTCNN()
            extract_face_mtcnn._detector = detector

    result = detector.detect_faces(image_pixels)
    if len(result) == 0:
        return "no face"

    x1, y1, width, height = result[0]['box']
    # The box may start outside the image (negative coordinates). Compute the
    # far corner from the raw values first, then clip every coordinate to 0 so
    # the crop is the part of the box that lies inside the image. Taking abs()
    # of the origin would instead shift the whole window.
    x2, y2 = max(x1 + width, 0), max(y1 + height, 0)
    x1, y1 = max(x1, 0), max(y1, 0)
    face = image_pixels[y1:y2, x1:x2]
    if face.size == 0:
        # Box lies entirely outside the image: nothing to crop.
        return "no face"

    face_image = Image.fromarray(face).resize(required_size)
    return np.asarray(face_image)

# Create function that loads all images from a subdirectory

In [3]:
def load_faces(directory):
    """Extract one face from every file inside ``directory``.

    Parameters
    ----------
    directory : str
        Folder whose entries are passed, one by one, to
        ``extract_face_mtcnn``. A trailing path separator is optional.

    Returns
    -------
    list or bool
        One face array per file, in sorted file-name order, or ``False`` as
        soon as ``extract_face_mtcnn`` reports ``"no face"`` for a file.
    """
    faces = list()
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(directory)):
        path = os.path.join(directory, filename)
        # get face
        face = extract_face_mtcnn(path)
        # The failure sentinel is a string. Check the type first: comparing a
        # pixel array with == is element-wise and has no single truth value.
        if isinstance(face, str) and face == "no face":
            return False
        # store face
        faces.append(face)
    return faces

# Create function that loads the entire dataset

In [4]:
def load_dataset(directory):
    """Load the faces of every class sub-folder found in ``directory``.

    Each sub-directory is treated as one class: its name becomes the label of
    every face that ``load_faces`` returns for it. Plain files directly inside
    ``directory`` are ignored.

    Parameters
    ----------
    directory : str
        Root folder containing one sub-folder per class. A trailing path
        separator is optional.

    Returns
    -------
    tuple of numpy.ndarray, or None
        ``(X, y)`` where ``X`` holds the faces and ``y`` the matching labels,
        in sorted sub-folder order. ``None`` is returned as soon as
        ``load_faces`` gives a falsy result (``False`` or an empty list) for a
        sub-folder.
    """
    X, y = list(), list()
    # enumerate folders, one per class (sorted: os.listdir order is arbitrary)
    for subdir in sorted(os.listdir(directory)):
        # The empty last component makes join() end the path with a separator,
        # so it stays usable by code that appends file names by concatenation.
        path = os.path.join(directory, subdir, '')
        # skip any files that might be in the dir
        if not os.path.isdir(path):
            continue
        # load all faces in the subdirectory
        faces = load_faces(path)
        if not faces:
            # Say which class stopped the load before giving up.
            print('>could not load faces for class: %s' % subdir)
            return None
        # create labels
        labels = [subdir] * len(faces)
        # summarize progress
        print('>loaded %d examples for class: %s' % (len(faces), subdir))
        # store
        X.extend(faces)
        y.extend(labels)
    return np.asarray(X), np.asarray(y)

# Load the dataset and save it as a compressed file

In [5]:
dataset = load_dataset('../data/12plusbias/')
# load_dataset returns None instead of a tuple when a class folder yields no
# faces; stop with a clear message rather than an opaque unpacking TypeError.
if dataset is None:
    raise RuntimeError(
        'load_dataset returned no data; check the images under ../data/12plusbias/'
    )
X, y = dataset

print(X.shape, y.shape)

# save arrays to one file in compressed format
# (arrays passed positionally are stored under the keys arr_0 and arr_1)
np.savez_compressed('../data/12plusbias-dataset.npz', X, y)

../data/12plusbias/s01/01.jpg
../data/12plusbias/s01/02.jpg
../data/12plusbias/s01/03.jpg
../data/12plusbias/s01/04.jpg
../data/12plusbias/s01/05.jpg
../data/12plusbias/s01/06.jpg
../data/12plusbias/s01/07.jpg
../data/12plusbias/s01/08.jpg
../data/12plusbias/s01/09.jpg
../data/12plusbias/s01/10.jpg
../data/12plusbias/s01/11.jpg
../data/12plusbias/s01/12.jpg
../data/12plusbias/s01/13.jpg
../data/12plusbias/s01/14.jpg
../data/12plusbias/s01/15.jpg
>loaded 15 examples for class: s01
../data/12plusbias/s02/01.jpg
../data/12plusbias/s02/02.jpg
../data/12plusbias/s02/03.jpg
../data/12plusbias/s02/04.jpg
../data/12plusbias/s02/05.jpg
../data/12plusbias/s02/06.jpg
../data/12plusbias/s02/07.jpg
../data/12plusbias/s02/08.jpg
../data/12plusbias/s02/09.jpg
../data/12plusbias/s02/10.jpg
../data/12plusbias/s02/11.jpg
../data/12plusbias/s02/12.jpg
../data/12plusbias/s02/13.jpg
../data/12plusbias/s02/14.jpg
../data/12plusbias/s02/15.jpg
>loaded 15 examples for class: s02
../data/12plusbias/s03/01.jpg
