## Crop Faces

In [1]:
class ImageClass:
    """Holds a class name together with the paths of the images of that class.

    Attributes are plain references to the constructor arguments:
    ``name`` (the class name) and ``image_paths`` (its image paths).
    """

    def __init__(self, name, image_paths):
        self.name, self.image_paths = name, image_paths

    def __len__(self):
        """Return how many image paths this class holds."""
        return len(self.image_paths)

    def __str__(self):
        """Return a summary of the form ``'<name>, <count> images'``."""
        return self.name + ', {} images'.format(len(self.image_paths))

Let's create a function that returns the paths of all the images in a folder.

In [None]:
import os

In [2]:
def get_image_paths(dir_path):
    """Return the paths of all entries found directly inside ``dir_path``.

    Args:
        dir_path: directory to list.

    Returns:
        A list with ``dir_path`` joined to every entry name, sorted by entry
        name so the order is identical on every platform and every run.
        An empty list when ``dir_path`` is not an existing directory.
    """
    if not os.path.isdir(dir_path):
        return []
    # os.listdir() gives no ordering guarantee; sort for reproducible results.
    return [os.path.join(dir_path, entry) for entry in sorted(os.listdir(dir_path))]

Let's create a function that collects the image paths of every class folder and stores them in the `ImageClass` defined above.

In [3]:
def load_train_dataset(path):
    """Build one ``ImageClass`` per class sub-directory of ``path``.

    Args:
        path: root directory containing one sub-directory per class; the
            sub-directory name is used as the class name.

    Returns:
        A list of ``ImageClass`` objects sorted by class name. Entries of
        ``path`` that are not directories are skipped.
    """
    dataset = []
    for class_name in sorted(os.listdir(path)):
        face_dir = os.path.join(path, class_name)
        # A stray file in the root (e.g. Thumbs.db, .DS_Store) is not a class.
        # Keeping it would add an empty class and shift the position of every
        # class after it, and positions are what later code uses as labels.
        if not os.path.isdir(face_dir):
            continue
        dataset.append(ImageClass(class_name, get_image_paths(face_dir)))
    return dataset

In [4]:
import os
# Root folder of the training images (relative to the working directory).
train_path = './dataset/train'    
# One ImageClass per class found under train_path, ordered by class name.
dataset = load_train_dataset(train_path)
# Last expression of the cell: display the summary string of the first class.
str(dataset[0])

'Adam Sandler, 88 images'

Let's flatten the dataset

In [5]:
def get_image_paths_and_labels(dataset):
    """Flatten a list of image classes into two parallel lists.

    Args:
        dataset: sequence of objects exposing an ``image_paths`` attribute.

    Returns:
        A tuple ``(image_paths_flat, labels_flat)``. The first list holds every
        image path of every class, in dataset order. The second holds, for
        each of those paths, the index of its class within ``dataset``.
    """
    image_paths_flat, labels_flat = [], []
    for label, image_class in enumerate(dataset):
        class_paths = image_class.image_paths
        image_paths_flat.extend(class_paths)
        # Repeat the class index once per image so both lists stay aligned.
        labels_flat.extend([label] * len(class_paths))
    return image_paths_flat, labels_flat

In [6]:
# Flatten the per-class dataset into two parallel lists:
# one path and one integer label (the class index) per image.
paths, labels = get_image_paths_and_labels(dataset)
# NOTE(review): both prints dump the complete lists; consider printing a slice
# (e.g. paths[:5]) and the lengths instead to keep the cell output short.
print(paths)
print(labels)

['./dataset/train\\Adam Sandler\\103.jpg', './dataset/train\\Adam Sandler\\108.jpg', './dataset/train\\Adam Sandler\\111.jpg', './dataset/train\\Adam Sandler\\116.jpg', './dataset/train\\Adam Sandler\\119.jpg', './dataset/train\\Adam Sandler\\127.jpg', './dataset/train\\Adam Sandler\\132.jpg', './dataset/train\\Adam Sandler\\134.jpg', './dataset/train\\Adam Sandler\\136.jpg', './dataset/train\\Adam Sandler\\14.jpg', './dataset/train\\Adam Sandler\\149.jpg', './dataset/train\\Adam Sandler\\151.jpg', './dataset/train\\Adam Sandler\\152.jpg', './dataset/train\\Adam Sandler\\157.jpg', './dataset/train\\Adam Sandler\\161.jpg', './dataset/train\\Adam Sandler\\165.jpg', './dataset/train\\Adam Sandler\\166.jpg', './dataset/train\\Adam Sandler\\17.jpg', './dataset/train\\Adam Sandler\\173.jpg', './dataset/train\\Adam Sandler\\175.jpg', './dataset/train\\Adam Sandler\\182.jpg', './dataset/train\\Adam Sandler\\187.jpg', './dataset/train\\Adam Sandler\\189.jpg', './dataset/train\\Adam Sandler\\19.

### Face Detection

Let's create a function that crops each image to the detected face. This way we reduce the noise coming from the background.

In [7]:
from numpy import asarray
from mtcnn.mtcnn import MTCNN
from PIL import Image

# Detector shared by all extract_face() calls; built lazily on first use so
# the model is not re-created for every image.
_FACE_DETECTOR = None


def _get_face_detector():
    """Return the shared MTCNN detector, creating it on the first call."""
    global _FACE_DETECTOR
    if _FACE_DETECTOR is None:
        # create the detector, using default weights
        _FACE_DETECTOR = MTCNN()
    return _FACE_DETECTOR


# extract a single face from a given photograph
def extract_face(filename, required_size=(160, 160), min_confidence=0.9, detector=None):
    """Crop the first detected face of an image and resize it.

    Args:
        filename: path of the image file to read.
        required_size: ``(width, height)`` passed to ``Image.resize``.
        min_confidence: minimum ``'confidence'`` the first detection must
            have; below it the image is rejected.
        detector: object with a ``detect_faces(pixels)`` method. When omitted,
            a single shared ``MTCNN()`` instance is used.

    Returns:
        A PIL ``Image`` with the resized face crop, or ``None`` when no face
        is detected, the first detection is below ``min_confidence``, or the
        detected box has no area inside the image.
    """
    # load the image, force RGB, and close the file as soon as pixels are read
    with Image.open(filename) as source:
        pixels = asarray(source.convert('RGB'))

    if detector is None:
        detector = _get_face_detector()
    # detect faces in the image
    results = detector.detect_faces(pixels)
    if not results or results[0]['confidence'] < min_confidence:
        return None

    # bounding box of the first face
    x1, y1, width, height = results[0]['box']
    x2, y2 = x1 + width, y1 + height
    # The box may start outside the image (negative coordinates). Clamp it to
    # the image instead of taking abs(), which would move the crop window away
    # from the detected face. The far edges are computed before clamping so
    # the box keeps its true extent.
    x1, y1, x2, y2 = (max(0, value) for value in (x1, y1, x2, y2))

    face = pixels[y1:y2, x1:x2]
    if face.size == 0:
        # degenerate box: nothing to crop
        return None

    # resize pixels to the model size
    return Image.fromarray(face).resize(required_size)

#### Saving the cropped images
We are saving the cropped images, just to make training easier and faster

In [9]:
from matplotlib import pyplot as plt
import cv2
import numpy as np
# Crop every training image to its face and store the result under
# ./dataset/train_cropped/<class name>/<original file name>.
cropped_root = os.path.join(os.getcwd(), 'dataset', 'train_cropped')
for path in paths:
    face = extract_face(path)
    if face is None:
        # extract_face rejected the image: nothing to save
        continue
    # The class name is the parent folder of the image. basename/dirname
    # handle both '/' and '\\' on Windows, unlike split(os.sep).
    class_name = os.path.basename(os.path.dirname(path))
    dirpath = os.path.join(cropped_root, class_name)
    # makedirs also creates 'train_cropped' itself when it does not exist yet.
    os.makedirs(dirpath, exist_ok=True)
    cropped_path = os.path.join(dirpath, os.path.basename(path))
    # extract_face returns an RGB image; OpenCV expects BGR channel order.
    img = cv2.cvtColor(np.array(face), cv2.COLOR_RGB2BGR)
    # cv2.imwrite reports failure through its return value, not an exception.
    if not cv2.imwrite(cropped_path, img):
        print(f'Could not write {cropped_path}')
    

