# Data Preparation Notebook

+ This notebook prepares face datasets for autoencoder-based face-swapping deepfakes.
+ See `helper_notebook.ipynb` for a step-by-step walkthrough of the individual utilities.
+ We use `cv2`, `face_recognition`, and `dlib` to prepare the datasets.

## Install ```face_recognition```

In [None]:
# %pip (unlike !pip, which runs in a subshell) installs into the environment
# of the running kernel.
%pip install face_recognition

## Import Required Libraries

In [None]:
import glob
import os

import cv2
import dlib
import face_recognition
import face_recognition_models
from matplotlib import pyplot as plt

from face_utils import Extract
from face_utils import FaceFilter
from face_utils import DetectedFace

In [None]:
# File extensions (written without the leading dot) that create_face_dataset
# looks for when it scans an input directory.
included_extentions = [
    'jpg',
    'bmp',
    'png',
    'gif',
]

## Instantiate recognition objects

In [None]:
# Location of the pose-predictor model, as reported by face_recognition_models.
predictor_68_point_model = face_recognition_models.pose_predictor_model_location()
# dlib shape predictor built from that model; _raw_face_landmarks calls it
# once for every face rectangle.
pose_predictor = dlib.shape_predictor(predictor_68_point_model)

## Utility Functions for Identifying and Cropping Faces

In [None]:
def _css_to_rect(css):
    """Build a ``dlib.rectangle`` from a 4-item face box.

    The box items are forwarded to ``dlib.rectangle`` in the order
    ``css[3], css[0], css[1], css[2]``. ``detect_faces`` treats the same
    boxes as (top, right, bottom, left).
    """
    top, right, bottom, left = css[0], css[1], css[2], css[3]
    return dlib.rectangle(left, top, right, bottom)
    
def _raw_face_landmarks(face_image, face_locations):
    """Run the module-level ``pose_predictor`` on every face box.

    Args:
        face_image: image handed to ``pose_predictor`` unchanged.
        face_locations: iterable of boxes accepted by ``_css_to_rect``.

    Returns:
        A list with one ``pose_predictor`` result per box, in input order.
    """
    # Convert every box first, then predict, so no prediction runs unless
    # all boxes converted successfully.
    rects = list(map(_css_to_rect, face_locations))
    shapes = []
    for rect in rects:
        shapes.append(pose_predictor(face_image, rect))
    return shapes

def detect_faces(frame):
    """Yield a ``DetectedFace`` for each face found in ``frame``.

    Face boxes come from ``face_recognition.face_locations`` and are unpacked
    as (top, right, bottom, left); landmarks come from
    ``_raw_face_landmarks``. ``DetectedFace`` receives, positionally: the crop
    ``frame[top:bottom, left:right]``, left, width, top, height, and the
    landmarks belonging to that box.
    """
    boxes = face_recognition.face_locations(frame)
    shapes = _raw_face_landmarks(frame, boxes)

    for box, shape in zip(boxes, shapes):
        top, right, bottom, left = box
        crop = frame[top:bottom, left:right]
        width = right - left
        height = bottom - top
        yield DetectedFace(crop, left, width, top, height, shape)

In [None]:
def get_faces(reference_image,image,extractor,debug=False):
    """Yield the faces in ``image`` that pass the reference-face filter.

    Args:
        reference_image: forwarded to ``FaceFilter``; the callers in this
            notebook supply the path of a reference picture.
        image: image scanned with ``detect_faces``.
        extractor: object whose ``extract(image, face, 256)`` is invoked for
            every accepted face.
        debug: when true, display each extracted face with matplotlib
            (converted from BGR to RGB first).

    Yields:
        ``(index, face)`` pairs; ``index`` counts accepted faces from 0.
    """
    matcher = FaceFilter(reference_image)
    accepted = 0
    for candidate in detect_faces(image):
        if matcher.check(candidate):
            aligned = extractor.extract(image, candidate, 256)
            if debug:
                plt.imshow(cv2.cvtColor(aligned, cv2.COLOR_BGR2RGB))
                plt.show()

            yield accepted, candidate
            accepted += 1
        else:
            print('Skipping not recognized face!')

In [None]:
def create_face_dataset(reference_face_filepath,
                        input_dir,
                        output_dir,
                        extractor,
                        included_extentions=included_extentions):
    """Extract every face matching a reference from a directory of images.

    Each file in ``input_dir`` whose extension is listed in
    ``included_extentions`` (compared case-insensitively) is read with OpenCV
    and scanned with ``get_faces``. Every accepted face is passed through
    ``extractor.extract(image, face, 256)`` and written to ``output_dir``
    under the source file's name. Further accepted faces from the same image
    get an ``_<index>`` suffix so they do not overwrite one another.

    A failure on one image is reported and the scan continues with the next.

    Args:
        reference_face_filepath: reference picture forwarded to ``get_faces``.
        input_dir: directory scanned (non-recursively) for images.
        output_dir: directory receiving the results; created if missing.
        extractor: object providing ``extract(image, face, size)``.
        included_extentions: accepted extensions, without the leading dot.
    """
    allowed = {ext.lower().lstrip('.') for ext in included_extentions}
    image_list = [fn for fn in glob.glob(os.path.join(input_dir, "*.*"))
                  if os.path.splitext(fn)[1][1:].lower() in allowed]
    print("Total Images to Scan={}".format(len(image_list)))

    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    positive_ctr = 0
    for filename in image_list:
        # The try is scoped per image so one bad file cannot abort the run.
        try:
            image = cv2.imread(filename)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                raise ValueError('image could not be read')
            stem, ext = os.path.splitext(os.path.basename(filename))
            for idx, face in get_faces(reference_face_filepath, image, extractor):
                resized_image = extractor.extract(image, face, 256)
                # First match keeps the source name; later ones are suffixed.
                suffix = '' if idx == 0 else '_{}'.format(idx)
                output_file = os.path.join(output_dir, stem + suffix + ext)
                if not cv2.imwrite(output_file, resized_image):
                    raise OSError('could not write {}'.format(output_file))
                positive_ctr += 1
        except Exception as e:
            print('Failed to extract from image: {}. Reason: {}'.format(filename, e))
    print("Images with reference face={}".format(positive_ctr))

## Start Dataset Preparation Process

In [None]:
# Extract instance passed to the create_face_dataset calls below.
extractor = Extract()

In [None]:
# Reference picture used to decide which detected faces are kept for the
# 'nicolas' dataset.
reference_face_filepath = "nicolas_ref.png"

In [None]:
# Scan the 'nicolas' directory and write the matching faces to 'nicolas_face'.
create_face_dataset(
    reference_face_filepath=reference_face_filepath,
    input_dir='nicolas',
    output_dir='nicolas_face',
    extractor=extractor,
    included_extentions=included_extentions,
)

In [None]:
# Unpack trump.zip; presumably this provides the 'trump' directory scanned
# below (TODO confirm the archive layout).
!unzip trump.zip

In [None]:
# Reference picture used to decide which detected faces are kept for the
# 'trump' dataset.
trump_reference_face_filepath = "trump_ref.png"

In [None]:
# Sources for the Trump photos:
#   kaggle: https://www.kaggle.com/mbkinaci/trump-photos  (fast, high resolution)
#   anon  : https://anonfiles.com/p7w3m0d5be/face-swap.zip  (slow)
# Scan the 'trump' directory and write the matching faces to 'trump_face'.
create_face_dataset(
    reference_face_filepath=trump_reference_face_filepath,
    input_dir='trump',
    output_dir='trump_face',
    extractor=extractor,
    included_extentions=included_extentions,
)