# MST 698X Coding Exercise
Exercise adapted from https://www.pyimagesearch.com/2018/09/24/opencv-face-recognition/

<i>You are developing a facial recognition algorithm for a three-letter agency that will be deployed for target identification on various social media sites.  You have developed a prototype, which must now be evaluated.  You must make a recommendation to the Director of your organization as to whether or not this algorithm is suitable for deployment.</i>

First, ensure that your file structure is appropriately set up.  The hierarchy should look like this:<br>

dataset<br>
|<br>
&nbsp;&nbsp;&nbsp;&nbsp;-|YOUR NAME| &nbsp;&nbsp;<i>#this folder contains 5 of your selfies</i><br>
&nbsp;&nbsp;&nbsp;&nbsp;-|YOUR PARTNER'S NAME| &nbsp;&nbsp;<i>#this folder contains 5 of your partner's selfies</i><br>
|<br>
images &nbsp;&nbsp;<i>#this folder contains 5 of your selfies and 5 of your partner's selfies, with each file labeled numerically.</i><br>
|<br>
face_detection_model &nbsp;&nbsp;<i>#this folder contains a pre-trained deep learning model for face detection</i><br>
&nbsp;&nbsp;&nbsp;&nbsp;-deploy.prototxt<br>
&nbsp;&nbsp;&nbsp;&nbsp;-res10_300x300_ssd_iter_140000.caffemodel<br>
|<br>
output<br>
&nbsp;&nbsp;&nbsp;&nbsp;-embeddings.pickle &nbsp;&nbsp;<i>#embeddings model</i><br>
&nbsp;&nbsp;&nbsp;&nbsp;-le.pickle &nbsp;&nbsp;<i>#label encoder</i><br>
&nbsp;&nbsp;&nbsp;&nbsp;-recognizer.pickle &nbsp;&nbsp;<i>#Linear Support Vector Machine (SVM) model</i><br>
|<br>
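MST 698X Coding Exercise.ipynb &nbsp;&nbsp;<i>#this file</i><br>
|<br>
openface_nn4.small2.v1.t7 &nbsp;&nbsp;<i>#pre-trained face embedding model, loaded from the working directory in Steps 1 and 3</i>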

## Step 1: Build Facial Embeddings

In [None]:
# import and install relevant libraries

!pip install --upgrade imutils
!pip install opencv-python==3.4.2.17
!pip install scikit-learn

import os
import numpy as np
import imutils
from imutils import paths
import pickle
import cv2
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

# instantiate fixed directories
# Create each working directory if it is missing. os.makedirs is used because
# the os.path module has no mkdir() function (os.path.mkdir raises
# AttributeError); exist_ok=True makes the call a no-op for existing folders.
for directory in ('dataset', 'images', 'face_detection_model', 'output'):
    os.makedirs(directory, exist_ok=True)

# Locations of the artifacts written by Step 1 (embeddings) and Step 2
# (recognizer and label encoder), all inside the 'output' directory.
embeddings_path = os.path.join('output', 'embeddings.pickle')
recognizer_path = os.path.join('output', 'recognizer.pickle')
label_encoder_path = os.path.join('output', 'le.pickle')


print('[INFO] done.')

In [None]:
# Instantiate deep learning models

# Face detector: a Caffe network described by a .prototxt definition plus a
# .caffemodel weights file, both stored under 'face_detection_model'.
print("[INFO] loading face detector...")
protoPath = os.path.join('face_detection_model', "deploy.prototxt")
modelPath = os.path.join('face_detection_model', "res10_300x300_ssd_iter_140000.caffemodel")
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# Face embedder: a Torch network read from the current working directory.
print("[INFO] loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch('openface_nn4.small2.v1.t7')

print('[INFO] done.')

In [None]:
# Identify image paths and initialize variables
print("[INFO] quantifying faces...")

# Every image file found under the 'dataset' directory
imagePaths = list(paths.list_images('dataset'))

# Parallel accumulators filled by the training loop: one facial embedding
# and one person name per accepted face
knownEmbeddings, knownNames = [], []

# Running count of faces processed
total = 0

print('[INFO] done.')

In [None]:
# load training images

# minimum detector confidence a face must exceed to be used for training
target_confidence = 0.5

for (i, imagePath) in enumerate(imagePaths):
    # extract the person name from the image path (the name of the folder
    # that directly contains the image)
    print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))
    name = imagePath.split(os.path.sep)[-2]

    # load the image; cv2.imread returns None instead of raising when the
    # file cannot be decoded, so skip such files explicitly
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARN] could not read image {}, skipping".format(imagePath))
        continue

    # resize it to have a width of 600 pixels (while maintaining the aspect
    # ratio), and then grab the image dimensions
    image = imutils.resize(image, width=600)
    (h, w) = image.shape[:2]

    # construct a blob from the image
    imageBlob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                      (300, 300), (104.0, 177.0, 123.0), swapRB=False, crop=False)

    # apply OpenCV's deep learning-based face detector to localize faces in the input image
    detector.setInput(imageBlob)
    detections = detector.forward()

    # ensure at least one face was found; candidates are indexed along the
    # third axis (detections[0, 0, k, :]), so that is the axis to count
    if detections.shape[2] == 0:
        continue

    # we're making the assumption that each image has only ONE
    # face, so find the bounding box with the largest probability
    # (kept in its own variable so the loop counter `i` is not clobbered)
    best = int(np.argmax(detections[0, 0, :, 2]))
    confidence = detections[0, 0, best, 2]

    # ensure that the detection with the largest probability also
    # meets our minimum probability test (thus helping filter out
    # weak detections)
    if not confidence > target_confidence:
        continue

    # compute the (x, y)-coordinates of the bounding box for the face
    box = detections[0, 0, best, 3:7] * np.array([w, h, w, h])
    (startX, startY, endX, endY) = (int(v) for v in box.astype("int"))

    # clamp the box to the image: a negative start index would otherwise
    # wrap around in NumPy slicing and select the wrong region
    startX, startY = max(0, startX), max(0, startY)
    endX, endY = min(w, endX), min(h, endY)

    # extract the face ROI; an empty ROI cannot be turned into a blob
    # NOTE: no minimum face-size filter is applied beyond "non-empty"
    face = image[startY:endY, startX:endX]
    if face.size == 0:
        continue

    # construct a blob for the face ROI, then pass the blob
    # through our face embedding model to obtain the 128-d
    # quantification of the face
    faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255, (96, 96), (0, 0, 0), swapRB=True, crop=False)
    embedder.setInput(faceBlob)
    vec = embedder.forward()

    # add the name of the person + corresponding face
    # embedding to their respective lists
    knownNames.append(name)
    knownEmbeddings.append(vec.flatten())
    total += 1

# save face embeddings to disk

# dump the facial embeddings + names to disk; the context manager closes the
# file even if pickling fails
print("[INFO] serializing {} encodings...".format(total))
data = {"embeddings": knownEmbeddings, "names": knownNames}
with open(embeddings_path, "wb") as f:
    pickle.dump(data, f)

print('[INFO] done.')

## Step 2: Train Facial Recognition Model

In [None]:
# load the face embeddings

print("[INFO] loading face embeddings...")
# Read the embeddings file written in Step 1. The context manager closes the
# file handle deterministically.
# NOTE: pickle can execute arbitrary code on load; only open files produced
# by this notebook.
with open(embeddings_path, "rb") as f:
    data = pickle.load(f)

# encode the labels: map each person name to an integer class id
print("[INFO] encoding labels...")
le = LabelEncoder()
labels = le.fit_transform(data["names"])

print('[INFO] done.')

In [None]:
# train the facial recognition model

# Fit a linear-kernel SVM, with probability estimates enabled, on the face
# embeddings; the fitted model turns an embedding into an encoded person label.
print("[INFO] training model...")
recognizer = SVC(kernel="linear", C=1.0, probability=True).fit(data["embeddings"], labels)

print('[INFO] done.')

In [None]:
# save the recognition model to disk
# (context managers guarantee the files are closed even if pickling raises)
with open(recognizer_path, "wb") as f:
    pickle.dump(recognizer, f)

# write the label encoder to disk
with open(label_encoder_path, "wb") as f:
    pickle.dump(le, f)

print('[INFO] done.')

## Step 3: Recognize Faces

In [None]:
# load models

# load our serialized face detector from disk
print("[INFO] loading face detector...")
protoPath = os.path.join('face_detection_model', "deploy.prototxt")
modelPath = os.path.join('face_detection_model', "res10_300x300_ssd_iter_140000.caffemodel")
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# load our serialized face embedding model from disk
print("[INFO] loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch('openface_nn4.small2.v1.t7')

# load the actual face recognition model along with the label encoder.
# Context managers close the file handles deterministically.
# NOTE: pickle can execute arbitrary code on load; only open files produced
# by this notebook.
with open(recognizer_path, "rb") as f:
    recognizer = pickle.load(f)
with open(label_encoder_path, "rb") as f:
    le = pickle.load(f)

print('[INFO] done.')

In [None]:
# instantiate function to load images and detect faces

def recognize_image(image_path, target_confidence=0.5, image_iteration=0):
    """Detect and label every face in one image, then save the annotated copy.

    Uses the module-level ``detector``, ``embedder``, ``recognizer`` and
    ``le`` objects. Each accepted face gets a red bounding box and a
    "<name>: <probability>%" caption, and the result is written to
    ``output/output_image_<image_iteration>.jpg``.

    Args:
        image_path: Path of the image file to analyse.
        target_confidence: Detections whose confidence does not exceed this
            value are ignored.
        image_iteration: Number embedded in the output file name.

    Raises:
        OSError: If the image at ``image_path`` cannot be read.
        Exception: If the annotated image cannot be written.
    """
    # cv2.imread returns None instead of raising when the file cannot be
    # decoded; fail here with a message that names the offending path
    image = cv2.imread(image_path)
    if image is None:
        raise OSError("Could not read image: {}".format(image_path))

    # resize to a width of 600 pixels (while maintaining the aspect ratio),
    # and then grab the image dimensions
    image = imutils.resize(image, width=600)
    (h, w) = image.shape[:2]

    # construct a blob from the image
    imageBlob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, (300, 300),
                                      (104.0, 177.0, 123.0), swapRB=False, crop=False)

    # apply OpenCV's deep learning-based face detector to localize
    # faces in the input image
    detector.setInput(imageBlob)
    detections = detector.forward()

    # loop over the detections
    for i in range(detections.shape[2]):
        # extract the confidence (i.e., probability) associated with the prediction
        confidence = detections[0, 0, i, 2]

        # filter out weak detections
        if not confidence > target_confidence:
            continue

        # compute the (x, y)-coordinates of the bounding box for the face
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = (int(v) for v in box.astype("int"))

        # clamp the box to the image: a negative start index would otherwise
        # wrap around in NumPy slicing and select the wrong region
        startX, startY = max(0, startX), max(0, startY)
        endX, endY = min(w, endX), min(h, endY)

        # extract the face ROI; an empty ROI cannot be turned into a blob
        # NOTE: no minimum face-size filter is applied beyond "non-empty"
        face = image[startY:endY, startX:endX]
        if face.size == 0:
            continue

        # construct a blob for the face ROI, then pass the blob
        # through our face embedding model to obtain the 128-d
        # quantification of the face
        faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255, (96, 96), (0, 0, 0), swapRB=True, crop=False)
        embedder.setInput(faceBlob)
        vec = embedder.forward()

        # perform classification to recognize the face: take the class with
        # the highest predicted probability
        preds = recognizer.predict_proba(vec)[0]
        j = np.argmax(preds)
        proba = preds[j]
        name = le.classes_[j]

        # draw the bounding box of the face along with the associated
        # probability; the caption goes above the box unless that would
        # place it too close to the top edge
        text = "{}: {:.2f}%".format(name, proba * 100)
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 255), 2)
        cv2.putText(image, text, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

    # write the annotated image into the 'output' folder
    file_name = 'output_image_' + str(image_iteration) + '.jpg'
    image_write_path = os.path.join('output', file_name)

    # cv2.imwrite signals failure through its return value
    if not cv2.imwrite(image_write_path, image):
        raise Exception("Could not write image")

print('[INFO] done.')

### After executing the next cell, check your output folder.  Record the number of true positives, true negatives, false positives, and false negatives in the generated images.

In [None]:
# iterate over test images

# Run recognition on every image found under 'images'; an image's position
# in the listing becomes the numeric suffix of its saved output file.
test_image_paths = list(paths.list_images('images'))

for i, test_image_path in enumerate(test_image_paths):
    recognize_image(test_image_path, image_iteration=i)