# Generate Face Encodings for Known Artists

In [1]:
from datetime import datetime
import argparse
import cv2
import face_recognition
import pickle
import os
import numpy as np
from imutils import paths

In [2]:
# Command-line style configuration for the notebook. Two options are defined:
#   -i / --image_set : root folder of the face images, laid out as
#                      <image_set>/<artist>/<FileName>.png
#   -o / --encoding  : pickle file that will hold the face encodings of the
#                      known artists

parser = argparse.ArgumentParser()
parser.add_argument(
    '-i', '--image_set',
    default='image_set',
    help='Path to the Directory of Face Images Dataset',
)
parser.add_argument(
    '-o', '--encoding',
    default='FaceEncoding.pickle',
    help='Path to the Dictionary File of known artists face encordings',
)

_StoreAction(option_strings=['-o', '--encoding'], dest='encoding', nargs=None, const=None, default='FaceEncoding.pickle', type=None, choices=None, help='Path to the Dictionary File of known artists face encordings', metavar=None)

### Arguments with default values: image_set is the subfolder 'image_set', encoding is 'FaceEncoding.pickle'

In [3]:
# Parse an empty argument list so that both options take their defaults.
# To override them from inside the notebook, pass an explicit list instead, e.g.
#   parser.parse_args(['-i', 'dataset', '-o', 'FaceEncoding.pickle'])
args = parser.parse_args(args=[])
print(args)

Namespace(encoding='FaceEncoding.pickle', image_set='image_set')


In [4]:
# imgPaths holds the path of every image file that imutils finds under the
# folder given by --image_set
imgPaths = [imgFile for imgFile in paths.list_images(args.image_set)]
print(f"There are {len(imgPaths)} images in the folder {args.image_set}.")

There are 236 images in the folder image_set.


### Initialization 

In [5]:
# Parallel accumulators filled by the encoding loop: entry k of knownEncodings
# belongs to the artist named in entry k of knownArtists.
knownEncodings, knownArtists = [], []

# Timestamps used by the progress printout of the encoding loop:
#   start - reset after every progress line, used for the per-image seconds
#   end   - never reassigned afterwards; despite its name it marks the
#           beginning of the run and is used for the total elapsed minutes
start = datetime.now()
end = datetime.now()

# Important assumption: each photo contains exactly one face during the
# encoding process!

### Steps of Face Encoding Generation
#### 1. Get the artist name
#### 2. Read the image, convert the image from BGR to RGB
#### 3. Locate the face bounding box 
#### 4. Generating Face Encoding (Based on image and bounding box)
#### 5. Building a dictionary of Encodings
#### 6. Dump the dictionary to file

In [6]:
# Loop through all images in the dataset folder. For every image: derive the
# artist name, detect the face, compute its encoding and append the result to
# the parallel lists knownEncodings / knownArtists.
windowOpened = False    # the preview window is only closed if one was created
try:
    for (i, imgPath) in enumerate(imgPaths):
        # The artist is the folder that directly contains the image, e.g.
        # 'image_set/Josh/File14.png' -> 'Josh'. os.path is used instead of
        # split('/') so that Windows separators and nested dataset roots
        # (e.g. 'data/image_set/Josh/File14.png') give the right name.
        name = os.path.basename(os.path.dirname(imgPath))

        # Show the progress of encoding generation. 'seconds spend' is the time
        # since `start`, which is reset right after this print, so it reports
        # the duration of the previous iteration; 'total minutes spend' is
        # measured from `end`, which is set once before the loop.
        print("processing {n} {image}: {a:5d} of{b:5d}, percentage: {c: 5.2f}%, seconds spend:{t:5.2f}, total minutes spend:{tt:5.2f} "
              .format(n=name, image=os.path.basename(imgPath), a=i + 1, b=len(imgPaths), c=(i + 1) * 100/len(imgPaths),
              t=(datetime.now()-start).total_seconds(), tt=(datetime.now()-end).total_seconds()/60))
        start = datetime.now()

        # cv2.imread returns None (it does not raise) when the file cannot be
        # read or decoded; fail here with a clear message instead of letting
        # cvtColor raise an obscure assertion error.
        img = cv2.imread(imgPath)
        if img is None:
            raise RuntimeError(f"cv2.imread could not read image: {imgPath}")
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # cv2 uses BGR, dlib expects RGB

        # Bounding box of the face. model can also be 'hog', which is faster;
        # 'cnn' is more accurate.
        boxes = face_recognition.face_locations(img_rgb, model='cnn')

        # Exactly one face is expected per photo. With several faces they are
        # usually not the same person, and with none there is nothing to
        # encode; report the offending image and stop.
        if len(boxes) != 1:
            print(name, imgPath, len(boxes))
            break

        # Face encoding based on the image and its bounding box.
        encodings = face_recognition.face_encodings(img_rgb, boxes)

        # NOTE(review): `encodings` is the list returned by face_encodings, so
        # each entry stored here is itself a list. The layout is kept unchanged
        # so existing pickle consumers keep working - confirm this nesting is
        # what they expect.
        knownEncodings.append(encodings)
        knownArtists.append(name)

        # Draw the single detected box on the original (BGR) image as a preview.
        top, right, bottom, left = boxes[0]
        cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
        cv2.imshow('img', img)
        windowOpened = True
        cv2.waitKey(1)
finally:
    # Close the preview window so it does not linger after the loop finishes,
    # stops early or raises.
    if windowOpened:
        cv2.destroyAllWindows()
       

processing Hina File19.png:     1 of  236, percentage:  0.42%, seconds spend: 0.92, total minutes spend: 0.02 
processing Hina File3.png:     2 of  236, percentage:  0.85%, seconds spend: 3.51, total minutes spend: 0.07 
processing Hina File2.png:     3 of  236, percentage:  1.27%, seconds spend: 2.70, total minutes spend: 0.12 
processing Hina File18.png:     4 of  236, percentage:  1.69%, seconds spend: 3.84, total minutes spend: 0.18 
processing Hina File1.png:     5 of  236, percentage:  2.12%, seconds spend: 2.75, total minutes spend: 0.23 
processing Hina File5.png:     6 of  236, percentage:  2.54%, seconds spend: 3.38, total minutes spend: 0.29 
processing Hina File4.png:     7 of  236, percentage:  2.97%, seconds spend: 2.75, total minutes spend: 0.33 
processing Hina File20.png:     8 of  236, percentage:  3.39%, seconds spend: 3.33, total minutes spend: 0.39 
processing Hina File6.png:     9 of  236, percentage:  3.81%, seconds spend: 2.45, total minutes spend: 0.43 
process

processing Heyoon File13.png:    74 of  236, percentage:  31.36%, seconds spend: 3.14, total minutes spend: 3.86 
processing Heyoon File9.png:    75 of  236, percentage:  31.78%, seconds spend: 2.55, total minutes spend: 3.90 
processing Heyoon File8.png:    76 of  236, percentage:  32.20%, seconds spend: 1.64, total minutes spend: 3.93 
processing Heyoon File12.png:    77 of  236, percentage:  32.63%, seconds spend: 2.52, total minutes spend: 3.97 
processing Heyoon File16.png:    78 of  236, percentage:  33.05%, seconds spend: 1.94, total minutes spend: 4.00 
processing Heyoon File17.png:    79 of  236, percentage:  33.47%, seconds spend: 1.99, total minutes spend: 4.03 
processing Heyoon File15.png:    80 of  236, percentage:  33.90%, seconds spend: 1.81, total minutes spend: 4.06 
processing Heyoon File14.png:    81 of  236, percentage:  34.32%, seconds spend: 1.80, total minutes spend: 4.09 
processing Sofya File19.png:    82 of  236, percentage:  34.75%, seconds spend: 2.80, tota

processing Any File5.png:   147 of  236, percentage:  62.29%, seconds spend: 3.38, total minutes spend: 8.76 
processing Any File4.png:   148 of  236, percentage:  62.71%, seconds spend: 2.55, total minutes spend: 8.81 
processing Any File20.png:   149 of  236, percentage:  63.14%, seconds spend: 3.38, total minutes spend: 8.86 
processing Any File6.png:   150 of  236, percentage:  63.56%, seconds spend: 1.73, total minutes spend: 8.89 
processing Any File7.png:   151 of  236, percentage:  63.98%, seconds spend: 2.70, total minutes spend: 8.94 
processing Any File10.png:   152 of  236, percentage:  64.41%, seconds spend: 2.76, total minutes spend: 8.98 
processing Any File11.png:   153 of  236, percentage:  64.83%, seconds spend: 2.67, total minutes spend: 9.03 
processing Any File13.png:   154 of  236, percentage:  65.25%, seconds spend: 1.44, total minutes spend: 9.05 
processing Any File9.png:   155 of  236, percentage:  65.68%, seconds spend: 2.72, total minutes spend: 9.10 
proces

processing Josh File2.png:   221 of  236, percentage:  93.64%, seconds spend: 2.49, total minutes spend:12.05 
processing Josh File1.png:   222 of  236, percentage:  94.07%, seconds spend: 3.48, total minutes spend:12.11 
processing Josh File5.png:   223 of  236, percentage:  94.49%, seconds spend: 3.16, total minutes spend:12.16 
processing Josh File4.png:   224 of  236, percentage:  94.92%, seconds spend: 2.69, total minutes spend:12.21 
processing Josh File6.png:   225 of  236, percentage:  95.34%, seconds spend: 2.61, total minutes spend:12.25 
processing Josh File7.png:   226 of  236, percentage:  95.76%, seconds spend: 2.87, total minutes spend:12.30 
processing Josh File10.png:   227 of  236, percentage:  96.19%, seconds spend: 2.93, total minutes spend:12.35 
processing Josh File11.png:   228 of  236, percentage:  96.61%, seconds spend: 1.74, total minutes spend:12.38 
processing Josh File13.png:   229 of  236, percentage:  97.03%, seconds spend: 2.14, total minutes spend:12.41

In [8]:
# Sanity check: every image must have produced one encoding AND one artist
# label. The encoding loop stops early on an image without exactly one face,
# which leaves both lists shorter than imgPaths, so False here means the
# dataset needs fixing before the encodings are saved.
len(knownEncodings) == len(knownArtists) == len(imgPaths)

True

In [9]:
# Build the dictionary of face encodings.
# "artists" must hold the full list of labels (parallel to "encodings"), not
# `name`, which is only the artist of the last image the loop processed.
Encodings_Dictionary = {"encodings": knownEncodings, "artists": knownArtists}

In [10]:
# Serialize the encodings dictionary to the file given by --encoding.
# The context manager closes the file even if pickling or writing fails.
with open(args.encoding, 'wb') as file:
    pickle.dump(Encodings_Dictionary, file)