# FaceGuard Classification  

The following experiments use a face-embedding model to generate face embeddings for over 150 persons and compare the classification speed of three approaches: a loop comparison, Annoy nearest-neighbor prediction, and a support vector machine.

## 0. Initialization

In [18]:
import torch 
import os 
import numpy as np 
from PIL import Image 

## 0.1 Face detection model for face cropping and bounding box prediction

In [9]:
from facenet_pytorch import MTCNN

class FaceDetection:
    """Thin wrapper around a facenet_pytorch MTCNN face detector."""

    # One detector shared by every instance; it is constructed when the class
    # body is executed, not when an instance is created.
    mtcnn = MTCNN(margin=0, image_size=160)

    def cropface(self, img):
        """Run the shared MTCNN detector on ``img`` and return its result.

        NOTE(review): presumably a cropped 160x160 face tensor, or None when
        no face is found -- confirm against the facenet_pytorch documentation.
        """
        detector = self.mtcnn
        return detector(img)

In [10]:
# Detector instance for the notebook (the MTCNN itself is shared at class level).
facedetection_model = FaceDetection() 

## 0.2 FaceEmbedding model 
We use a pretrained InceptionResnetV1 for these experiments. You can freely choose to include your own trained model here or use one of ours. 

In [11]:
from facenet_pytorch import InceptionResnetV1
from torchvision import transforms
import numpy as np 
import torchvision.models as models   
import torch 
import torch.nn as nn

class EmbeddingModel:
    """Face-embedding model used by all experiments in this notebook.

    Wraps a pretrained ``InceptionResnetV1`` (vggface2 weights) in eval mode.
    """

    def __init__(self, device=None):
        """Load the model onto ``device``.

        Args:
            device: torch device or device string. Defaults to CUDA when it is
                available and falls back to the CPU otherwise.
        """
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)
        self.model = InceptionResnetV1(pretrained='vggface2').eval().to(self.device)
        # To benchmark a self-trained model instead, replace the line above
        # with something like:
        #   self.model = models.resnet34(pretrained=False)
        #   self.model.fc = nn.Linear(512, 512)
        #   self.model.load_state_dict(torch.load('./resnet-34-no-augmentation-tripletloss-new-split-run-1-wmeanandstd-batchsize-8-outsize-512.pth'))
        #   self.model.eval().to(self.device)

    def generateEmbedding(self, img):
        """Return the embedding of a single image tensor.

        Args:
            img: image tensor without a batch dimension; a batch dimension is
                added here and the tensor is moved to the model's device.

        Returns:
            The model output for the one-image batch, on the model's device.
            It carries no autograd graph because inference runs under
            ``torch.no_grad()``.
        """
        # Inference only: without no_grad every returned embedding would keep
        # its whole autograd graph alive for as long as it is stored.
        with torch.no_grad():
            batch = img.unsqueeze(0).to(self.device)
            return self.model(batch)
    

In [13]:
# Single shared embedding model used by every experiment below.
embedding_model = EmbeddingModel()

## 1. Test setup

We load the first image of each person from our training dataset into a list and compute its embedding up front.

In [14]:
# Root directory that is listed to obtain one sub-directory per person.
input_dir = '../faceembedding/lfw_cropped'  
# Upper bound on how many person directories are taken from input_dir.
testsize = 150

In [19]:
# Person directories used for the benchmark (at most `testsize` of them, in
# the order os.listdir returns them).
names = os.listdir(input_dir)[:testsize]

# Path of the first listed image of every person.
train_images = []
for name in names:
    person_dir = input_dir + "/" + name
    for file in os.listdir(person_dir)[:1]:
        train_images.append(person_dir + "/" + file)

# Embed every selected image. The embeddings are moved to the CPU so the
# gallery does not occupy GPU memory; no_grad keeps them free of autograd
# graphs, which would otherwise stay alive as long as the list does.
to_tensor = transforms.ToTensor()
embeddings = []
with torch.no_grad():
    for trainimage in train_images:
        image = to_tensor(Image.open(trainimage))
        embeddings.append(embedding_model.generateEmbedding(image.cuda()).cpu())

We pick a second image of the first person, one that was not indexed above, and generate its embedding vector. It stands in for a newly taken image that is used for prediction.

In [39]:
# Probe image: the second listed file of the first person. The gallery above
# holds only the first listed file of each person, so this one was not indexed.
print(names[0])
probe_dir = input_dir + "/" + names[0]
probe_files = os.listdir(probe_dir)
if len(probe_files) < 2:
    # Same exception type as the bare index would raise, with a usable message.
    raise IndexError(f"{probe_dir} needs at least two images to provide an unseen probe image")
imgname = probe_files[1]
image = Image.open(probe_dir + "/" + imgname)
image = transforms.ToTensor()(image)
# Inference only: no autograd graph is needed for the probe embedding.
with torch.no_grad():
    embeddingtest = embedding_model.generateEmbedding(image.cuda())

Astrid_Eyzaguirre


## 1.1. Memory Database approach

In [20]:
import pickle

class Database:
    """In-memory store that maps a name to the list of embeddings added for it."""

    def __init__(self):
        self.database = {}

    def __len__(self):
        """Return the number of distinct names stored."""
        return len(self.database)

    def exists(self, name):
        """Return True if ``name`` has an entry in the database."""
        return name in self.database

    def add_to_db(self, name, embedding):
        """Append ``embedding`` to the entries of ``name``, creating it if needed."""
        self.database.setdefault(name, []).append(embedding)

    def delete_from_db(self, name):
        """Remove ``name`` and all its embeddings.

        Raises:
            Exception: if ``name`` is not in the database.
        """
        if not self.exists(name):
            raise Exception('Name does not exist in database')
        # Actually drop the key. Assigning None would keep the name "existing",
        # inflate len(), and make the next add_to_db fail on None.append.
        del self.database[name]

    def clear_database(self):
        """Drop every entry."""
        self.database = {}

    def to_file(self, name):
        """Pickle the database to ``<name>.pkl``."""
        with open(name + '.pkl', 'wb') as f:
            pickle.dump(self.database, f, pickle.HIGHEST_PROTOCOL)

    def load_from_file(self, name):
        """Replace the database with the contents of ``<name>.pkl``.

        NOTE: unpickling can execute arbitrary code; only load files that
        were written by ``to_file`` from a trusted source.
        """
        with open(name + '.pkl', 'rb') as f:
            self.database = pickle.load(f)

In [21]:
# Fresh in-memory database; the explicit clear is a no-op on a new instance
# but guarantees an empty store when this cell is re-run.
db = Database()  
db.clear_database()

Manually index all the images in the memory database

In [53]:
# `time` is first imported further down the notebook; import it here so the
# cell also works when the notebook is run top to bottom.
import time

# Index every gallery embedding under the path of the image it came from and
# time how long the insertion takes.
dbstart = time.time()
for image_path, image_embedding in zip(train_images, embeddings):
    db.add_to_db(image_path, image_embedding)
dbend = time.time()
dbtrain = dbend - dbstart
print(f"DBtrain: {dbtrain}s")

DBtrain: 0.00026154518127441406s


In the following we use a function to calculate the l2 distance between a database image and the new incoming image

In [None]:
def compare_face(img_embedding, db, threshold, debug = False):
    """Find the stored face closest to ``img_embedding`` by L2 distance.

    Args:
        img_embedding: torch tensor holding the embedding to look up.
        db: dict mapping a face name to a list of stored embedding tensors.
        threshold: maximum accepted value of ``distance * 100``; a best match
            above it is reported as ``'unknown'``.
        debug: when True, print the distance to every stored embedding.

    Returns:
        Tuple ``(distance, name)`` with the smallest distance found and the
        matching name, or ``'unknown'`` if the threshold is exceeded. For an
        empty ``db`` the result is ``(9999, 'unknown')``.
    """
    # The query does not change inside the loops, so convert it only once.
    query = img_embedding.cpu().detach().numpy()

    curr_min = 9999
    curr_face = 'unknown'

    for face, stored_embeddings in db.items():
        for embedding in stored_embeddings:
            # L2 distance
            dist = np.linalg.norm(np.subtract(embedding.cpu().detach().numpy(), query))
            if debug:
                print("Dist for " + face + " > "+ str(dist))
            if dist < curr_min:
                curr_min = dist
                curr_face = face

    if curr_min*100 > threshold:
        return curr_min, 'unknown'
    return curr_min, curr_face

In [38]:
import time
# Time one brute-force lookup of the probe embedding against every database
# entry. 420 is the limit compare_face applies to distance * 100. The measured
# interval also includes the print call.
start = time.time()
print(compare_face(embeddingtest, db.database, 420, False))
end = time.time()
seconds = end-start
print(f"{seconds} s")

(0.4264202, '../ML_Training_Janine/dataset/dataset/lfw/cropped_split/train/Astrid_Eyzaguirre/augmented_dark_Astrid_Eyzaguirre_0001.jpg')
0.010991334915161133 s


## 1.2. Annoy

In [None]:
from annoy import AnnoyIndex  
import operator

# Train annoy 
class Annoy:
    """Builds, saves and loads an Annoy index using the angular metric."""

    def __init__(self, vectorsize, name):
        """
        Args:
            vectorsize: dimensionality of the indexed vectors.
            name: base name of the index file (``<name>.ann``).
        """
        self.vectorsize = vectorsize
        self.name = name
        # Maps each item name to the integer id used inside the Annoy index.
        self.mapping = {}

    def train(self, items, num_trees):
        """Build a new index from ``items`` and save it to ``<name>.ann``.

        Args:
            items: iterable of ``(name, vector)`` pairs, ``vector`` being a
                torch tensor.
            num_trees: number of trees passed to ``AnnoyIndex.build``.

        Names not yet in ``self.mapping`` get the next free id; ids start at 1.
        A name that occurs more than once reuses its id.
        NOTE(review): re-adding an id presumably overwrites the earlier vector
        in Annoy, so only one vector per name would survive -- confirm.
        """
        self.model = AnnoyIndex(self.vectorsize, 'angular')

        # Highest id handed out so far, tracked incrementally instead of
        # rescanning the whole mapping for every new name.
        highest_idx = max(self.mapping.values(), default=0)
        for name, vector in items:
            if name not in self.mapping:
                highest_idx += 1
                self.mapping[name] = highest_idx

            # .cpu() is a no-op for CPU tensors and lets GPU tensors through.
            self.model.add_item(self.mapping[name], vector.detach().cpu().numpy().squeeze())

        self.model.build(num_trees)
        self.model.save(f'{self.name}.ann')

    def load(self):
        """Load the index previously saved as ``<name>.ann``."""
        self.model = AnnoyIndex(self.vectorsize, 'angular')
        self.model.load(f'{self.name}.ann')

    def predict(self):
        """Not implemented; the notebook queries ``self.model`` directly."""
        pass
        

In [29]:
# Index over 512-dimensional vectors; train() saves it as 'performancetest.ann'.
annoy = Annoy(512, 'performancetest')

In [51]:
# Time building the Annoy index: each gallery embedding is indexed under its
# position in train_images, and the index is built with 10 trees.
start = time.time()
annoy_train = [(idx, embeddings[idx].cpu()) for idx, _ in enumerate(train_images)]
annoy.train(annoy_train, 10)
end = time.time()
seconds_train_annoy = end - start
print(f"{seconds_train_annoy} s train annoy")

0.016185998916625977 s train annoy


In [32]:
# Time a single nearest-neighbour query for the probe embedding. The measured
# interval covers the tensor-to-NumPy conversion, the query and the print.
start = time.time()
query_vector = embeddingtest.cpu().detach().numpy().squeeze()
prediction = annoy.model.get_nns_by_vector(
    query_vector, 1, search_k=-1, include_distances=True
)
print(prediction)
end = time.time()
seconds = end - start
print(f"{seconds} s")

([1], [0.4264199733734131])
0.0006687641143798828 s


## 1.3. SVM

In [43]:
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
import numpy as np

class PredictionSVM:
    """Linear SVM classifier over L2-normalised face embeddings."""

    def __init__(self):
        # All state is per instance. As class attributes the sample lists and
        # the fitted model were shared by every instance and kept growing
        # each time the training cell was re-run.
        self.X = []
        self.Y = []
        self.Y_encoded = []

        self.label_encoder = LabelEncoder()
        self.in_encoder = Normalizer(norm='l2')

        self.model = SVC(kernel='linear', probability=True)

    def train(self, database):
        """Fit the SVM on every embedding in ``database``.

        Args:
            database: dict mapping a label to a list of embedding tensors.
                NOTE(review): each tensor is assumed to be 2-D with one row
                (what ``Normalizer.transform`` accepts) -- confirm.

        Training always starts from an empty sample set, so calling it again
        replaces the previous training data instead of duplicating it.
        """
        self.X = []
        self.Y = []
        for face, face_encodes in database.items():
            for face_encode in face_encodes:
                sample = face_encode.detach().cpu().numpy()
                self.X.append(self.in_encoder.transform(sample).flatten())
                self.Y.append(face)

        self.label_encoder.fit(self.Y)
        self.Y_encoded = self.label_encoder.transform(self.Y)

        self.model.fit(self.X, self.Y_encoded)
        print("Model Trained")

    def predict(self, embedding):
        """Return the predicted label for ``embedding``.

        Args:
            embedding: embedding tensor with the same layout as the tensors
                used for training.

        Returns:
            The original (decoded) label of the predicted class.
        """
        # Apply the same L2 normalisation as in train(); the SVM was fitted on
        # normalised vectors, so raw vectors would be on a different scale.
        embedding_numpy = self.in_encoder.transform(embedding.detach().cpu().numpy())
        faceclass = self.model.predict(embedding_numpy)
        faceprobability = self.model.predict_proba(embedding_numpy)

        class_label = self.label_encoder.inverse_transform(faceclass)[0]

        # TODO: Setup probabilities
        # Currently prints the probability of the first class, not
        # necessarily that of the predicted one.
        print(faceprobability[0][0]*100)
        return class_label

In [50]:
# Time creating a new PredictionSVM and fitting it on the whole in-memory
# database.
start = time.time()  
prediction_model = PredictionSVM() 
prediction_model.train(db.database)
end = time.time()  
seconds_svm_train = end-start
print(f"{seconds_svm_train} s")

Model Trained
0.9798634052276611 s


In [45]:
def prediction(embedding, threshold=95):
    """Classify ``embedding`` with the SVM and verify the result by distance.

    Args:
        embedding: embedding tensor of the image to classify.
        threshold: maximum accepted value of ``L2 distance * 100`` between
            ``embedding`` and the closest stored embedding of the predicted
            label. Defaults to the previously hard-coded 95.

    Returns:
        The label predicted by the SVM, or ``"unknown"`` when no stored
        embedding of that label is close enough.
    """
    label = prediction_model.predict(embedding)

    # Duplex prediction method: the SVM always answers with one of its known
    # labels, so the match is confirmed with an L2 distance check.
    # The distance is computed directly with NumPy; the EmbeddingModel class
    # in this notebook defines no l2() helper.
    query = embedding.cpu().detach().numpy()
    minl2distance = min(
        (
            np.linalg.norm(face.cpu().detach().numpy() - query)
            for face in db.database[label]
        ),
        default=9999,
    )

    if minl2distance * 100 > threshold:
        label = "unknown"

    return label

In [49]:
# Time one SVM-based prediction for the probe embedding (moved to the CPU
# first). The measured interval includes both print calls.
start = time.time() 
print(prediction(embeddingtest.cpu()))
end = time.time()  
seconds_svm = end-start
print(f"{seconds_svm} s")

0.7313113038167364
../ML_Training_Janine/dataset/dataset/lfw/cropped_split/train/Astrid_Eyzaguirre/augmented_dark_Astrid_Eyzaguirre_0001.jpg
0.003383636474609375 s
