In [30]:
import os
import pandas as pd
import numpy as np
import random

In [31]:
# Project root: two directory levels above the working directory that is
# current at the moment this cell runs (abspath resolves against the cwd).
PROJECT_PATH = os.path.abspath(os.path.join(os.pardir, os.pardir))

In [32]:
# Make the project root the working directory so that the relative paths used
# by later cells are resolved against it.
os.chdir(PROJECT_PATH)

In [33]:
# Create a dictionary mapping each file's base name to the DataFrame read from it.
# Files are addressed through explicit paths instead of changing the working
# directory, so a failed read cannot leave the kernel stranded inside the
# dataset folder; the working directory is left untouched by this cell.
dataset_dir = os.path.join(PROJECT_PATH, 'embedded_face_dataset')
# Keep only CSV files (skips entries such as hidden files or checkpoint folders
# that pd.read_csv cannot parse); sorted() makes the order of `faces`
# independent of the order in which the filesystem lists the directory.
files = sorted(name for name in os.listdir(dataset_dir) if name.endswith('.csv'))
faces = {}
for file in files:
    # splitext removes only the trailing extension, whereas str.replace would
    # also strip a '.csv' occurring in the middle of a file name
    faces[os.path.splitext(file)[0]] = pd.read_csv(os.path.join(dataset_dir, file))

In [34]:
# For every person, build a list of (name, embedding) tuples that combines that
# person's embeddings with those of one randomly chosen *different* person,
# plus a shuffled copy of the same list.
# A dedicated seeded generator keeps the pairing and the shuffling reproducible
# across kernel restarts without touching the global `random` state.
pair_rng = random.Random(42)
all_embed = []
all_embed_shuffle = []
all_faces = list(faces.keys())
for face_name, face in faces.items():
    # choose the second person directly among the other names; the previous
    # retry loop never terminated when only one person was available
    candidates = [name for name in all_faces if name != face_name]
    if not candidates:
        raise ValueError("at least two people are required to pair different faces")
    second_face_name = pair_rng.choice(candidates)
    second_face = faces[second_face_name]

    # (name, data) tuples: every column of a person's DataFrame is taken as one
    # embedded picture, first person's columns first, then the second person's
    all_embed_res = [(face_name, face[col].tolist()) for col in face.columns]
    all_embed_res += [(second_face_name, second_face[col].tolist())
                      for col in second_face.columns]
    # add list of tuples to whole list
    all_embed.append(all_embed_res)
    # add a shuffled copy (same tuples, random order) to the shuffled list
    all_embed_shuffle.append(pair_rng.sample(all_embed_res, len(all_embed_res)))

# 

In [42]:
# Turn every per-person list into training samples: the tuple at position j of
# the ordered list is matched with the tuple at position j of its shuffled copy.
# The feature vector is the element-wise difference of the two embeddings; the
# label is 1 when both tuples carry the same name (same person) and 0 otherwise.
# NOTE(review): a shuffled position may hold the very same tuple as the ordered
# one, which yields an all-zero difference labelled 1 - confirm this is intended.

tuples_array = []
outputs_array = []
for ordered, shuffled in zip(all_embed, all_embed_shuffle):
    for (name_a, vec_a), (name_b, vec_b) in zip(ordered, shuffled):
        tuples_array.append(np.array(vec_a) - np.array(vec_b))
        outputs_array.append(1 if name_a == name_b else 0)

In [43]:
from sklearn.model_selection import train_test_split

In [44]:
# Hold out 30% of the samples for evaluation; the fixed random_state makes the
# split itself repeatable for a given input order.
X_train, X_test, y_train, y_test = train_test_split(
    tuples_array,
    outputs_array,
    test_size=0.3,
    random_state=42,
)

In [45]:
from sklearn import svm
from sklearn.metrics import accuracy_score

In [46]:
# Support-vector classifier on the embedding differences; every setting other
# than gamma is left at the library default.
clf = svm.SVC(gamma="scale")
# fit() is the cell's last expression, so the fitted estimator is displayed
clf.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [47]:
# Predict a same-person (1) / different-person (0) label for each held-out sample.
y_pred = clf.predict(X_test)

In [49]:
# Fraction of held-out samples whose predicted label matches the true label,
# reported as a percentage.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test set efficienty : {}%".format(100 * score))

Test set efficienty : 95.90288315629742%


In [20]:
import pickle

In [21]:
# Persist the trained classifier next to the current working directory.
model_filename = "face_comparison_svm.sav"
# The context manager guarantees the file is flushed and closed even if
# pickling fails; a bare open() left the handle to the garbage collector.
with open(model_filename, "wb") as model_file:
    pickle.dump(clf, model_file)