In [None]:
import os
import numpy as np
import pandas as pd
import random

from PIL import Image, ImageOps, ImageFilter, ImageEnhance
from sklearn.metrics.pairwise import cosine_similarity

import cv2
import dlib

  from .autonotebook import tqdm as notebook_tqdm





In [6]:
# Build one face embedding per detected face for every image under DIR,
# where each sub-folder of DIR is named after the person it contains.
DIR = "Cropped_50"

# Pre-trained dlib model files, resolved relative to the working directory.
shape_predictor_path = "shape_predictor_68_face_landmarks.dat"
face_rec_model_path = "dlib_face_recognition_resnet_model_v1.dat"

detector = dlib.get_frontal_face_detector()
sp = dlib.shape_predictor(shape_predictor_path)
facerec = dlib.face_recognition_model_v1(face_rec_model_path)

label_ids = {}        # person label -> integer id, assigned in first-seen order
current_id = 0        # next id to hand out
embeddings_list = []  # rows of [id, label, embedding]

# os.listdir() returns entries in arbitrary order; sorting makes the
# label -> id mapping and the row order of the saved frame reproducible.
for person_folder in sorted(os.listdir(DIR)):
    person_path = os.path.join(DIR, person_folder)
    print(person_path)

    # Only real person folders are processed; the 'default' folder is skipped.
    if not os.path.isdir(person_path) or person_folder == 'default':
        continue

    # The label is the folder name, so it is the same for every image in it.
    img_label = os.path.basename(person_path)

    for image_file in sorted(os.listdir(person_path)):
        img_path = os.path.join(person_path, image_file)

        # An id is only allocated once a folder turns out to contain a file.
        if img_label not in label_ids:
            label_ids[img_label] = current_id
            current_id += 1
        id_ = label_ids[img_label]

        # Close the underlying file as soon as the pixels have been copied.
        with Image.open(img_path) as opened_image:
            img = opened_image.convert("RGB")
        img_cv = np.array(img)

        # Second argument asks the detector to upsample the image once.
        dets = detector(img_cv, 1)

        # An image contributes one row per detected face (possibly none).
        for d in dets:
            shape = sp(img_cv, d)
            embedding = np.array(facerec.compute_face_descriptor(img_cv, shape))
            embeddings_list.append([id_, img_label, embedding])

df = pd.DataFrame(embeddings_list, columns=['id', 'label', 'embedding'])

df.to_pickle("embeddings_C_54.pkl")

print(df.head())


Cropped_50\Akif
Cropped_50\Alper
Cropped_50\Bart
Cropped_50\Daiane
Cropped_50\Florian
Cropped_50\Konrad
Cropped_50\Lasse
Cropped_50\Matthias
Cropped_50\Michiel
Cropped_50\Nelli
Cropped_50\Raul
Cropped_50\Senne
Cropped_50\Seppe
Cropped_50\Youssef
   id label                                          embedding
0   0  Akif  [-0.07321271300315857, -0.015161692164838314, ...
1   0  Akif  [-0.04495805874466896, -0.03904763609170914, 0...
2   0  Akif  [-0.06774255633354187, 0.03536646068096161, 0....
3   0  Akif  [-0.06966079771518707, 0.012463579885661602, 0...
4   0  Akif  [-0.08187726140022278, 0.07773150503635406, 0....


In [None]:
def evaluate_embeddings(df):
    """Return the mean intra-person and inter-person cosine similarity.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'label' column and an 'embedding' column whose cells
        are equal-length numeric vectors.

    Returns
    -------
    tuple of float
        ``(intra, inter)`` where ``intra`` is the mean cosine similarity over
        all unordered pairs of rows sharing a label and ``inter`` is the mean
        over all unordered pairs of rows with different labels. A value is
        NaN when no such pair exists (e.g. a single label, or one row per
        label).

    Notes
    -----
    All pairwise similarities are computed with a single matrix product
    instead of one call per pair. Each cross-label pair is counted once;
    counting every pair in both directions would give the same mean.
    """
    nan = float("nan")
    if len(df) == 0:
        return nan, nan

    vectors = np.asarray(df['embedding'].tolist(), dtype=float)
    # Integer codes make the label comparison independent of the label dtype.
    codes, _ = pd.factorize(df['label'])

    # Scale rows to unit length; zero-norm rows are left as zero vectors so
    # their similarity to anything is 0 rather than NaN.
    norms = np.linalg.norm(vectors, axis=1, keepdims=True)
    norms[norms == 0] = 1.0
    unit = vectors / norms
    similarity = unit @ unit.T

    same_label = codes[:, None] == codes[None, :]
    # Strict upper triangle: each unordered pair once, no self-pairs.
    pair_mask = np.triu(np.ones(similarity.shape, dtype=bool), k=1)

    intra_similarities = similarity[same_label & pair_mask]
    inter_similarities = similarity[~same_label & pair_mask]

    # Averaging an empty selection would emit a RuntimeWarning; report NaN quietly.
    intra = float(intra_similarities.mean()) if intra_similarities.size else nan
    inter = float(inter_similarities.mean()) if inter_similarities.size else nan
    return intra, inter

# Score the embeddings frame built earlier in the notebook and report both averages.
intra_similarity, inter_similarity = evaluate_embeddings(df)

print(
    f"Average Intra-person Similarity: {intra_similarity}",
    f"Average Inter-person Similarity: {inter_similarity}",
    sep="\n",
)

Average Intra-person Similarity: 0.9533899585932866
Average Inter-person Similarity: 0.8528155944917816
