In [4]:
!pip install deepface

Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting lz4>=4.3.3 (from mtcnn>=0.1.0->deepface)
  Downloading lz4-4.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Downloading deepface-0.0.93-py3-none-any.whl (108 kB)
[2K   [90m━

In [5]:
import os
import pickle
import numpy as np
import pandas as pd
from deepface import DeepFace
from scipy.spatial.distance import cosine
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

25-02-21 07:23:30 - Directory /root/.deepface has been created
25-02-21 07:23:30 - Directory /root/.deepface/weights has been created


Extract embedding


In [None]:


def extract_embeddings(dataset_path, output_file, model_name="Facenet512"):
    """Extracts embeddings and saves them in a pickle file."""
    embeddings_dict = {}

    for person in os.listdir(dataset_path):
        person_dir = os.path.join(dataset_path, person)
        if not os.path.isdir(person_dir):
            continue  # Skip non-directory files

        embeddings_dict[person] = {}
        for image_name in os.listdir(person_dir):
            image_path = os.path.join(person_dir, image_name)
            try:
                embedding = DeepFace.represent(image_path, model_name=model_name, enforce_detection=False)[0]["embedding"]
                embeddings_dict[person][image_name] = np.array(embedding)
                print(f"✔️ Extracted embedding for: {image_name}")
            except Exception as e:
                print(f"❌ Error processing {image_path}: {e}")
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, "wb") as f:
        pickle.dump(embeddings_dict, f)
    print(f"✅ Embeddings saved to {output_file}")

    return embeddings_dict




Load embedding

In [2]:
def load_embeddings(file_path):
    """Loads the precomputed embeddings from a file."""
    with open(file_path, "rb") as f:
        embeddings = pickle.load(f)
    print(f"✅ Loaded embeddings from {file_path}")
    return embeddings

def verify_faces_with_embeddings(embeddings, person1, img1, person2, img2, threshold=0.4):
    """Compares precomputed embeddings for face verification."""
    if person1 not in embeddings or person2 not in embeddings:
        print(f"Error: Person not found in embeddings.")
        return False, None

    if img1 not in embeddings[person1] or img2 not in embeddings[person2]:
        print(f"Error: Image not found in embeddings.")
        return False, None

    emb1 = embeddings[person1][img1]
    emb2 = embeddings[person2][img2]

    distance = cosine(emb1, emb2)  # Cosine similarity
    verified = distance <= threshold

    return verified, distance


evaluate performance

In [32]:
def evaluate_performance(train_embeddings, test_embeddings, threshold, model,results_path):
    """Evaluates face verification using precomputed embeddings."""
    results = []
    results_dir = f"{results_path}/results_{threshold}"
    os.makedirs(results_dir, exist_ok=True)

    for test_person, test_images in test_embeddings.items():
        print(f"Processing test person: {test_person}")

        for train_person, train_images in train_embeddings.items():
            if test_person == train_person:
                for test_img, test_emb in test_images.items():
                    for train_img, train_emb in train_images.items():
                        distance = cosine(test_emb, train_emb)
                        verified = distance <= threshold

                        results.append({
                            'train': train_img,
                            'test': test_img,
                            'verified': verified,
                            'y_true': 1,  # Since it's the same person
                            'y_pred': 1 if verified else 0,
                            'distance': distance,
                            'model': model
                        })

        # Save per-person results
        df_person = pd.DataFrame(results)
        #df_person.to_csv(os.path.join(results_dir, f"results_{test_person}_{threshold}_{model}.csv"), index=False)

    # Save overall results
    df_results = pd.DataFrame(results)
    df_results.to_csv(os.path.join(results_dir, f"results_{threshold}_{model}.csv"), index=False)

    return df_results


calcolate metrics

In [33]:
def calculate_metrics(results_df, model_name, threshold,results_path='/content/drive/MyDrive/dataset_colab/output'):
    """Calculates and exports classification metrics."""
    y_true = results_df['y_true']
    y_pred = results_df['y_pred']

    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    metrics_df = pd.DataFrame({
        'Model': [model_name],
        'TP': [TP],
        'TN': [TN],
        'FP': [FP],
        'FN': [FN],
        'Accuracy': [accuracy],
        'Precision': [precision],
        'Recall': [recall],
        'F1-Score': [f1]
    })

    eval_dir = f'{results_path}/results_{threshold}/eval'
    os.makedirs(eval_dir, exist_ok=True)
    metrics_df.to_csv(f'{eval_dir}/evalmetrics_{model_name}_{threshold}.csv', index=False)

    return metrics_df


Run extract embedding




In [None]:
# Define dataset paths
pathd = '/content/drive/MyDrive/dataset_colab/'
path_embedding=pathd+'embedding/'
train_dir = os.path.join(pathd, 'mtcnn_temp/sampleset')
val_dir = os.path.join(pathd, 'mtcnn_temp/validationset')
test_dir = os.path.join(pathd, 'mtcnn_temp/testingset')
model_name = "Facenet512"
# Extract embeddings
extract_embeddings(train_dir, path_embedding+"train_embeddings.pkl", model_name=model_name)
extract_embeddings(test_dir,path_embedding+"test_embeddings.pkl", model_name=model_name)





✔️ Extracted embedding for: Alan_Alda_2168_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2349_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2196_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2177_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2316_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2217_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2277_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2258_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2248_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2205_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2273_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2259_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2247_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2507_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2441_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2409_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2267_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2173_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2242_0.jpeg
✔️ Extracted embedding for: Alan_Alda_2297_0.jpeg


{'Alan_Alda': {'Alan_Alda_2180_0.jpeg': array([ 2.31030762e-01,  3.92674863e-01, -1.22552609e+00, -1.31604588e+00,
         -1.20300901e+00,  7.26616740e-01, -4.24310684e-01,  1.27393365e+00,
         -7.73765504e-01,  8.54889691e-01, -4.09661382e-01,  2.45340014e+00,
         -2.66306698e-01,  2.39265114e-01, -7.07193017e-01, -2.39647895e-01,
         -4.02441889e-01, -1.55171442e+00, -1.77818257e-02, -2.83550191e+00,
         -6.46012604e-01,  6.14785329e-02, -6.35969639e-01,  7.09181279e-02,
          4.79089081e-01, -1.26669717e+00, -3.80903423e-01, -3.07851374e-01,
         -2.35252678e-01, -1.11877072e+00, -9.84367430e-01, -2.43120790e+00,
          1.14990914e+00,  6.40777886e-01, -1.63115597e+00,  7.21602440e-01,
          2.49433070e-01,  1.06214929e+00,  3.06278914e-01, -9.57878888e-01,
          2.70492822e-01,  2.38465399e-01, -4.94711876e-01,  5.63156009e-01,
          2.34679982e-01,  9.12210941e-01, -1.72961557e+00, -9.92024183e-01,
         -4.55158651e-02, -2.74574906e

Calculate cosine for each model from embedding

In [39]:
# Define models and thresholds
models = ["Facenet512","Facenet","VGG-Face"]
threshold = 0.5

pathd = '/content/drive/MyDrive/dataset_colab/'
path_embedding=pathd+'embedding/'
for model in models:
    print(f"Evaluating model: {model}")
    # Load precomputed embeddings
    train_embeddings = load_embeddings(path_embedding+f"embedding_{model}_train_embeddings.pkl")
    test_embeddings = load_embeddings(path_embedding+f"embedding_{model}_test_embeddings.pkl")
    results = evaluate_performance(train_embeddings, test_embeddings, threshold, model,results_path=pathd+"output")
    metrics_df = calculate_metrics(results, model, threshold,results_path=pathd+"output")
    print(metrics_df)


Evaluating model: Facenet512
✅ Loaded embeddings from /content/drive/MyDrive/dataset_colab/embedding/embedding_Facenet512_train_embeddings.pkl
✅ Loaded embeddings from /content/drive/MyDrive/dataset_colab/embedding/embedding_Facenet512_test_embeddings.pkl
Processing test person: Aaron_Eckhart
Processing test person: Adam_Brody
Processing test person: Adrienne_Barbeau
Processing test person: Al_Pacino
Processing test person: Alexander_Skarsgård
Processing test person: Andrea_Bogart
Processing test person: Andy_Garcia
Processing test person: Angie_Harmon
Processing test person: Anne_Hathaway
Processing test person: Annie_Ilonzeh
Processing test person: Anthony_Hopkins
Processing test person: Antonio_Banderas
Processing test person: Ashton_Kutcher
Processing test person: Audrey_Landers
Processing test person: Ben_Kingsley
Processing test person: Ben_McKenzie
Processing test person: Ben_Stiller
Processing test person: Bernie_Mac
Processing test person: Billy_Zane
Processing test person: B

Combine result

In [40]:
def combine_results(dir_path):
    """Combines results from CSV files within a directory."""
    all_results = []
    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)  # Form the complete file path
        if os.path.isfile(file_path) and file_name.endswith(".csv"):  # Check if it's a file and a CSV
            try:
                df = pd.read_csv(file_path)
                all_results.append(df)
            except FileNotFoundError:
                print(f"Warning: File not found - {file_path}")
            except pd.errors.EmptyDataError:
                print(f"Warning: Empty file - {file_path}")
    combined_df = pd.concat(all_results, ignore_index=True)
    return combined_df

Evaluate Result Threshold =0.4

In [43]:
threshold=0.4
eval_result=combine_results(dir_path=pathd+"output"f'/results_{threshold}/eval')
eval_result


Unnamed: 0,Model,TP,TN,FP,FN,Accuracy,Precision,Recall,F1-Score
0,Facenet512,11881,0,0,7709,0.606483,1.0,0.606483,0.755044
1,Facenet,11910,0,0,7680,0.607963,1.0,0.607963,0.75619
2,VGG-Face,3229,0,0,16361,0.164829,1.0,0.164829,0.28301


In [44]:
all_result=combine_results(dir_path=pathd+"output"f'/results_{threshold}')

all_result.head()


Unnamed: 0,train,test,verified,y_true,y_pred,distance,model
0,Aaron_Eckhart_11_11.jpeg,Aaron_Eckhart_108_84.jpeg,True,1,1,0.245835,Facenet512
1,Aaron_Eckhart_149_109.jpeg,Aaron_Eckhart_108_84.jpeg,False,1,0,0.456095,Facenet512
2,Aaron_Eckhart_187_132.jpeg,Aaron_Eckhart_108_84.jpeg,True,1,1,0.241423,Facenet512
3,Aaron_Eckhart_188_133.jpeg,Aaron_Eckhart_108_84.jpeg,False,1,0,0.44215,Facenet512
4,Aaron_Eckhart_196_140.jpeg,Aaron_Eckhart_108_84.jpeg,True,1,1,0.295349,Facenet512


In [38]:
# prompt: convert all_result  and eval_result to xlsx

# Convert DataFrames to xlsx files
all_result.to_excel(f'/content/drive/MyDrive/dataset_colab/output/all_results_{threshold}.xlsx', index=False)
eval_result.to_excel(f'/content/drive/MyDrive/dataset_colab/output/eval_results_{threshold}.xlsx', index=False)


Evaluation Threshold 0.5

In [54]:
threshold=0.5
eval_result=combine_results(dir_path=pathd+"output"f'/results_{threshold}/eval')
eval_result

Unnamed: 0,Model,TP,TN,FP,FN,Accuracy,Precision,Recall,F1-Score
0,Facenet512,15414,0,0,4176,0.78683,1.0,0.78683,0.880699
1,Facenet,15208,0,0,4382,0.776314,1.0,0.776314,0.874073
2,VGG-Face,6657,0,0,12933,0.339816,1.0,0.339816,0.507258


In [57]:
all_result=combine_results(dir_path=pathd+"output"f'/results_{threshold}')

all_result.head()


Unnamed: 0,train,test,verified,y_true,y_pred,distance,model
0,Aaron_Eckhart_11_11.jpeg,Aaron_Eckhart_108_84.jpeg,True,1,1,0.245835,Facenet512
1,Aaron_Eckhart_149_109.jpeg,Aaron_Eckhart_108_84.jpeg,True,1,1,0.456095,Facenet512
2,Aaron_Eckhart_187_132.jpeg,Aaron_Eckhart_108_84.jpeg,True,1,1,0.241423,Facenet512
3,Aaron_Eckhart_188_133.jpeg,Aaron_Eckhart_108_84.jpeg,True,1,1,0.44215,Facenet512
4,Aaron_Eckhart_196_140.jpeg,Aaron_Eckhart_108_84.jpeg,True,1,1,0.295349,Facenet512
