# Performance Comparison of Face Recognition Models (Assignment 3)

Nama:Najwan Yusnianda
NIM: 2408207010029

## 1. Prepare Dataset

### 1.1 Load Library

In [8]:
import os
import random
import shutil
import numpy as np
import pandas as pd
import requests
import cv2
import tensorflow as tf
from mtcnn import MTCNN
from imutils import paths
from tqdm import tqdm
from pathlib import Path
from deepface import DeepFace
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score
)

### 1.2 Download Dataset

In [9]:
## import metadata
facescrub_df_actor = pd.read_csv('faceScrub/facescrub_actors.txt',delimiter='\t',header=None)
facescrub_df_actress = pd.read_csv('faceScrub/facescrub_actresses.txt',delimiter='\t',header=None)

#combine dataframe
facescrub_df=pd.concat([facescrub_df_actor,facescrub_df_actress],axis=0)

print(f"number of row (images) : {len(facescrub_df)}")

number of row (images) : 106865


In [10]:
# Group by the first column (name) and count the occurrences
name_count_df = facescrub_df.groupby(facescrub_df.columns[0]).size().reset_index(name='count')

# Rename the columns for clarity
name_count_df.columns = ['name', 'count']
name_count_df.head()
# Display the new dataframe
#name_count_df=name_count_df.sort_values(by='count', ascending=False)
print(f"number of unique faces (people): {len(name_count_df)}")
print(name_count_df.head())


number of unique faces (people): 531
            name  count
0  Aaron Eckhart    231
1     Adam Brody    200
2     Adam McKay    108
3   Adam Sandler    208
4  Adrianne León     62


In [11]:
def download_and_detect_faces(url, filename):
    detector = MTCNN()
    
    try:

        response = requests.get(url, timeout=10)
        response.raise_for_status()
        
        # Convert to numpy array 
        image_array = np.asarray(bytearray(response.content), dtype=np.uint8)
        image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
    
        #check if image
        if image is None:
            return 0

        # Detect faces in the image
        result = detector.detect_faces(image)
        if not len(result) ==1:
            #print(f"no face detected in {filename} number of face detected: {len(result)}")
            return 0
            
        # Only save the image if faces detected
        cv2.imwrite(filename, image)
        return 1
        
    except (requests.exceptions.RequestException, cv2.error) as e:
        #remove image if not contain face
        if os.path.exists(filename):
            try:
                os.remove(filename)
            except OSError:
                pass
        return 0
    except Exception as e:
        # Catch any other unexpected errors
        if os.path.exists(filename):
            try:
                os.remove(filename)
            except OSError:
                pass
        return 0
    

    

def get_image_sample(num_person,ntrain_person,ntest_person,nval_person,facescrub_df):
    current_num_person = 0

    list_person = []

    while current_num_person<num_person:
        current_ntrain_person = 0
        current_ntest_person = 0
        current_nval_person = 0
        ##generate angka random sebagai index dari dataset
        random_num = np.random.randint(0, facescrub_df.shape[0])
        ##ambil nama orang dari dataset
        
        current_person=facescrub_df.iloc[random_num,0]
        
        if current_person in list_person:
            continue
        else:
        ##buat dataset dengan data orang tersebut
            df_persons=facescrub_df[facescrub_df[0]==current_person]
            
            train_path = 'dataset/stagging/sampleset/'+current_person
            val_path = 'dataset/stagging/validationset/'+current_person
            test_path = 'dataset/stagging/testingset/'+current_person
            ##buat folder untuk orang tersebut
            if not os.path.exists(train_path):
                
                os.makedirs(train_path)
            if not os.path.exists(test_path):
                os.makedirs(test_path)

            len_train = print(f"dwonload image person={current_person} num_image={df_persons.shape[0]}")
            
            
            list_index_person=[]
            while current_ntrain_person<ntrain_person:
                    #get random index
                i = np.random.randint(0, df_persons.shape[0])
                if i in list_index_person:
                    continue
                else:
                    list_index_person.append(i)
                    ##ambil url gambar
                    url = df_persons.iloc[i,3]
                    ##download gambar
                    isdownload=download_and_detect_faces(url, train_path+'/'+current_person+'_'+str(current_ntrain_person+1)+'.jpg')
                    if isdownload==1:
                        current_ntrain_person+=1
                        if current_ntrain_person % 10 == 0:
                            print(f"sample_person set image added for {current_person}: {current_ntrain_person}")
            while current_nval_person<nval_person:
                i = np.random.randint(0, df_persons.shape[0])
                if i in list_index_person:
                    continue
                else:
                    list_index_person.append(i)    
                     ##ambil url gambar
                    url = df_persons.iloc[i,3]
                    ##download gambar
                    isdownload=download_and_detect_faces(url, val_path+'/'+current_person+'_'+str(current_nval_person+1)+'.jpg')
                    if isdownload==1:
                        current_nval_person+=1
                        if current_nval_person % 5 == 0:
                            print(f"val_person set image added for {current_person}: {current_nval_person}")
            while current_ntest_person<ntest_person:
                i = np.random.randint(0, df_persons.shape[0])
                if i in list_index_person:
                    continue
                else:
                    list_index_person.append(i)    
                     ##ambil url gambar
                    url = df_persons.iloc[i,3]
                    ##download gambar
                    isdownload=download_and_detect_faces(url, test_path+'/'+current_person+'_'+str(current_ntest_person+1)+'.jpg')
                    if isdownload==1:
                        current_ntest_person+=1
                        if current_ntest_person % 5 == 0:
                            print(f"test_person set image added fro {current_person}: {current_ntest_person}")
            list_person.append(current_person)
            current_num_person+=1
            print(f"=== [{current_num_person}/{num_person}] Person added: {current_person} ===")


In [None]:
##run function to get image sample
get_image_sample(num_person=200,ntrain_person=20,ntest_person=5,nval_person=5,facescrub_df=facescrub_df)

dwonload image person=Robert Di Niro num_image=200
sample_person set image added for Robert Di Niro: 10
sample_person set image added for Robert Di Niro: 20
val_person set image added for Robert Di Niro: 5
test_person set image added fro Robert Di Niro: 5
=== [1/200] Person added: Robert Di Niro ===
dwonload image person=Wendie Malick num_image=219
sample_person set image added for Wendie Malick: 10
sample_person set image added for Wendie Malick: 20
val_person set image added for Wendie Malick: 5
test_person set image added fro Wendie Malick: 5
=== [2/200] Person added: Wendie Malick ===
dwonload image person=Jennie Garth num_image=245
sample_person set image added for Jennie Garth: 10
sample_person set image added for Jennie Garth: 20
val_person set image added for Jennie Garth: 5
test_person set image added fro Jennie Garth: 5
=== [3/200] Person added: Jennie Garth ===
dwonload image person=Linda Gray num_image=226
sample_person set image added for Linda Gray: 10
sample_person set i

### 1.2.2 Organize folder(optional if all faces allready download)

In [None]:
import os
import shutil
import random
import cv2
from mtcnn import MTCNN

# Initialize MTCNN detector
detector = MTCNN()

# Define paths
base_folders = ["dataset/actors", "dataset/actress"]
train_folder = "dataset/stagging/sampleset"
validation_folder = "dataset/stagging/validationset"
test_folder = "dataset/stagging/testingset"

# Function to reset a folder
def reset_folder(folder_path):
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)
    os.makedirs(folder_path)

# Reset dataset folders
reset_folder(train_folder)
reset_folder(validation_folder)
reset_folder(test_folder)

# Collect all person folders
all_persons = []
for base_folder in base_folders:
    if os.path.exists(base_folder):
        persons = [
            os.path.join(base_folder, p) for p in os.listdir(base_folder)
            if os.path.isdir(os.path.join(base_folder, p))
        ]
        all_persons.extend(persons)
print(f"Found {len(all_persons)} persons.")

# Randomly select up to 200 persons
selected_persons = random.sample(all_persons, min(200, len(all_persons)))
print(f"Selected {len(selected_persons)} persons.")

def is_valid_face(image_path):
    """Check if an image contains exactly one detectable face."""
    try:
        image = cv2.imread(image_path)  # Read the image
        result = detector.detect_faces(image)  # Detect faces

        if len(result) == 1:  # Keep only images with exactly 1 face
            return True
        else:
            return False
    except Exception as e:
        return False

# Process each selected person
for person_path in selected_persons:
    person_name = os.path.basename(person_path)
    print(f"Processing {person_name}...")

    images = [img for img in os.listdir(person_path) if img.endswith((".jpeg", ".jpg", ".png"))]

    # Shuffle images
    random.shuffle(images)

    # Filter images with exactly one detected face
    valid_images = [img for img in images if is_valid_face(os.path.join(person_path, img))]

    # Skip if fewer than 20 valid images
    if len(valid_images) < 20:
        print(f"⚠️ Skipping {person_name}, not enough valid face images.")
        continue

    # Select max 25 valid images
    selected_images = valid_images[:30]
    train_images = selected_images[:20]  # 15 for training
    validation_images = selected_images[20:25]  # 5 for validation
    test_images = selected_images[25:]  # 5 for testing

    # Create person-specific directories
    train_person_path = os.path.join(train_folder, person_name)
    validation_person_path = os.path.join(validation_folder, person_name)
    test_person_path = os.path.join(test_folder, person_name)

    os.makedirs(train_person_path, exist_ok=True)
    os.makedirs(validation_person_path, exist_ok=True)
    os.makedirs(test_person_path, exist_ok=True)

    # Copy images to respective folders
    for img in train_images:
        shutil.copy(os.path.join(person_path, img), os.path.join(train_person_path, img))

    for img in validation_images:
        shutil.copy(os.path.join(person_path, img), os.path.join(validation_person_path, img))

    for img in test_images:
        shutil.copy(os.path.join(person_path, img), os.path.join(test_person_path, img))

    print(f"✅ Processed {person_name}: Train={len(train_images)}, Validation={len(validation_images)}, Test={len(test_images)}")

print("🎉 Dataset successfully filtered and organized!")


Selected 200 persons.
Processed Kassie_DePaiva: Train=20, Test=5
Processed Bradley_Cooper: Train=20, Test=5
Processed Cam_Gigandet: Train=20, Test=5
Processed Audra_McDonald: Train=20, Test=5
Processed Shannon_Kane: Train=20, Test=5
Processed Crystal_Chappell: Train=20, Test=5
Processed Jennette_McCurdy: Train=20, Test=5
Processed John_Malkovich: Train=20, Test=5
Processed Seth_Rogen: Train=20, Test=5
Processed Kris_Kristofferson: Train=20, Test=5
Processed John_Noble: Train=20, Test=5
Processed Catherine_Bell: Train=20, Test=5
Processed Sean_Bean: Train=20, Test=5
Processed Alyssa_Milano: Train=20, Test=5
Processed Jason_Behr: Train=20, Test=5
Processed Martin_Sheen: Train=20, Test=5
Processed Antonio_Banderas: Train=20, Test=5
Processed Tempestt_Bledsoe: Train=20, Test=5
Processed Ed_Harris: Train=20, Test=5
Processed Valerie_Harper: Train=20, Test=5
Processed Amaury_Nolasco: Train=20, Test=5
Processed Oliver_Platt: Train=20, Test=5
Processed Chyler_Leigh: Train=20, Test=5
Processed 

### 1.3 Process Image with MTCNN

In [3]:

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)




In [15]:
def processed_mtcnn(detector,dirpath, dirdest):
    
    imagePaths = sorted(list(paths.list_images(dirpath)))
    for imagePath in tqdm(imagePaths):
        #path_split = imagePath.split(os.sep)
        path_split=Path(imagePath).parts
        name_person = path_split[-2]
        fn = path_split[-1].split('.')
        filename, fileformat = fn[0], fn[1]

        os.makedirs(dirdest, exist_ok=True)

        image = cv2.cvtColor(cv2.imread(imagePath), cv2.COLOR_BGR2RGB)
        result = detector.detect_faces(image)
        #print(f"Processing: {imagePath}, Faces detected: {len(result)}")

        for i in range(len(result)):
            bounding_box = result[i]['box']
            keypoints = result[i]['keypoints']

            bounding_box[0] = max(0, bounding_box[0])
            bounding_box[1] = max(0, bounding_box[1])

            person_dir = os.path.join(dirdest, name_person)
            os.makedirs(person_dir, exist_ok=True)
            path_save = os.path.join(person_dir, f"{filename}_{i}.{fileformat}")
            print(path_save)
            img = image[bounding_box[1]:bounding_box[1] + bounding_box[3],
                    bounding_box[0]:bounding_box[0] + bounding_box[2]]

            cv2.imwrite(path_save, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))


    print("Face detection and cropping completed!")

In [16]:
dirpaths=['dataset/stagging/sampleset','dataset/stagging/testingset']

for dirpath in dirpaths:
    processed_mtcnn(detector=MTCNN(),dirpath=dirpath,dirdest=dirpath.replace("stagging","mtcnn_faces"))

  2%|▏         | 2/100 [00:00<00:17,  5.46it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9044_4231_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9045_4232_0.jpeg


  4%|▍         | 4/100 [00:00<00:14,  6.45it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9059_4240_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9065_4243_0.jpeg


  6%|▌         | 6/100 [00:00<00:14,  6.69it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9082_4252_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9096_4256_0.jpeg


  7%|▋         | 7/100 [00:01<00:14,  6.38it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9119_4271_0.jpeg


  9%|▉         | 9/100 [00:01<00:14,  6.34it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9124_4274_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9131_4278_0.jpeg


 11%|█         | 11/100 [00:02<00:18,  4.70it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9149_4286_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9154_4289_0.jpeg


 13%|█▎        | 13/100 [00:02<00:16,  5.28it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9187_4306_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9198_4310_0.jpeg


 15%|█▌        | 15/100 [00:02<00:14,  5.68it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9200_4312_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9213_4318_0.jpeg


 16%|█▌        | 16/100 [00:02<00:15,  5.38it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9232_4327_0.jpeg


 18%|█▊        | 18/100 [00:03<00:15,  5.40it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9238_4329_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9280_4352_0.jpeg


 20%|██        | 20/100 [00:03<00:12,  6.49it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9300_4362_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Barbeau\Adrienne_Barbeau_9341_4378_0.jpeg


 21%|██        | 21/100 [00:03<00:11,  7.09it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58140_27673_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58149_27680_0.jpeg


 23%|██▎       | 23/100 [00:04<00:12,  6.10it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58157_27688_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58167_27697_0.jpeg


 25%|██▌       | 25/100 [00:04<00:10,  7.27it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58175_27703_0.jpeg


 27%|██▋       | 27/100 [00:04<00:11,  6.31it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58177_27704_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58185_27711_0.jpeg


 28%|██▊       | 28/100 [00:04<00:12,  5.79it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58192_27716_0.jpeg


 30%|███       | 30/100 [00:05<00:12,  5.71it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58200_27721_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58213_27729_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58218_27733_0.jpeg


 33%|███▎      | 33/100 [00:05<00:10,  6.64it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58242_27754_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58249_27760_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58259_27767_0.jpeg


 36%|███▌      | 36/100 [00:06<00:09,  6.86it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58280_27784_0.jpeg
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58329_27821_0.jpeg


 38%|███▊      | 38/100 [00:06<00:08,  7.41it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58338_27829_0.png
dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58347_27835_0.jpeg


 39%|███▉      | 39/100 [00:06<00:08,  6.96it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58377_27858_0.jpeg


 41%|████      | 41/100 [00:06<00:09,  6.50it/s]

dataset/mtcnn_faces/sampleset\Adrienne_Frantz\Adrienne_Frantz_58380_27861_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78813_35707_0.jpeg


 42%|████▏     | 42/100 [00:07<00:12,  4.60it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78815_35709_0.jpeg


 44%|████▍     | 44/100 [00:07<00:12,  4.44it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78819_35712_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78831_35719_0.jpeg


 46%|████▌     | 46/100 [00:07<00:09,  5.91it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78833_35721_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78837_35723_0.jpeg


 47%|████▋     | 47/100 [00:08<00:11,  4.48it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78843_35726_0.jpeg


 48%|████▊     | 48/100 [00:08<00:11,  4.41it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78848_35727_0.jpeg


 50%|█████     | 50/100 [00:08<00:11,  4.29it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78851_35729_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78853_35731_0.jpeg


 52%|█████▏    | 52/100 [00:09<00:10,  4.42it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78858_35734_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78890_35746_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78894_35747_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78927_35758_0.jpeg


 57%|█████▋    | 57/100 [00:10<00:06,  6.38it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78934_35761_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78952_35771_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78977_35776_0.jpeg


 59%|█████▉    | 59/100 [00:10<00:05,  6.93it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_78995_35784_0.jpeg
dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_79041_35796_0.jpeg


 60%|██████    | 60/100 [00:10<00:05,  7.27it/s]

dataset/mtcnn_faces/sampleset\Aisha_Hinds\Aisha_Hinds_79077_35803_0.jpeg


 61%|██████    | 61/100 [00:10<00:08,  4.51it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1800_964_0.jpeg


 62%|██████▏   | 62/100 [00:11<00:09,  4.13it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1814_975_0.jpeg


 63%|██████▎   | 63/100 [00:11<00:09,  3.97it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1844_999_0.jpeg
dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1852_1007_0.jpeg


 66%|██████▌   | 66/100 [00:12<00:06,  4.89it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1855_1008_0.jpeg
dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1856_1009_0.jpeg


 68%|██████▊   | 68/100 [00:12<00:05,  5.91it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1859_1012_0.jpeg
dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1879_1027_0.jpeg


 69%|██████▉   | 69/100 [00:12<00:05,  5.48it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1940_1062_0.jpeg


 70%|███████   | 70/100 [00:12<00:05,  5.04it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1952_1070_0.jpeg


 71%|███████   | 71/100 [00:13<00:06,  4.56it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1970_1076_0.jpeg


 72%|███████▏  | 72/100 [00:13<00:06,  4.48it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_1992_1087_0.jpeg


 73%|███████▎  | 73/100 [00:14<00:10,  2.54it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_2015_1101_0.jpeg


 74%|███████▍  | 74/100 [00:14<00:11,  2.23it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_2034_1112_0.jpeg


 75%|███████▌  | 75/100 [00:15<00:10,  2.45it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_2049_1120_0.jpeg


 76%|███████▌  | 76/100 [00:15<00:10,  2.21it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_2095_1139_0.jpeg


 77%|███████▋  | 77/100 [00:15<00:09,  2.54it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_2100_1141_0.jpeg
dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_2111_1146_0.jpeg


 79%|███████▉  | 79/100 [00:16<00:07,  2.71it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_2121_1151_0.jpeg


 80%|████████  | 80/100 [00:16<00:06,  3.07it/s]

dataset/mtcnn_faces/sampleset\Al_Pacino\Al_Pacino_2122_1152_0.jpeg


 82%|████████▏ | 82/100 [00:17<00:05,  3.47it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2165_1180_0.jpeg
dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2192_1204_0.jpeg


 83%|████████▎ | 83/100 [00:17<00:05,  3.27it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2206_1215_0.jpeg


 84%|████████▍ | 84/100 [00:17<00:04,  3.23it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2207_1216_0.jpeg


 85%|████████▌ | 85/100 [00:18<00:05,  2.98it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2218_1222_0.jpeg


 86%|████████▌ | 86/100 [00:18<00:04,  3.14it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2224_1228_0.jpeg


 87%|████████▋ | 87/100 [00:18<00:04,  2.92it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2233_1235_0.jpeg


 89%|████████▉ | 89/100 [00:19<00:03,  3.47it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2256_1251_0.jpeg
dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2259_1253_0.jpeg


 90%|█████████ | 90/100 [00:19<00:03,  3.18it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2297_1279_0.jpeg


 91%|█████████ | 91/100 [00:20<00:02,  3.44it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2308_1287_0.jpeg


 92%|█████████▏| 92/100 [00:20<00:02,  3.02it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2321_1298_0.jpeg


 93%|█████████▎| 93/100 [00:20<00:02,  3.08it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2323_1300_0.jpeg


 94%|█████████▍| 94/100 [00:21<00:02,  2.54it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2352_1323_0.jpeg


 95%|█████████▌| 95/100 [00:21<00:01,  2.93it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2385_1345_0.jpeg


 96%|█████████▌| 96/100 [00:21<00:01,  2.81it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2389_1349_0.jpeg


 97%|█████████▋| 97/100 [00:22<00:01,  2.79it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2396_1354_0.jpeg


 98%|█████████▊| 98/100 [00:22<00:00,  2.69it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2442_1388_0.jpeg


 99%|█████████▉| 99/100 [00:23<00:00,  2.87it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2451_1395_0.jpeg


100%|██████████| 100/100 [00:23<00:00,  4.22it/s]

dataset/mtcnn_faces/sampleset\Alan_Alda\Alan_Alda_2490_1420_0.jpeg
Face detection and cropping completed!



  4%|▍         | 1/25 [00:00<00:04,  5.03it/s]

dataset/mtcnn_faces/testingset\Adrienne_Barbeau\Adrienne_Barbeau_9127_4276_0.jpeg
dataset/mtcnn_faces/testingset\Adrienne_Barbeau\Adrienne_Barbeau_9229_4326_0.jpeg


 12%|█▏        | 3/25 [00:00<00:06,  3.26it/s]

dataset/mtcnn_faces/testingset\Adrienne_Barbeau\Adrienne_Barbeau_9267_4347_0.jpeg


 16%|█▌        | 4/25 [00:01<00:06,  3.01it/s]

dataset/mtcnn_faces/testingset\Adrienne_Barbeau\Adrienne_Barbeau_9302_4363_0.jpeg


 24%|██▍       | 6/25 [00:01<00:05,  3.66it/s]

dataset/mtcnn_faces/testingset\Adrienne_Barbeau\Adrienne_Barbeau_9335_4376_0.jpeg
dataset/mtcnn_faces/testingset\Adrienne_Frantz\Adrienne_Frantz_58205_27724_0.jpeg


 28%|██▊       | 7/25 [00:01<00:05,  3.55it/s]

dataset/mtcnn_faces/testingset\Adrienne_Frantz\Adrienne_Frantz_58230_27743_0.png


 32%|███▏      | 8/25 [00:02<00:05,  3.32it/s]

dataset/mtcnn_faces/testingset\Adrienne_Frantz\Adrienne_Frantz_58250_27761_0.jpeg


 36%|███▌      | 9/25 [00:02<00:05,  3.10it/s]

dataset/mtcnn_faces/testingset\Adrienne_Frantz\Adrienne_Frantz_58301_27800_0.jpeg
dataset/mtcnn_faces/testingset\Adrienne_Frantz\Adrienne_Frantz_58327_27819_0.jpeg


 44%|████▍     | 11/25 [00:03<00:04,  3.25it/s]

dataset/mtcnn_faces/testingset\Aisha_Hinds\Aisha_Hinds_78817_35710_0.jpeg


 48%|████▊     | 12/25 [00:03<00:03,  3.57it/s]

dataset/mtcnn_faces/testingset\Aisha_Hinds\Aisha_Hinds_78942_35766_0.jpeg


 52%|█████▏    | 13/25 [00:03<00:03,  3.73it/s]

dataset/mtcnn_faces/testingset\Aisha_Hinds\Aisha_Hinds_78981_35777_0.jpeg


 56%|█████▌    | 14/25 [00:04<00:03,  3.43it/s]

dataset/mtcnn_faces/testingset\Aisha_Hinds\Aisha_Hinds_79048_35798_0.jpeg


 60%|██████    | 15/25 [00:04<00:02,  3.63it/s]

dataset/mtcnn_faces/testingset\Aisha_Hinds\Aisha_Hinds_79128_35809_0.jpeg


 64%|██████▍   | 16/25 [00:04<00:02,  3.20it/s]

dataset/mtcnn_faces/testingset\Al_Pacino\Al_Pacino_1833_992_0.jpeg


 68%|██████▊   | 17/25 [00:05<00:02,  3.06it/s]

dataset/mtcnn_faces/testingset\Al_Pacino\Al_Pacino_1867_1018_0.jpeg


 72%|███████▏  | 18/25 [00:05<00:02,  2.69it/s]

dataset/mtcnn_faces/testingset\Al_Pacino\Al_Pacino_1973_1077_0.jpeg


 76%|███████▌  | 19/25 [00:05<00:02,  2.69it/s]

dataset/mtcnn_faces/testingset\Al_Pacino\Al_Pacino_1976_1078_0.jpeg


 80%|████████  | 20/25 [00:06<00:01,  2.81it/s]

dataset/mtcnn_faces/testingset\Al_Pacino\Al_Pacino_2008_1096_0.jpeg


 84%|████████▍ | 21/25 [00:06<00:01,  2.72it/s]

dataset/mtcnn_faces/testingset\Alan_Alda\Alan_Alda_2173_1188_0.jpeg


 88%|████████▊ | 22/25 [00:06<00:01,  2.93it/s]

dataset/mtcnn_faces/testingset\Alan_Alda\Alan_Alda_2209_1218_0.jpeg


 92%|█████████▏| 23/25 [00:07<00:00,  2.66it/s]

dataset/mtcnn_faces/testingset\Alan_Alda\Alan_Alda_2225_1229_0.jpeg


 96%|█████████▌| 24/25 [00:07<00:00,  3.05it/s]

dataset/mtcnn_faces/testingset\Alan_Alda\Alan_Alda_2342_1315_0.jpeg


100%|██████████| 25/25 [00:07<00:00,  3.16it/s]

dataset/mtcnn_faces/testingset\Alan_Alda\Alan_Alda_2408_1362_0.jpeg
Face detection and cropping completed!





## 2.Face Verification with DeepFace 


Pengujian dilakukan pada google colab (https://colab.research.google.com/drive/1FJffmhncRhJePS87fkacBXv3gSxY8mvz?usp=sharing )

### 2.1 Extract Embedding


In [None]:

def extract_embeddings(dataset_path, output_file, model_name="Facenet512"):
    """Extracts embeddings and saves them in a pickle file."""
    embeddings_dict = {}

    for person in os.listdir(dataset_path):
        person_dir = os.path.join(dataset_path, person)
        if not os.path.isdir(person_dir):
            continue  # Skip non-directory files

        embeddings_dict[person] = {}
        for image_name in os.listdir(person_dir):
            image_path = os.path.join(person_dir, image_name)
            try:
                embedding = DeepFace.represent(image_path, model_name=model_name, enforce_detection=False)[0]["embedding"]
                embeddings_dict[person][image_name] = np.array(embedding)
                print(f"✔️ Extracted embedding for: {image_name}")
            except Exception as e:
                print(f"❌ Error processing {image_path}: {e}")
    os.makedirs(os.path.dirname(output_file), exist_ok=True)
    with open(output_file, "wb") as f:
        pickle.dump(embeddings_dict, f)
    print(f"✅ Embeddings saved to {output_file}")

    return embeddings_dict

### 2.2 Load Embedding

In [None]:
def load_embeddings(file_path):
    """Loads the precomputed embeddings from a file."""
    with open(file_path, "rb") as f:
        embeddings = pickle.load(f)
    print(f"✅ Loaded embeddings from {file_path}")
    return embeddings

def verify_faces_with_embeddings(embeddings, person1, img1, person2, img2, threshold=0.4):
    """Compares precomputed embeddings for face verification."""
    if person1 not in embeddings or person2 not in embeddings:
        print(f"Error: Person not found in embeddings.")
        return False, None

    if img1 not in embeddings[person1] or img2 not in embeddings[person2]:
        print(f"Error: Image not found in embeddings.")
        return False, None

    emb1 = embeddings[person1][img1]
    emb2 = embeddings[person2][img2]

    distance = cosine(emb1, emb2)  # Cosine similarity
    verified = distance <= threshold

    return verified, distance

### 2.3 Run Verification

In [None]:
def evaluate_performance(train_embeddings, test_embeddings, threshold, model="Facenet512",results_path='/content/drive/MyDrive/dataset_colab/output'):
    """Evaluates face verification using precomputed embeddings."""
    results = []
    results_dir = f"{results_path}/results_{threshold}"
    os.makedirs(results_dir, exist_ok=True)

    for test_person, test_images in test_embeddings.items():
        print(f"Processing test person: {test_person}")

        for train_person, train_images in train_embeddings.items():
            if test_person == train_person:
                for test_img, test_emb in test_images.items():
                    for train_img, train_emb in train_images.items():
                        distance = cosine(test_emb, train_emb)
                        verified = distance <= threshold

                        results.append({
                            'train': train_img,
                            'test': test_img,
                            'verified': verified,
                            'y_true': 1,  # Since it's the same person
                            'y_pred': 1 if verified else 0,
                            'distance': distance,
                            'model': model
                        })

        # Save per-person results
        df_person = pd.DataFrame(results)
        df_person.to_csv(os.path.join(results_dir, f"results_{test_person}_{threshold}_{model}.csv"), index=False)

    # Save overall results
    df_results = pd.DataFrame(results)
    df_results.to_csv(os.path.join(results_dir, f"results_{threshold}_{model}.csv"), index=False)

    return df_results

In [None]:
train_dir=pathd+'/sampleset'
val_dir=pathd+'/validationset'
test_dir=pathd+'/testingset'
threshold=0.4
model



results_df=evaluate_performance(train_dir=train_dir,test_dir=test_dir,threshold=threshold)

## 3. Performance Evaluation

### 3.1 Calculate Metrics

In [1]:
def calculate_metrics(results_df, model_name, threshold,results_path='/content/drive/MyDrive/dataset_colab/output'):
    """Calculates and exports classification metrics."""
    y_true = results_df['y_true']
    y_pred = results_df['y_pred']

    cm = confusion_matrix(y_true, y_pred)
    TP = cm[1, 1]
    TN = cm[0, 0]
    FP = cm[0, 1]
    FN = cm[1, 0]
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    metrics_df = pd.DataFrame({
        'Model': [model_name],
        'TP': [TP],
        'TN': [TN],
        'FP': [FP],
        'FN': [FN],
        'Accuracy': [accuracy],
        'Precision': [precision],
        'Recall': [recall],
        'F1-Score': [f1]
    })

    eval_dir = f'{results_path}/results_{threshold}/eval'
    os.makedirs(eval_dir, exist_ok=True)
    metrics_df.to_csv(f'{eval_dir}/evalmetrics_{model_name}_{threshold}.csv', index=False)

    return metrics_df

In [2]:
def combine_results(dir_path):
    """Combines results from CSV files within a directory."""
    all_results = []
    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)  # Form the complete file path
        if os.path.isfile(file_path) and file_name.endswith(".csv"):  # Check if it's a file and a CSV
            try:
                df = pd.read_csv(file_path)
                all_results.append(df)
            except FileNotFoundError:
                print(f"Warning: File not found - {file_path}")
            except pd.errors.EmptyDataError:
                print(f"Warning: Empty file - {file_path}")
    combined_df = pd.concat(all_results, ignore_index=True)
    return combined_df