# Performance Comparison of Face Recognition Models (Assignment 3)

## 1. Prepare Dataset

### 1.1 Load Library

In [7]:
import os
import random
import shutil
import numpy as np
import pandas as pd
import requests
import cv2
import tensorflow as tf
from mtcnn import MTCNN
from imutils import paths
from tqdm import tqdm
from pathlib import Path

### 1.2 Download Dataset

In [8]:
## import metadata
facescrub_df_actor = pd.read_csv('faceScrub/facescrub_actors.txt',delimiter='\t',header=None)
facescrub_df_actress = pd.read_csv('faceScrub/facescrub_actresses.txt',delimiter='\t',header=None)

#combine dataframe
facescrub_df=pd.concat([facescrub_df_actor,facescrub_df_actress],axis=0)

print(f"number of row (images) : {len(facescrub_df)}")

number of row (images) : 106865


In [9]:
# Group by the first column (name) and count the occurrences
name_count_df = facescrub_df.groupby(facescrub_df.columns[0]).size().reset_index(name='count')

# Rename the columns for clarity
name_count_df.columns = ['name', 'count']
name_count_df.head()
# Display the new dataframe
#name_count_df=name_count_df.sort_values(by='count', ascending=False)
print(f"number of unique faces (people): {len(name_count_df)}")
print(name_count_df.head())


number of unique faces (people): 531
            name  count
0  Aaron Eckhart    231
1     Adam Brody    200
2     Adam McKay    108
3   Adam Sandler    208
4  Adrianne León     62


In [13]:
def download_image(url, filename):
    
    try:
        response = requests.get(url, timeout=10, stream=True)
        #print(f"Status Code: {response.status_code}")
        response.raise_for_status()  # Raises an error for HTTP issues
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(1024):
                f.write(chunk)
        #print("Download complete!")
        return 1
    except requests.exceptions.RequestException as e:
        #print(f"Error: {e}")
        #print("Download failed.")
        return 0
    

    

def get_image_sample(num_person,ntrain_person,ntest_person,facescrub_df):
    current_num_person = 0

    list_person = []

    while current_num_person<num_person:
        current_ntrain_person = 0
        current_ntest_person = 0
        ##generate angka random sebagai index dari dataset
        random_num = np.random.randint(0, facescrub_df.shape[0])
        ##ambil nama orang dari dataset
        
        current_person=facescrub_df.iloc[random_num,0]
        
        if current_person in list_person:
            continue
        else:
        ##buat dataset dengan data orang tersebut
            df_persons=facescrub_df[facescrub_df[0]==current_person]
            
            train_path = 'dataset/stagging/sampleset/'+current_person
            test_path = 'dataset/stagging/testingset/'+current_person
            ##buat folder untuk orang tersebut
            if not os.path.exists(train_path):
                
                os.makedirs(train_path)
            if not os.path.exists(test_path):
                os.makedirs(test_path)

            len_train = print(f"dwonload image person={current_person} num_image={df_persons.shape[0]}")
            
            
            list_index_person=[]
            while current_ntrain_person<ntrain_person:
                    #get random index
                i = np.random.randint(0, df_persons.shape[0])
                if i in list_index_person:
                    continue
                else:
                    list_index_person.append(i)
                    ##ambil url gambar
                    url = df_persons.iloc[i,3]
                    ##download gambar
                    isdownload=download_image(url, train_path+'/'+current_person+'_'+str(current_ntrain_person+1)+'.jpg')
                    if isdownload==1:
                        current_ntrain_person+=1
                        if current_ntrain_person % 10 == 0:
                            print(f"sample_person set image added for {current_person}: {current_ntrain_person}")
            while current_ntest_person<ntest_person:
                i = np.random.randint(0, df_persons.shape[0])
                if i in list_index_person:
                    continue
                else:
                    list_index_person.append(i)    
                     ##ambil url gambar
                    url = df_persons.iloc[i,3]
                    ##download gambar
                    isdownload=download_image(url, test_path+'/'+current_person+'_'+str(current_ntest_person+1)+'.jpg')
                    if isdownload==1:
                        current_ntest_person+=1
                        if current_ntest_person % 5 == 0:
                            print(f"test_person set image added fro {current_person}: {current_ntest_person}")
            list_person.append(current_person)
            current_num_person+=1
            print(f"=== [{current_num_person}/{num_person}] Person added: {current_person} ===")


In [15]:
##run function to get image sample
get_image_sample(num_person=1,ntrain_person=20,ntest_person=5,facescrub_df=facescrub_df)

dwonload image person=Hugh Jackman num_image=211
sample_person set image added for Hugh Jackman: 10
sample_person set image added for Hugh Jackman: 20
test_person set image added fro Hugh Jackman: 5
=== [1/1] Person added: Hugh Jackman ===


### 1.2.2 Organize folder(optional if all faces allready download)

In [None]:


# Define paths
base_folders = ["dataset/actor_faces", "dataset/actress_faces"]
train_folder = "dataset/stagging/sampleset"
test_folder = "dataset/stagging/testingset"

# Function to clear and recreate a folder
def reset_folder(folder_path):
    if os.path.exists(folder_path):
        shutil.rmtree(folder_path)  # Delete everything inside
    os.makedirs(folder_path)  # Recreate the empty folder

# Reset train and test folders
reset_folder(train_folder)
reset_folder(test_folder)
# Collect all person folders from both `actor_faces` and `actress_faces`
all_persons = []
for base_folder in base_folders:
    if os.path.exists(base_folder):  # Ensure the folder exists before listing
        persons = [os.path.join(base_folder, p) for p in os.listdir(base_folder) if os.path.isdir(os.path.join(base_folder, p))]
        all_persons.extend(persons)  # Add full paths

#print(all_persons)
# Randomly select up to 200 persons
selected_persons = random.sample(all_persons, min(200, len(all_persons)))
print(f"Selected {len(selected_persons)} persons.")

# Process each selected person
for person_path in selected_persons:
    person_name = os.path.basename(person_path)  # Extract only folder name

    images = [img for img in os.listdir(person_path) if img.endswith((".jpeg", ".jpg", ".png"))]

    if len(images) < 20:  # Skip if not enough images
        print(f"Skipping {person_name}, not enough images.")
        continue

    # Shuffle and select max 25 images
    random.shuffle(images)
    selected_images = images[:25]

    # Split into train (20) and test (5)
    train_images = selected_images[:20]
    test_images = selected_images[20:]

    # Create person folders in train and test
    train_person_path = os.path.join(train_folder, person_name)
    test_person_path = os.path.join(test_folder, person_name)
    os.makedirs(train_person_path, exist_ok=True)
    os.makedirs(test_person_path, exist_ok=True)

    # Move images
    for img in train_images:
        shutil.copy(os.path.join(person_path, img), os.path.join(train_person_path, img))

    for img in test_images:
        shutil.copy(os.path.join(person_path, img), os.path.join(test_person_path, img))

    print(f"Processed {person_name}: Train={len(train_images)}, Test={len(test_images)}")

print("✅ Dataset organized successfully!")


Selected 200 persons.
Processed Kassie_DePaiva: Train=20, Test=5
Processed Bradley_Cooper: Train=20, Test=5
Processed Cam_Gigandet: Train=20, Test=5
Processed Audra_McDonald: Train=20, Test=5
Processed Shannon_Kane: Train=20, Test=5
Processed Crystal_Chappell: Train=20, Test=5
Processed Jennette_McCurdy: Train=20, Test=5
Processed John_Malkovich: Train=20, Test=5
Processed Seth_Rogen: Train=20, Test=5
Processed Kris_Kristofferson: Train=20, Test=5
Processed John_Noble: Train=20, Test=5
Processed Catherine_Bell: Train=20, Test=5
Processed Sean_Bean: Train=20, Test=5
Processed Alyssa_Milano: Train=20, Test=5
Processed Jason_Behr: Train=20, Test=5
Processed Martin_Sheen: Train=20, Test=5
Processed Antonio_Banderas: Train=20, Test=5
Processed Tempestt_Bledsoe: Train=20, Test=5
Processed Ed_Harris: Train=20, Test=5
Processed Valerie_Harper: Train=20, Test=5
Processed Amaury_Nolasco: Train=20, Test=5
Processed Oliver_Platt: Train=20, Test=5
Processed Chyler_Leigh: Train=20, Test=5
Processed 

### 1.3 Process Image with MTCNN

In [3]:

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)




In [16]:
def processed_mtcnn(detector,dirpath, dirdest):
    
    imagePaths = sorted(list(paths.list_images(dirpath)))
    for imagePath in tqdm(imagePaths):
        #path_split = imagePath.split(os.sep)
        path_split=Path(imagePath).parts
        name_person = path_split[-2]
        fn = path_split[-1].split('.')
        filename, fileformat = fn[0], fn[1]

        os.makedirs(dirdest, exist_ok=True)

        image = cv2.cvtColor(cv2.imread(imagePath), cv2.COLOR_BGR2RGB)
        result = detector.detect_faces(image)
        print(f"Processing: {imagePath}, Faces detected: {len(result)}")

        for i in range(len(result)):
            bounding_box = result[i]['box']
            keypoints = result[i]['keypoints']

            bounding_box[0] = max(0, bounding_box[0])
            bounding_box[1] = max(0, bounding_box[1])

            person_dir = os.path.join(dirdest, name_person)
            os.makedirs(person_dir, exist_ok=True)
            path_save = os.path.join(person_dir, f"{filename}_{i}.{fileformat}")
            print(path_save)
            img = image[bounding_box[1]:bounding_box[1] + bounding_box[3],
                    bounding_box[0]:bounding_box[0] + bounding_box[2]]

            cv2.imwrite(path_save, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))


    print("Face detection and cropping completed!")

In [17]:
dirpaths=['dataset/stagging/sampleset','dataset/stagging/testingset']

for dirpath in dirpaths:
    processed_mtcnn(detector=MTCNN(),dirpath=dirpath,dirdest=dirpath.replace("stagging","mtcnn_faces"))

 10%|█         | 2/20 [00:00<00:05,  3.12it/s]

Processing: dataset/stagging/sampleset\Hugh Jackman\Hugh Jackman_1.jpg, Faces detected: 1
dataset/mtcnn_faces/sampleset\Hugh Jackman\Hugh Jackman_1_0.jpg
Processing: dataset/stagging/sampleset\Hugh Jackman\Hugh Jackman_10.jpg, Faces detected: 0





error: OpenCV(4.11.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:199: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


## 2.Face Verification with DeepFace

In [2]:
from sklearn.metrics import precision_score, recall_score, f1_score

y_true = []  # 1 for same person, 0 for different person
y_pred = []  # 1 if verified, 0 if not verified

train_path='dataset/mtcnn_faces/sampleset/'
test_path= 'dataset/mtcnn_faces/testingset'

# Example: Compare images
for img1 in os.listdir(train_path):
    for img2 in os.listdir(test_path):
        result = DeepFace.verify(
            img1_path=os.path.join(train_path, img1),
            img2_path=os.path.join(test_path, img2),
            model_name="VGG-Face",
            distance_metric="cosine"
        )

        y_true.append(1 if "personA" in img1 and "personA" in img2 else 0)
        y_pred.append(1 if result["verified"] else 0)

# Calculate Metrics
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'dataset/mtcnn_faces/sampleset/'

## 3. Performance Evaluation