# RetinaFace + ArcFace
## Improve accuracy by switching to another pre-trained model
Enforcing a minimum face box size and keeping only the 3 largest faces per image yields cleaner embeddings, which results in higher similarity scores and better overall MCC, TPR, and TNR.

## Filters Applied During Embedding to Ensure Accuracy:
1. Restriction on the number of faces kept per image
2. Restriction on the minimum face size

## Embedding Fallback Strategy Applied:
1. Try det_score ≥ 0.9 + size ≥ 80px (strict); keep up to the 3 largest faces
2. If none: try det_score ≥ 0.5 + size ≥ 80px; keep only the largest face
3. If none: try det_score ≥ 0.5 + size ≥ 30px; keep only the most confident face
4. If none: take the highest-confidence face regardless of size

In [74]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from insightface.app import FaceAnalysis
import pickle

# Configuration
# dataset_path is scanned for one sub-folder per celebrity; the folder name is
# used as the identity label and every readable image inside it is processed.
dataset_path    = r"C:\Users\peggy\Desktop\FaceDetection\celebrities_face\face"
# Destination pickle: a dict mapping celebrity name -> stacked embedding array.
output_pkl      = r"C:\Users\peggy\Desktop\FaceDetection\celebrity_face_embeddings_top3.pkl"
min_box_size    = 80     # strict stage: both box width and height must be at least this many pixels
top_n           = 3      # strict stage: keep at most this many faces per image, largest area first
det_thresh      = 0.9    # strict stage: only accept detections with score ≥ 0.90
fallback_det_thresh = 0.5  # relaxed score threshold, used only when no face passes the strict criteria
fallback_min_size = 30     # relaxed width/height minimum, used only when the 80px requirement also fails

# Initialize model
# 'buffalo_l' is the insightface model pack name; the detector and recognition
# networks it provides are used below through app.get().
app = FaceAnalysis(name='buffalo_l')
# NOTE(review): ctx_id=0 presumably requests device 0; the captured log shows
# only CPUExecutionProvider being applied — confirm which device is intended.
app.prepare(ctx_id=0, det_size=(640, 640))

# Process images
def _faces_meeting(faces, score_thresh, size_thresh):
    """Return (face, box_area) pairs for faces that pass both thresholds.

    A face passes when its ``det_score`` is at least ``score_thresh`` and both
    the width and height of its integer-truncated bounding box are at least
    ``size_thresh`` pixels. Detection order is preserved.
    """
    passing = []
    for face in faces:
        if face.det_score >= score_thresh:
            x1, y1, x2, y2 = map(int, face.bbox[:4])
            width, height = x2 - x1, y2 - y1
            if width >= size_thresh and height >= size_thresh:
                passing.append((face, width * height))
    return passing


def _select_faces(faces, strict_score, strict_size,
                  fallback_score, fallback_size, max_faces):
    """Pick the faces of one image whose embeddings should be kept.

    Returns ``(selected_faces, used_fallback)``. ``faces`` must be non-empty.

    Stages, tried in order until one yields a face:
      1. strict score + strict size  -> up to ``max_faces`` largest faces
      2. fallback score + strict size -> the single largest face
      3. fallback score + fallback size -> the single most confident face
         (ties broken by larger area)
      4. the most confident face, whatever its size
    Ties keep the earliest detection, matching a stable descending sort.
    """
    strict = _faces_meeting(faces, strict_score, strict_size)
    if strict:
        strict.sort(key=lambda pair: pair[1], reverse=True)
        return [face for face, _ in strict[:max_faces]], False

    relaxed_score = _faces_meeting(faces, fallback_score, strict_size)
    if relaxed_score:
        largest, _ = max(relaxed_score, key=lambda pair: pair[1])
        return [largest], True

    relaxed_both = _faces_meeting(faces, fallback_score, fallback_size)
    if relaxed_both:
        most_confident, _ = max(
            relaxed_both, key=lambda pair: (pair[0].det_score, pair[1])
        )
        return [most_confident], True

    # Last resort so that every image with a detection contributes one face.
    return [max(faces, key=lambda face: face.det_score)], True


embedding_output = {}
total_faces_detected = 0
total_faces_after_strict_filter = 0
total_faces_after_fallback = 0
total_images_processed = 0
total_images_with_faces = 0
images_needing_fallback = 0

for celeb_name in os.listdir(dataset_path):
    celeb_folder = os.path.join(dataset_path, celeb_name)
    if not os.path.isdir(celeb_folder):
        continue

    all_embs = []
    celeb_faces_detected = 0
    celeb_faces_strict = 0
    celeb_images_fallback = 0

    for img_name in tqdm(os.listdir(celeb_folder), desc=celeb_name):
        img_path = os.path.join(celeb_folder, img_name)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them.
            continue

        total_images_processed += 1
        faces = app.get(img)
        if not faces:
            continue

        total_images_with_faces += 1
        celeb_faces_detected += len(faces)
        total_faces_detected += len(faces)

        selected_faces, used_fallback = _select_faces(
            faces,
            det_thresh,
            min_box_size,
            fallback_det_thresh,
            fallback_min_size,
            top_n,
        )

        if used_fallback:
            celeb_images_fallback += 1
            images_needing_fallback += 1
            total_faces_after_fallback += len(selected_faces)
        else:
            celeb_faces_strict += len(selected_faces)
            total_faces_after_strict_filter += len(selected_faces)

        # Collect embeddings from selected faces
        all_embs.extend(face.normed_embedding for face in selected_faces)

    # Celebrities with no usable face are left out of the output entirely.
    if all_embs:
        embedding_output[celeb_name] = np.stack(all_embs)

    # Print stats for this celebrity
    strict_embs = celeb_faces_strict
    fallback_embs = len(all_embs) - celeb_faces_strict
    print(f"{celeb_name}: {celeb_faces_detected} detected → {strict_embs} strict → {fallback_embs} fallback → {len(all_embs)} total embeddings ({celeb_images_fallback} images used fallback)")

# Save embedded data
with open(output_pkl, "wb") as f:
    pickle.dump(embedding_output, f)

# Share of face-bearing images that needed the fallback strategy. Guarded so
# that a run in which no image produced a face still prints its summary
# instead of raising ZeroDivisionError.
fallback_pct = (
    images_needing_fallback / total_images_with_faces * 100
    if total_images_with_faces
    else 0.0
)

# Print final statistics
print("\n" + "="*80)
print("PROCESSING COMPLETE")
print("="*80)
print(f"Strict detection threshold: {det_thresh}")
print(f"Fallback detection threshold: {fallback_det_thresh}")
print(f"Strict minimum box size: {min_box_size}px")
print(f"Fallback minimum box size: {fallback_min_size}px")
print(f"Top faces per image: {top_n}")
print("")
print(f"Total images processed: {total_images_processed}")
print(f"Images with detected faces: {total_images_with_faces}")
print(f"Images needing fallback strategy: {images_needing_fallback} ({fallback_pct:.1f}%)")
print("")
print(f"Total faces detected: {total_faces_detected}")
print(f"Faces passing strict criteria: {total_faces_after_strict_filter}")
print(f"Faces added via fallback: {total_faces_after_fallback}")
print(f"Final embeddings saved: {sum(len(embs) for embs in embedding_output.values())}")
print(f"Celebrities with valid faces: {len(embedding_output)}")
print("")
print(f"Output saved to: {output_pkl}")



Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\peggy/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\peggy/.insightface\models\buffalo_l\2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\peggy/.insightface\models\buffalo_l\det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\peggy/.insightface\models\buffalo_l\genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: C:\Users\peggy/.insightface\models\buffalo_l\w600k_r50.onnx recognition ['None', 3, 112, 112] 127.

Angelina_Jolie: 100%|██████████| 6/6 [00:01<00:00,  4.51it/s]


Angelina_Jolie: 6 detected → 0 strict → 6 fallback → 6 total embeddings (6 images used fallback)


Brad_Pitt: 100%|██████████| 7/7 [00:01<00:00,  3.91it/s]


Brad_Pitt: 8 detected → 1 strict → 6 fallback → 7 total embeddings (6 images used fallback)


Chris_Evans: 100%|██████████| 8/8 [00:02<00:00,  2.71it/s]


Chris_Evans: 16 detected → 0 strict → 8 fallback → 8 total embeddings (8 images used fallback)


Chris_Hemsworth: 100%|██████████| 9/9 [00:02<00:00,  3.41it/s]


Chris_Hemsworth: 13 detected → 0 strict → 9 fallback → 9 total embeddings (9 images used fallback)


Cristiano_Ronaldo: 100%|██████████| 9/9 [00:02<00:00,  4.27it/s]


Cristiano_Ronaldo: 10 detected → 0 strict → 9 fallback → 9 total embeddings (9 images used fallback)


Dwayne_Johnson: 100%|██████████| 8/8 [00:02<00:00,  3.28it/s]


Dwayne_Johnson: 12 detected → 2 strict → 6 fallback → 8 total embeddings (6 images used fallback)


Emma_Stone: 100%|██████████| 10/10 [00:04<00:00,  2.44it/s]


Emma_Stone: 22 detected → 1 strict → 9 fallback → 10 total embeddings (9 images used fallback)


Emma_Watson: 100%|██████████| 12/12 [00:03<00:00,  3.53it/s]


Emma_Watson: 17 detected → 1 strict → 11 fallback → 12 total embeddings (11 images used fallback)


Gal_Gadot: 100%|██████████| 11/11 [00:03<00:00,  3.08it/s]


Gal_Gadot: 18 detected → 0 strict → 11 fallback → 11 total embeddings (11 images used fallback)


Hugh_Jackman: 100%|██████████| 11/11 [00:02<00:00,  3.77it/s]


Hugh_Jackman: 13 detected → 0 strict → 11 fallback → 11 total embeddings (11 images used fallback)


Jennifer_Aniston: 100%|██████████| 8/8 [00:03<00:00,  2.41it/s]


Jennifer_Aniston: 17 detected → 2 strict → 6 fallback → 8 total embeddings (6 images used fallback)


Jennifer_Lawrence: 100%|██████████| 8/8 [00:02<00:00,  3.37it/s]


Jennifer_Lawrence: 12 detected → 0 strict → 8 fallback → 8 total embeddings (8 images used fallback)


Johnny_Depp: 100%|██████████| 11/11 [00:02<00:00,  4.01it/s]


Johnny_Depp: 13 detected → 1 strict → 10 fallback → 11 total embeddings (10 images used fallback)


Julia_Roberts: 100%|██████████| 11/11 [00:03<00:00,  3.42it/s]


Julia_Roberts: 16 detected → 2 strict → 9 fallback → 11 total embeddings (9 images used fallback)


Keanu_Reeves: 100%|██████████| 11/11 [00:03<00:00,  3.32it/s]


Keanu_Reeves: 17 detected → 0 strict → 11 fallback → 11 total embeddings (11 images used fallback)


Leonardo_DiCaprio: 100%|██████████| 8/8 [00:02<00:00,  3.68it/s]


Leonardo_DiCaprio: 10 detected → 0 strict → 8 fallback → 8 total embeddings (8 images used fallback)


Margot_Robbie: 100%|██████████| 11/11 [00:03<00:00,  2.84it/s]


Margot_Robbie: 21 detected → 0 strict → 11 fallback → 11 total embeddings (11 images used fallback)


Meryl_Streep: 100%|██████████| 10/10 [00:02<00:00,  3.92it/s]


Meryl_Streep: 12 detected → 0 strict → 10 fallback → 10 total embeddings (10 images used fallback)


Morgan_Freeman: 100%|██████████| 10/10 [00:02<00:00,  4.13it/s]


Morgan_Freeman: 11 detected → 0 strict → 10 fallback → 10 total embeddings (10 images used fallback)


Natalie_Portman: 100%|██████████| 11/11 [00:02<00:00,  4.03it/s]


Natalie_Portman: 13 detected → 1 strict → 10 fallback → 11 total embeddings (10 images used fallback)


Rihanna: 100%|██████████| 7/7 [00:01<00:00,  3.77it/s]


Rihanna: 8 detected → 0 strict → 7 fallback → 7 total embeddings (7 images used fallback)


Robert_Downey_Jr: 100%|██████████| 10/10 [00:02<00:00,  3.76it/s]


Robert_Downey_Jr: 13 detected → 1 strict → 8 fallback → 9 total embeddings (8 images used fallback)


Samuel_L._Jackson: 100%|██████████| 8/8 [00:02<00:00,  3.94it/s]


Samuel_L._Jackson: 10 detected → 0 strict → 8 fallback → 8 total embeddings (8 images used fallback)


Scarlett_Johansson: 100%|██████████| 10/10 [00:02<00:00,  3.38it/s]


Scarlett_Johansson: 16 detected → 2 strict → 7 fallback → 9 total embeddings (7 images used fallback)


Selena_Gomez: 100%|██████████| 13/13 [00:03<00:00,  3.41it/s]


Selena_Gomez: 19 detected → 2 strict → 11 fallback → 13 total embeddings (11 images used fallback)


Taylor_Swift: 100%|██████████| 10/10 [00:05<00:00,  1.85it/s]


Taylor_Swift: 31 detected → 0 strict → 10 fallback → 10 total embeddings (10 images used fallback)


Tom_Hanks: 100%|██████████| 6/6 [00:01<00:00,  3.48it/s]


Tom_Hanks: 8 detected → 1 strict → 5 fallback → 6 total embeddings (5 images used fallback)


Will_Smith: 100%|██████████| 9/9 [00:02<00:00,  3.68it/s]


Will_Smith: 12 detected → 1 strict → 8 fallback → 9 total embeddings (8 images used fallback)


Zendaya: 100%|██████████| 10/10 [00:02<00:00,  3.65it/s]

Zendaya: 13 detected → 0 strict → 10 fallback → 10 total embeddings (10 images used fallback)

PROCESSING COMPLETE
Strict detection threshold: 0.9
Fallback detection threshold: 0.5
Strict minimum box size: 80px
Fallback minimum box size: 30px
Top faces per image: 3

Total images processed: 273
Images with detected faces: 271
Images needing fallback strategy: 253 (93.4%)

Total faces detected: 407
Faces passing strict criteria: 18
Faces added via fallback: 253
Final embeddings saved: 271
Celebrities with valid faces: 29

Output saved to: C:\Users\peggy\Desktop\FaceDetection\celebrity_face_embeddings_top3.pkl





# Cosine Similarity comparison - find the best threshold

In [75]:
import pickle
# Load the per-celebrity embedding arrays written by the extraction cell.
# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by this notebook.
with open(r"C:\Users\peggy\Desktop\FaceDetection\celebrity_face_embeddings_top3.pkl", "rb") as f:
    embeddings_dict = pickle.load(f)

# Flatten to list of (label, embedding): one entry per stored embedding row.
data = [
    (label, emb)
    for label, embeddings in embeddings_dict.items()
    for emb in embeddings
]

# Split the pairs into parallel arrays: labels[i] is the identity of vectors[i].
labels, vectors = zip(*data)
labels = np.array(labels)
vectors = np.stack(vectors)

In [76]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import matthews_corrcoef, confusion_matrix

def find_matches(embedding, all_embeddings, threshold):
    """Return a boolean mask of the rows in ``all_embeddings`` that match.

    A row matches when its cosine similarity to ``embedding`` is greater than
    or equal to ``threshold``. The mask has one entry per row.
    """
    query = [embedding]  # cosine_similarity expects a 2-D input
    similarity_row = cosine_similarity(query, all_embeddings)[0]
    return similarity_row >= threshold

def evaluate_threshold(threshold, vectors, labels):
    """Score one cosine-similarity threshold over all ordered embedding pairs.

    Every ordered pair ``(i, j)`` with ``i != j`` is treated as one binary
    decision: predicted "same identity" when the cosine similarity of the two
    embeddings is ``>= threshold``, and truly "same identity" when their
    labels are equal. Because pairs are ordered, each unordered pair is
    counted twice; this leaves TPR, TNR and MCC unchanged.

    Args:
        threshold: Minimum cosine similarity for a predicted match.
        vectors: 2-D array-like of embeddings, one row per sample.
        labels: 1-D array-like of identity labels, aligned with ``vectors``.

    Returns:
        dict with keys ``"threshold"``, ``"TP"``, ``"FP"``, ``"FN"``, ``"TN"``
        (integer pair counts), ``"TPR"``, ``"TNR"`` (0 when undefined) and
        ``"MCC"`` (0.0 when undefined, e.g. for empty input or when only one
        class is present).
    """
    vectors = np.asarray(vectors)
    labels = np.asarray(labels)
    TP = FP = FN = TN = 0

    if vectors.size:
        # Cosine similarity of every row against every row in one matrix
        # product; all-zero rows keep a norm of 1 so they yield similarity 0
        # instead of dividing by zero.
        norms = np.linalg.norm(vectors, axis=1, keepdims=True)
        norms[norms == 0] = 1.0
        unit = vectors / norms
        pred = (unit @ unit.T) >= threshold

        # Ground truth: which pairs share a label.
        truth = labels[:, None] == labels[None, :]

        # A sample is never compared with itself.
        off_diagonal = ~np.eye(len(vectors), dtype=bool)

        TP = int(np.count_nonzero(pred & truth & off_diagonal))
        FP = int(np.count_nonzero(pred & ~truth & off_diagonal))
        FN = int(np.count_nonzero(~pred & truth & off_diagonal))
        TN = int(np.count_nonzero(~pred & ~truth & off_diagonal))

    # Matthews correlation coefficient straight from the confusion counts
    # (exact Python integer arithmetic, so large pair counts cannot overflow).
    denominator = (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)
    mcc = (TP * TN - FP * FN) / denominator ** 0.5 if denominator else 0.0

    return {
        "threshold": threshold,
        "TP": TP, "FP": FP, "FN": FN, "TN": TN,
        "TPR": TP / (TP + FN) if TP + FN > 0 else 0,
        "TNR": TN / (TN + FP) if TN + FP > 0 else 0,
        "MCC": mcc,
    }

# Sweep 50 evenly spaced thresholds in [0.1, 0.9] and keep the evaluation
# with the highest MCC (the earliest threshold wins ties).
threshold_results = (
    evaluate_threshold(t, vectors, labels) for t in np.linspace(0.1, 0.9, 50)
)
best_result = max(threshold_results, key=lambda res: res["MCC"])

# Print result
print(f"Best threshold by MCC: {best_result['threshold']:.4f}")
print(f"MCC = {best_result['MCC']:.4f}")
print(f"TPR (Recall) = {best_result['TPR']:.4f}, TNR (Specificity) = {best_result['TNR']:.4f}")
print(f"Confusion matrix: TP={best_result['TP']}, FP={best_result['FP']}, FN={best_result['FN']}, TN={best_result['TN']}")

Best threshold by MCC: 0.3122
MCC = 0.9041
TPR (Recall) = 0.8390, TNR (Specificity) = 0.9994
Confusion matrix: TP=1970, FP=40, FN=378, TN=70782
