# Import Dependencies

In [65]:
import os
import cv2

import insightface
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import umap.umap_ as umap

from xgboost import XGBClassifier
from tqdm import tqdm
from insightface.app import FaceAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Notebook-wide settings.
#   APP_MODEL    - model pack name handed to FaceAnalysis(name=...).
#   APP_CONFIG   - keyword arguments forwarded verbatim to app.prepare(...).
#   RANDOM_STATE - seed shared by the classifiers and the CV splitter.
CONFIG = dict(
    APP_MODEL="buffalo_l",
    APP_CONFIG=dict(
        ctx_id=0,
        det_size=(320, 320),
        det_thresh=0.2,
    ),
    RANDOM_STATE=42,
)

In [4]:
# Dataset layout: every location below hangs off the dataset root.
main_dir = "dataset"

train_dir, test_dir, ref_dir = (
    os.path.join(main_dir, sub) for sub in ("train", "test", "reference_faces")
)
# Despite the "_dir" suffix this is the path of the labels CSV file.
train_labels_dir = os.path.join(main_dir, "train", "labels.csv")

In [5]:
# Read the training label table (image filename + employee id) and preview
# its first rows.
train_labels = pd.read_csv(filepath_or_buffer=train_labels_dir)
train_labels.head(n=5)

Unnamed: 0,filename,emp_id
0,face_0568.jpg,emp016
1,face_0433.jpg,emp014
2,face_1751.jpg,emp004
3,face_0675.jpg,emp028
4,face_0112.jpg,emp001


# Create Reference Embeddings

In [6]:
# Build the shared face-analysis pipeline used by every embedding helper
# below. CUDA is requested first with CPU listed after it; the captured output
# of this cell shows only CPUExecutionProvider being applied.
app = FaceAnalysis(
    name=CONFIG["APP_MODEL"],
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
app.prepare(**CONFIG["APP_CONFIG"])

def robust_face_detection(img, app, attempts=3):
    """Run face detection on progressively altered copies of an image.

    Pass number ``k`` applies the ``k``-th preprocessing step listed below to
    the *original* image and hands the result to ``app.get``. The first
    non-empty detection result wins. With the default ``attempts=3`` only the
    untouched, brightened and histogram-equalised variants are tried; passes
    beyond the number of steps reuse the untouched image.

    Args:
        img: Image array (the equalisation step converts it from BGR), or
            ``None``.
        app: Object exposing ``get(image)`` that returns a sized collection
            of detections.
        attempts: Maximum number of detection passes.

    Returns:
        The value returned by ``app.get`` on the first pass that found at
        least one face, or ``None`` when ``img`` is ``None`` or every pass
        came back empty or raised.
    """
    if img is None:
        return None

    def _equalize(frame):
        # Equalise the grayscale histogram, then repeat the single channel
        # three times so the result has three channels again.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        return cv2.equalizeHist(gray)[:, :, np.newaxis].repeat(3, 2)

    preprocessors = (
        lambda frame: frame,  # untouched
        lambda frame: cv2.convertScaleAbs(frame, alpha=1.5, beta=40),  # brighten
        _equalize,  # histogram equalisation
        lambda frame: cv2.GaussianBlur(frame, (5, 5), 0),  # Gaussian smoothing
        lambda frame: cv2.medianBlur(frame, 3),  # median smoothing
    )

    for attempt_no in range(1, attempts + 1):
        try:
            if attempt_no <= len(preprocessors):
                candidate = preprocessors[attempt_no - 1](img)
            else:
                candidate = img
            detected = app.get(candidate)
            if len(detected) > 0:
                return detected
        except Exception as e:
            # Best effort: report the failure and move on to the next pass.
            print(f"Detection attempt {attempt_no} failed: {str(e)}")
    return None

def create_reference_embeddings(ref_dir, ref_labels):
    """Build a gallery of L2-normalised face embeddings per employee.

    For every label in ``ref_labels`` the sub-directory ``ref_dir/<label>`` is
    scanned in sorted order. Files ending in ``.mp4`` and files that
    ``cv2.imread`` cannot read are skipped. For each remaining image the first
    face returned by ``robust_face_detection`` (using the module-level ``app``)
    contributes one embedding, divided by its L2 norm.

    Args:
        ref_dir: Directory that contains one sub-directory per employee.
        ref_labels: Iterable of employee ids (sub-directory names).

    Returns:
        dict mapping employee id -> 2-D ``np.ndarray`` with one embedding per
        row. Labels that are not directories, or for which no face was found,
        are absent from the result, so every value is a non-empty array.
    """
    gallery = {}

    # Iterate over a de-duplicated snapshot of the labels and only *add*
    # entries that have embeddings. Deleting keys from the dict that is being
    # iterated raises "RuntimeError: dictionary changed size during iteration".
    for emp_id in tqdm(list(dict.fromkeys(ref_labels)), desc="Creating Embeddings"):
        emp_dir = os.path.join(ref_dir, emp_id)
        if not os.path.isdir(emp_dir):
            # Stray files next to the employee folders get no gallery entry;
            # an empty placeholder would break stacking the gallery later.
            continue

        embeddings = []
        for file in tqdm(sorted(os.listdir(emp_dir)), desc=f"Embedding {emp_id}"):
            if file.lower().endswith('.mp4'):
                continue
            filepath = os.path.join(emp_dir, file)
            try:
                img = cv2.imread(filepath)
                if img is None:
                    continue
                faces = robust_face_detection(img, app)
                if faces:
                    raw = faces[0].embedding
                    embeddings.append(raw / np.linalg.norm(raw))
                else:
                    # `faces` is None here, so it must not be passed to len().
                    print(f"Warning: {filepath} - 0 faces detected")
            except Exception as e:
                print(f"Error processing {filepath}: {str(e)}")
                continue

        if embeddings:
            gallery[emp_id] = np.array(embeddings)
        else:
            print(f"Warning: No valid faces found for {emp_id}")
    return gallery

# One gallery entry per sub-directory of the reference folder, in sorted order.
ref_embed = create_reference_embeddings(
    ref_dir=ref_dir,
    ref_labels=sorted(os.listdir(ref_dir)),
)



Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/seang/.insightface/models/buffalo_l/1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/seang/.insightface/models/buffalo_l/2d106det.onnx landmark_2d_106 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/seang/.insightface/models/buffalo_l/det_10g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/seang/.insightface/models/buffalo_l/genderage.onnx genderage ['None', 3, 96, 96] 0.0 1.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/seang/.insightface/models/buffalo_l/w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4
Embedding emp001: 100%|██████████| 13/13 [00:03<00:00,  4.04it/s]
Embedding emp002: 100%|██████████| 13/13 [00:02<00:00,  4.42it/s]t]
Embedding emp003: 100%|██████████| 11/11 [00:02<00:00,  4.25it/s]t]
Embedding emp004: 100%|██████████| 15/15 [00:04<00:00,  3.20it/s]t]
Embedding emp005: 100%|██████████| 14/14 [00:03<00:00,  3.58it/s]t]
Embedding emp006: 100%|██████████| 8/8 [00:02<00:00,  3.02it/s]/it]
Embedding emp007: 100%|██████████| 14/14 [00:03<00:00,  4.06it/s]t]
Embedding emp008: 100%|██████████| 15/15 [00:03<00:00,  3.93it/s]t]
Embedding emp009: 100%|██████████| 17/17 [00:03<00:00,  4.32it/s]t]
Embedding emp010: 100%|██████████| 15/15 [00:03<00:00,  4.19it/s]t]
Embedding emp011: 100%|██████████| 11/11 [00:02<00:00,  3.92it/s]it]
Embedding emp012: 100%|██████████| 14/14 [00:03<00:00,  3.89it/s]it]
Embedding emp013: 100%|██████████| 16/16 [00:06<00:00,  2.47it/s]it]
Embedding emp014: 100%|██████████| 15/15 [00:04<00:00,

In [11]:
# Stack the per-employee embedding arrays into one gallery matrix and build
# the matching label list (one label per gallery row, same order as the rows).
gallery_embed = np.vstack(list(ref_embed.values()))

# A comprehension keeps its loop variables local, so the builtin `id` is no
# longer shadowed in the notebook namespace after this cell runs.
gallery_labels = [
    emp_id
    for emp_id, emp_embeddings in ref_embed.items()
    for _ in range(len(emp_embeddings))
]

len(gallery_embed), len(gallery_labels)

(452, 452)

# Create Train Embeddings

In [8]:
def create_train_embeddings(train_dir, labels_df, normalize=True, random_state=None):
    """Embed every labelled training image.

    Each row of ``labels_df`` names an image under ``train_dir/images``. The
    first face returned by ``robust_face_detection`` (using the module-level
    ``app``) supplies the embedding. When no face is found (or the image
    cannot be read) a small random placeholder vector is stored under the
    label ``'UNKNOWN'`` and the row is recorded in the ``missing`` list.

    Args:
        train_dir: Directory containing the ``images`` sub-directory.
        labels_df: DataFrame with ``filename`` and ``emp_id`` columns. If those
            column names are absent, each row must unpack into exactly
            ``(filename, emp_id)``.
        normalize: Divide each detected embedding by its L2 norm.
        random_state: Seed for the placeholder vectors; ``None`` uses
            ``CONFIG["RANDOM_STATE"]`` so reruns give identical placeholders.

    Returns:
        ``(train_data, missing)`` where ``train_data`` is
        ``{"embedding": [...], "label": [...]}`` with one entry per row of
        ``labels_df`` and ``missing`` is a list of ``(emp_id, filepath)``
        tuples for rows without a detected face.
    """
    train_data = {"embedding": [], "label": []}
    missing = []
    img_path = os.path.join(train_dir, "images")

    # Dedicated seeded generator: placeholders no longer depend on the state
    # of NumPy's global RNG.
    seed = CONFIG["RANDOM_STATE"] if random_state is None else random_state
    rng = np.random.default_rng(seed)

    # Prefer name-based column access so an extra column (e.g. a saved index)
    # does not break the unpacking; fall back to positional unpacking.
    by_name = {"filename", "emp_id"}.issubset(labels_df.columns)

    for _, row in tqdm(labels_df.iterrows(), desc="Creating Train Embeddings", total=len(labels_df)):
        if by_name:
            filename, emp_id = row["filename"], row["emp_id"]
        else:
            filename, emp_id = row
        filepath = os.path.join(img_path, filename)
        img = cv2.imread(filepath)
        faces = robust_face_detection(img, app)
        if faces:
            embedding = faces[0].embedding
            if normalize:
                norm = np.linalg.norm(embedding)
                if norm > 0:
                    # Out-of-place division leaves the face object untouched.
                    embedding = embedding / norm
            train_data['embedding'].append(embedding)
            train_data['label'].append(emp_id)
        else:
            # 512 is assumed to match the recognition embedding length -
            # TODO confirm against the loaded model.
            train_data['embedding'].append(rng.normal(0, 0.01, 512))
            train_data['label'].append('UNKNOWN')
            missing.append((emp_id, filepath))
    return train_data, missing

# Embed all labelled training images; rows without a detected face are also
# listed in `train_missing`.
train_dict, train_missing = create_train_embeddings(
    train_dir=train_dir,
    labels_df=train_labels,
)

  P = np.linalg.lstsq(X_homo, Y)[0].T # Affine matrix. 3 x 4
Creating Train Embeddings: 100%|██████████| 1179/1179 [05:08<00:00,  3.83it/s]


In [9]:
# Percentage of labelled training images for which a face was detected.
print(
    "Training images used (%): "
    + format(((len(train_labels) - len(train_missing)) / len(train_labels)) * 100, ".2f")
)

Training images used (%): 94.40


# Train-Validation Split

In [12]:
# Fit one encoder on every label seen in either the training set or the
# reference gallery so both share the same integer codes.
classes = np.unique(train_dict['label'] + gallery_labels)
le = LabelEncoder().fit(classes)

train_embed = np.array(train_dict['embedding'])
# NOTE: from here on `train_labels` is the encoded label array, no longer the
# DataFrame loaded from labels.csv.
train_labels = le.transform(train_dict['label'])

# Final design matrix / target: training rows first, gallery rows after.
X = np.vstack((train_embed, gallery_embed))
y = np.hstack((train_labels, le.transform(gallery_labels)))

In [47]:
class CrossValidator:
    """Fit several models on identical cross-validation folds and collect metrics.

    Args:
        models: Sequence of ``(name, estimator)`` pairs; each estimator must
            provide ``fit(X, y)`` and ``predict(X)``. Estimators are refitted
            in place on every fold.
        metric_fns: Sequence of ``(name, fn)`` pairs with ``fn(y_true, y_pred)``
            returning a number.
        cv_method: Splitter exposing ``split(X, y)`` that yields
            ``(train_idx, test_idx)`` index arrays.
        name: Optional label for this run; stored but not otherwise used here.
        verbose: Print per-fold metrics while fitting.

    Attributes:
        results: After ``fit``, maps model name -> list with one
            ``{metric_name: value}`` dict per fold.
    """

    def __init__(self, models, metric_fns, cv_method, name=None, verbose=True):
        self.models = models
        self.metric_fns = metric_fns
        self.cv_method = cv_method
        self.name = name
        self.verbose = verbose
        self.results = {}

    def _calculate_metrics(self, y_true, y_pred):
        """Evaluate every metric; a failing metric is reported and stored as NaN."""
        results = {}
        for name, fn in self.metric_fns:
            try:
                results[name] = fn(y_true, y_pred)
            except Exception as e:
                print(f"Metric {name} failed: {str(e)}")
                results[name] = np.nan
        return results

    def _total_folds(self, X, y):
        """Return the fold count for the progress header, or "?" if unknown."""
        n_splits = getattr(self.cv_method, "n_splits", None)
        if n_splits is not None:
            return n_splits
        # Some splitters have no `n_splits` attribute and only answer through
        # `get_n_splits`.
        get_n_splits = getattr(self.cv_method, "get_n_splits", None)
        return get_n_splits(X, y) if callable(get_n_splits) else "?"

    def fit(self, X, y):
        """Run every model over every fold and store the metrics in ``results``.

        Args:
            X: Feature matrix indexable by an integer index array; plain
                lists/tuples are converted to arrays first.
            y: Target vector, same indexing requirement as ``X``.
        """
        # Python sequences cannot be indexed with the index arrays produced by
        # the splitter, so convert them up front. Other containers are left
        # untouched.
        if isinstance(X, (list, tuple)):
            X = np.asarray(X)
        if isinstance(y, (list, tuple)):
            y = np.asarray(y)

        self.results = {model[0]: [] for model in self.models}
        total_folds = self._total_folds(X, y) if self.verbose else None

        for fold, (train_idx, test_idx) in enumerate(self.cv_method.split(X, y)):
            if self.verbose:
                print(f"\nFold {fold + 1}/{total_folds}")
                print("-"*40)

            X_train, X_test = X[train_idx], X[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]

            for name, model in self.models:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)

                metrics = self._calculate_metrics(y_test, y_pred)
                self.results[name].append(metrics)

                if self.verbose:
                    print(f"- {name}:")
                    for m, v in metrics.items():
                        print(f"  {m}: {v:.4f}")
                    print("\n")

    def summarize(self):
        """Aggregate fold results per model.

        Returns:
            dict mapping model name -> ``{'mean': Series, 'std': Series}``,
            each Series indexed by metric name and computed across folds.
        """
        summary = {}
        for name in self.results.keys():
            fold_results = pd.DataFrame(self.results[name])
            summary[name] = {
                'mean': fold_results.mean(),
                'std': fold_results.std()
            }
        return summary

In [73]:
def macro_accuracy(y_true, y_pred):
    """Mean of the per-class accuracies, weighting every true class equally.

    For each distinct value in ``y_true`` the fraction of its samples that
    were predicted correctly is computed; the result is the unweighted mean
    of those fractions.

    Args:
        y_true: Ground-truth labels (array-like, e.g. list or ndarray).
        y_pred: Predicted labels, same shape as ``y_true``.

    Returns:
        The macro-averaged accuracy, or ``0`` when ``y_true`` is empty.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` differ in shape.
    """
    # Plain lists would turn `y_true == c` into a single False and the score
    # into NaN, so make sure both inputs are arrays first.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same shape, got {y_true.shape} and {y_pred.shape}"
        )

    unique_classes = np.unique(y_true)
    if unique_classes.size == 0:
        return 0

    hits = y_pred == y_true
    acc_per_class = []
    for c in unique_classes:
        class_mask = y_true == c
        # Every class comes from y_true, so its mask is never empty.
        acc_per_class.append(np.count_nonzero(hits & class_mask) / np.count_nonzero(class_mask))

    return np.mean(acc_per_class)


# Candidate classifiers, each paired with the display name used in the
# cross-validation report.
models = [
    (
        "SVM",
        SVC(
            kernel="linear",
            C=0.5,
            probability=True,
            class_weight="balanced",
            random_state=CONFIG["RANDOM_STATE"],
        ),
    ),
    (
        "LogisticRegression",
        LogisticRegression(
            solver="lbfgs",
            class_weight="balanced",
            random_state=CONFIG["RANDOM_STATE"],
        ),
    ),
    (
        "RidgeClassifier",
        RidgeClassifier(alpha=1.0, class_weight="balanced"),
    ),
    (
        "KNeighborsClassifier",
        KNeighborsClassifier(n_neighbors=3, metric="cosine", weights="distance"),
    ),
]

# Metrics reported for every model on every fold, as (name, function) pairs.
metric_fns = [
    ("macro_accuracy", macro_accuracy),
    ("accuracy", accuracy_score),
]

In [74]:
# Evaluate all candidate models on the same 20 shuffled, stratified folds.
cv = CrossValidator(
    models,
    metric_fns,
    StratifiedKFold(n_splits=20, shuffle=True, random_state=CONFIG["RANDOM_STATE"]),
    name="FaceRecognition CV",
)

cv.fit(X, y)




Fold 1/20
----------------------------------------
- SVM:
  macro_accuracy: 0.9583
  accuracy: 0.9512


- LogisticRegression:
  macro_accuracy: 0.9681
  accuracy: 0.9634


- RidgeClassifier:
  macro_accuracy: 0.9681
  accuracy: 0.9634


- KNeighborsClassifier:
  macro_accuracy: 0.8382
  accuracy: 0.8293



Fold 2/20
----------------------------------------
- SVM:
  macro_accuracy: 0.9108
  accuracy: 0.8902


- LogisticRegression:
  macro_accuracy: 0.9353
  accuracy: 0.9146


- RidgeClassifier:
  macro_accuracy: 0.9279
  accuracy: 0.9024


- KNeighborsClassifier:
  macro_accuracy: 0.8887
  accuracy: 0.8537



Fold 3/20
----------------------------------------
- SVM:
  macro_accuracy: 0.9059
  accuracy: 0.9146


- LogisticRegression:
  macro_accuracy: 0.8985
  accuracy: 0.9024


- RidgeClassifier:
  macro_accuracy: 0.8912
  accuracy: 0.8902


- KNeighborsClassifier:
  macro_accuracy: 0.8701
  accuracy: 0.8780



Fold 4/20
----------------------------------------
- SVM:
  macro_accuracy:

In [75]:
# Print mean ± standard deviation across folds for every model and metric.
results = cv.summarize()

for model_name, metrics in results.items():
    print(f"\n{model_name}:")
    for metric in metrics['mean'].index:
        stats = metrics['mean'][metric]
        print(f"  {metric}: {stats:.4f} ± {metrics['std'][metric]:.4f}")


SVM:
  macro_accuracy: 0.8783 ± 0.0477
  accuracy: 0.8767 ± 0.0400

LogisticRegression:
  macro_accuracy: 0.8907 ± 0.0454
  accuracy: 0.8896 ± 0.0406

RidgeClassifier:
  macro_accuracy: 0.8957 ± 0.0390
  accuracy: 0.8901 ± 0.0363

KNeighborsClassifier:
  macro_accuracy: 0.8502 ± 0.0477
  accuracy: 0.8338 ± 0.0422
