# Phase 4: Face Detection & Recognition Evaluation
This notebook uses RetinaFace for detection, ArcFace for feature extraction, and FAISS for matching.

**Metrics**: Recognition Accuracy, FAR (False Acceptance Rate), FRR (False Rejection Rate), Latency
**Datasets**: LFW, WiderFace (preprocessed)

In [None]:
# Environment setup for Colab: mount Google Drive, define the project paths,
# fetch a fresh copy of the experiment repository and install dependencies.
from google.colab import drive
drive.mount('/content/drive')

import os
# All paths below live on the mounted Drive.
PROJECT_DIR = '/content/drive/MyDrive/computer_vision'
DATASETS_DIR = f'{PROJECT_DIR}/datasets'
RESULTS_DIR = f'{PROJECT_DIR}/results/phase4'
# exist_ok=True makes re-running this cell harmless.
os.makedirs(RESULTS_DIR, exist_ok=True)

# Remove any previous checkout before cloning so the clone always starts
# from a clean directory.
%cd /content
!rm -rf computer_vision_expirement
!git clone https://github.com/Ib-Programmer/computer_vision_expirement.git
%cd computer_vision_expirement

# Face-analysis stack first, then whatever the repository itself requires.
!pip install -q insightface onnxruntime-gpu faiss-gpu
!pip install -q -r requirements.txt

## 4.1 Face Detection with RetinaFace

In [None]:
import cv2
import numpy as np
from insightface.app import FaceAnalysis

# Build the shared InsightFace pipeline from the 'buffalo_l' model pack.
# Execution providers are listed with CUDA first and CPU second.
app = FaceAnalysis(
    name='buffalo_l',
    providers=[
        'CUDAExecutionProvider',
        'CPUExecutionProvider',
    ],
)
# Detector input size is fixed at 640x640 for every later app.get() call.
app.prepare(det_size=(640, 640), ctx_id=0)
print("InsightFace loaded: RetinaFace (detection) + ArcFace (recognition)")

In [None]:
import glob
import time

# Benchmark face detection on the preprocessed LFW test split.
# Produces:
#   lfw_test          - sorted list of test image paths (reused by later cells)
#   detection_results - one dict per readable image (path, face count, latency)
#   detection_times   - per-image latency in milliseconds
lfw_test = sorted(glob.glob(f'{DATASETS_DIR}/lfw_processed/test/*.jpg'))
print(f"LFW test images: {len(lfw_test)}")

detection_results = []
detection_times = []

for img_path in lfw_test[:200]:  # Process at most 200 images
    img = cv2.imread(img_path)
    if img is None:
        # Unreadable/corrupt file: skip it rather than abort the benchmark.
        continue

    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump with clock adjustments.
    # The timed call is the full app.get() pipeline, which also produces the
    # embeddings used in section 4.2, so this is an upper bound on pure
    # detection latency.
    # NOTE(review): the first call may include one-off warm-up cost - confirm
    # whether it should be excluded from the average.
    start = time.perf_counter()
    faces = app.get(img)
    elapsed = (time.perf_counter() - start) * 1000

    detection_times.append(elapsed)
    detection_results.append({
        'path': img_path,
        'num_faces': len(faces),
        'time_ms': elapsed
    })

print(f"Processed: {len(detection_results)} images")
if detection_times:
    print(f"Avg detection time: {np.mean(detection_times):.1f} ms")
else:
    # np.mean([]) would emit a RuntimeWarning and print "nan".
    print("Avg detection time: n/a (no readable images)")
print(f"Faces found: {sum(r['num_faces'] for r in detection_results)}")

## 4.2 Feature Extraction (ArcFace Embeddings)

In [None]:
# Extract ArcFace embeddings and an identity label for every detected face.
# Produces:
#   embeddings_db - float32 array with one row per detected face
#   labels_db     - identity label for each row of embeddings_db
import re

# The test images are globbed from a single flat directory
# (".../lfw_processed/test/*.jpg"), so the parent directory name is "test" for
# every file and cannot serve as the identity. The label is taken from the
# file name instead.
# Assumes LFW-style names "<Person_Name>_<NNNN>.jpg" - TODO confirm against the
# preprocessing step that wrote lfw_processed/.
_IMAGE_INDEX_SUFFIX = re.compile(r'_\d+$')

embeddings_db = []
labels_db = []

print("Extracting face embeddings from LFW...")
for img_path in lfw_test[:200]:
    img = cv2.imread(img_path)
    if img is None:
        continue

    # "Aaron_Eckhart_0001.jpg" -> "Aaron_Eckhart"
    stem = os.path.splitext(os.path.basename(img_path))[0]
    label = _IMAGE_INDEX_SUFFIX.sub('', stem)

    # NOTE(review): every face found in the image receives the same label,
    # including any background faces - confirm this is intended.
    for face in app.get(img):
        embeddings_db.append(face.embedding)  # 512-d ArcFace embedding
        labels_db.append(label)

embeddings_db = np.array(embeddings_db).astype('float32')
print(f"Extracted {len(embeddings_db)} embeddings, shape: {embeddings_db.shape}")

## 4.3 Face Matching with FAISS

In [None]:
import faiss

# Build an exact inner-product FAISS index over the embeddings and time a
# small batch of nearest-neighbour searches against it.
if len(embeddings_db) > 0:
    # Normalize embeddings for cosine similarity.
    # This modifies embeddings_db in place.
    faiss.normalize_L2(embeddings_db)

    # Build FAISS index
    dimension = embeddings_db.shape[1]  # 512
    index = faiss.IndexFlatIP(dimension)  # Inner product (cosine after normalization)
    index.add(embeddings_db)

    print(f"FAISS index built: {index.ntotal} vectors, {dimension}-d")

    # Search: query the first (up to) 50 embeddings against the database.
    # k is capped at the index size because FAISS pads missing neighbours
    # with index -1 when fewer than k vectors exist.
    k = min(5, index.ntotal)  # top-5 matches
    queries = embeddings_db[:50]
    num_queries = len(queries)  # may be < 50 when few faces were found

    search_start = time.perf_counter()
    distances, indices = index.search(queries, k)
    search_time = (time.perf_counter() - search_start) * 1000

    print(f"Search time for {num_queries} queries: {search_time:.1f} ms")
    print(f"Avg per query: {search_time/num_queries:.2f} ms")

    # Show sample results. Column 0 is expected to be the query itself
    # (queries are taken from the indexed vectors), so column 1 is reported
    # as the best non-self match.
    # NOTE(review): with exact duplicate embeddings the self-match is not
    # guaranteed to be in column 0.
    print("\nSample matches (query -> top match, similarity):")
    if k > 1:
        for i in range(min(5, num_queries)):
            print(f"  Query {i}: match_idx={indices[i][1]}, similarity={distances[i][1]:.4f}")
    else:
        print("  (need at least 2 indexed embeddings to show a non-self match)")
else:
    print("No embeddings extracted. Check face detection results.")

## 4.4 Evaluate Recognition Performance

In [None]:
import pandas as pd

def evaluate_recognition(embeddings, labels, thresholds=np.arange(0.1, 1.0, 0.05), max_samples=100):
    """Sweep cosine-similarity thresholds and report verification metrics.

    Every unordered pair among the first ``max_samples`` embeddings is scored
    by cosine similarity. A pair is "accepted" when its similarity is at or
    above the threshold, and "genuine" when both labels are equal.

    Args:
        embeddings: 2-D array-like with one embedding per row. It does not
            need to be pre-normalised and is never modified.
        labels: Sequence of identity labels aligned with ``embeddings``.
        thresholds: Iterable of similarity thresholds to evaluate.
        max_samples: Upper bound on how many leading embeddings take part in
            the pairwise comparison (keeps the pair count bounded).

    Returns:
        pandas.DataFrame with one row per threshold and the columns
        ``Threshold``, ``Accuracy``, ``FAR`` (accepted impostor pairs /
        impostor pairs) and ``FRR`` (rejected genuine pairs / genuine pairs).
        A rate whose denominator is zero is reported as 0.
    """
    n = min(len(embeddings), max_samples)

    pair_sims = np.empty(0, dtype='float32')
    pair_same = np.empty(0, dtype=bool)
    if n >= 2:
        # Normalise a private copy so the dot product really is a cosine
        # similarity, whatever the caller passed in.
        sample = np.asarray(embeddings[:n], dtype='float32')
        norms = np.linalg.norm(sample, axis=1, keepdims=True)
        norms[norms == 0] = 1.0  # leave all-zero rows untouched instead of dividing by 0
        unit = sample / norms

        # Similarities do not depend on the threshold: compute each i<j pair
        # once and reuse it for the whole sweep.
        rows, cols = np.triu_indices(n, k=1)
        pair_sims = (unit @ unit.T)[rows, cols]
        pair_same = np.array(
            [labels[i] == labels[j] for i, j in zip(rows, cols)],
            dtype=bool,
        )

    results = []
    for threshold in thresholds:
        accepted = pair_sims >= threshold

        tp = int(np.count_nonzero(accepted & pair_same))
        fp = int(np.count_nonzero(accepted & ~pair_same))
        fn = int(np.count_nonzero(~accepted & pair_same))
        tn = int(np.count_nonzero(~accepted & ~pair_same))

        total_genuine = tp + fn
        total_impostor = fp + tn

        # max(..., 1) avoids division by zero when a pair class is absent.
        far = fp / max(total_impostor, 1)
        frr = fn / max(total_genuine, 1)
        accuracy = (tp + tn) / max(tp + fp + tn + fn, 1)

        results.append({
            'Threshold': round(threshold, 2),
            'Accuracy': round(accuracy, 4),
            'FAR': round(far, 4),
            'FRR': round(frr, 4),
        })

    return pd.DataFrame(results)

# Run the threshold sweep, show the table and persist it as CSV.
# Skipped entirely when no embeddings were extracted.
if len(embeddings_db) != 0:
    eval_df = evaluate_recognition(embeddings=embeddings_db, labels=labels_db)
    print("Recognition Performance at Different Thresholds:")
    print(eval_df.to_string(index=False))
    eval_df.to_csv(f'{RESULTS_DIR}/recognition_metrics.csv', index=False)

## 4.5 Test Robustness Under Outdoor Conditions

In [None]:
import matplotlib.pyplot as plt

# For each simulated outdoor condition, measure how many of (up to) 50
# augmented LFW images still yield at least one detected face.
conditions = ['fog', 'low_light', 'motion_blur', 'rain']
aug_base = f'{PROJECT_DIR}/outputs/augmented/lfw/train'
condition_results = []

for condition in conditions:
    aug_dir = f'{aug_base}/{condition}'
    if not os.path.exists(aug_dir):
        print(f"[SKIP] {condition}: augmented data not found")
        continue

    aug_images = sorted(glob.glob(f'{aug_dir}/*.jpg'))[:50]

    # Lazily load each image, dropping unreadable files, and record one
    # boolean per readable image: True when at least one face was found.
    readable = (frame for frame in map(cv2.imread, aug_images) if frame is not None)
    hits = [len(app.get(frame)) > 0 for frame in readable]

    total = len(hits)
    detected = sum(hits)

    # max(total, 1) keeps the rate at 0 when the folder has no readable images.
    rate = detected / max(total, 1) * 100
    condition_results.append({'Condition': condition, 'Images': total, 'Detected': detected, 'Rate': f'{rate:.1f}%'})
    print(f"  {condition}: {detected}/{total} faces detected ({rate:.1f}%)")

# Only write the summary when at least one condition folder was present.
if condition_results:
    cond_df = pd.DataFrame(condition_results)
    print("\nRobustness Summary:")
    print(cond_df.to_string(index=False))
    cond_df.to_csv(f'{RESULTS_DIR}/robustness_analysis.csv', index=False)

In [None]:
# Closing message: where the results were written and which notebook is next.
print(
    f"\nPhase 4 results saved to: {RESULTS_DIR}",
    "Next: Open Phase5_Model_Optimization.ipynb",
    sep="\n",
)