# Face Recognition Login Demo with Cancelable Biometrics & Fuzzy Commitment
## End-to-End System using LFW Dataset

This notebook implements a complete face verification system that simulates a login experience.
**Key Features:**
- **Face Embeddings**: Uses `facenet-pytorch` (InceptionResnetV1) to convert faces to 512-d vectors.
- **Cancelable Biometrics**: Implements a seed-based permutation/transformation to protect raw templates.
- **Fuzzy Commitment**: Adds an extra layer of security using Error Correcting Codes (Reed-Solomon) to bind a cryptographic key to the biometric data.
- **Performance Evaluation**: Calculates FAR, FRR, and EER for both baseline and secured systems.
- **Interactive UI**: A Gradio web app running inside the notebook.


## 1. Setup and Imports

In [1]:
# Robust installation
!pip install --upgrade -q pip setuptools wheel
!pip install -q gradio reedsolo
!pip install -q facenet-pytorch --no-deps torchvision

import hashlib
import hmac
import os
import random

import numpy as np
import pandas as pd
import cv2
from PIL import Image
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.metrics.pairwise import cosine_similarity

import torch
from facenet_pytorch import InceptionResnetV1, MTCNN
import reedsolo

import gradio as gr

# Reproducibility
SEED = 42
def seed_everything(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also writes ``seed`` to the ``PYTHONHASHSEED`` environment variable and,
    when CUDA is available, calls ``torch.cuda.manual_seed`` and switches
    cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed):
        apply_seed(seed)

    # Nothing GPU-specific to configure on a CPU-only machine.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

# Seed all generators once at notebook start-up.
seed_everything(SEED)

# Use the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


  return torch._C._cuda_getDeviceCount() > 0


## 2. Dataset Loading
Scanning the LFW dataset folder to build an index of identities and their images.

In [2]:
# Root folder of the LFW images; scan_dataset() treats each sub-directory as one person.
LFW_DIR = './lfw_funneled'

# When the folder is missing, try to extract a local archive.zip; otherwise only warn.
if not os.path.exists(LFW_DIR):
    if os.path.exists('archive.zip'):
        print("Unzipping dataset...")
        # IPython shell escape. NOTE(review): assumes the archive expands to LFW_DIR — confirm.
        !unzip -q archive.zip
    else:
        # No exception is raised here; the notebook continues without data.
        print("Warning: LFW dataset not found.")

def scan_dataset(root_dir):
    """Index a one-folder-per-person image tree into a DataFrame.

    Each sub-directory of ``root_dir`` is treated as one identity, and every
    file in it ending in ``.jpg`` or ``.png`` (case-insensitive) becomes one
    row. Directories and files are visited in sorted order.

    Args:
        root_dir: Path of the dataset root.

    Returns:
        DataFrame with columns ``name`` (directory name) and ``path`` (image
        path). It is empty, with the same columns, when ``root_dir`` does
        not exist.
    """
    if not os.path.exists(root_dir):
        return pd.DataFrame(columns=['name', 'path'])

    rows = []
    for person in sorted(os.listdir(root_dir)):
        folder = os.path.join(root_dir, person)
        # Plain files directly under the root are not identities.
        if not os.path.isdir(folder):
            continue
        for filename in sorted(os.listdir(folder)):
            if filename.lower().endswith(('.jpg', '.png')):
                rows.append((person, os.path.join(folder, filename)))

    return pd.DataFrame({
        'name': [person for person, _ in rows],
        'path': [path for _, path in rows],
    })

# Global image index (columns: 'name', 'path') used by the cells below.
df_lfw = scan_dataset(LFW_DIR)
print(f"Found {len(df_lfw)} images for {df_lfw['name'].nunique()} identities.")

def get_images_of_person(name, max_imgs=5):
    """Return up to ``max_imgs`` image paths indexed under ``name`` in ``df_lfw``.

    Args:
        name: Identity to look up (exact match on the ``name`` column).
        max_imgs: Maximum number of paths to return.

    Returns:
        List of path strings in index order; empty when the name is unknown.
    """
    matching_paths = df_lfw.loc[df_lfw['name'] == name, 'path']
    return matching_paths.iloc[:max_imgs].tolist()

# People with enough images for enrolling + testing
# (at least two images: one to enroll with, one to authenticate with).
people_counts = df_lfw['name'].value_counts()
enrolled_candidates = people_counts[people_counts >= 2].index.tolist()
print(f"People with >= 2 images: {len(enrolled_candidates)}")

Found 13233 images for 5749 identities.
People with >= 2 images: 1680


## 3. Face Embedding Model
Loading FaceNet (InceptionResnetV1).

In [3]:
# Embedding network: InceptionResnetV1 with the 'vggface2' pretrained weights, in eval mode.
resnet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
# Face detector/cropper configured for 160-pixel crops with no margin.
# NOTE(review): keep_all=False presumably returns one face per image — confirm in the facenet-pytorch docs.
mtcnn = MTCNN(image_size=160, margin=0, keep_all=False, device=device)

def get_embedding(img_path_or_array):
    """Detect the face in an image and return its embedding from ``resnet``.

    Args:
        img_path_or_array: One of
            * ``str`` - path of an image file, opened with PIL;
            * ``np.ndarray`` - an image in BGR channel order (converted to
              RGB before detection);
            * anything else - passed to ``mtcnn`` unchanged.

    Returns:
        The flattened NumPy embedding, or ``None`` when the file cannot be
        opened, detection raises, or ``mtcnn`` returns no face crop.
    """
    if isinstance(img_path_or_array, str):
        try:
            # Normalise to 3-channel RGB so grayscale, palette and RGBA files
            # (typical for uploaded PNGs) do not reach the detector with an
            # unexpected channel count and get reported as "no face".
            img = Image.open(img_path_or_array).convert('RGB')
        except Exception:
            # Deliberate best-effort: unreadable input is treated as "no face".
            return None
    elif isinstance(img_path_or_array, np.ndarray):
        img = Image.fromarray(cv2.cvtColor(img_path_or_array, cv2.COLOR_BGR2RGB))
    else:
        img = img_path_or_array

    try:
        img_cropped = mtcnn(img)
    except Exception:
        # Deliberate best-effort: detector failures are treated as "no face".
        return None

    if img_cropped is None:
        return None

    # Add the batch dimension the network expects, then flatten the result.
    img_tensor = img_cropped.unsqueeze(0).to(device)
    with torch.no_grad():
        return resnet(img_tensor).cpu().numpy().flatten()

print("Model loaded.")

Model loaded.


## 4. Baseline Performance Evaluation
Simulating the LFW verification protocol to establish a baseline: the ROC curve, its AUC, and the Equal Error Rate (EER) together with the threshold at which it occurs.

In [4]:
# Load Pairs
PAIRS_PATH = 'pairsDevTest.txt'


def _lfw_image_path(person, index):
    """Build ``<LFW_DIR>/<person>/<person>_<NNNN>.jpg`` for a pairs-file entry."""
    return os.path.join(LFW_DIR, person, f"{person}_{int(index):04d}.jpg")


if os.path.exists(PAIRS_PATH):
    print(f"Loading pairs from {PAIRS_PATH}")
    pairs_list = []
    with open(PAIRS_PATH, 'r') as f:
        next(f, None)  # Skip header
        for line in f:
            fields = line.strip().split()
            if len(fields) == 3:
                # "<name> <idx1> <idx2>": two images of the same person.
                person, first_idx, second_idx = fields
                pairs_list.append({
                    'p1': _lfw_image_path(person, first_idx),
                    'p2': _lfw_image_path(person, second_idx),
                    'label': 1,
                })
            elif len(fields) == 4:
                # "<name1> <idx1> <name2> <idx2>": images of two different people.
                person_a, idx_a, person_b, idx_b = fields
                pairs_list.append({
                    'p1': _lfw_image_path(person_a, idx_a),
                    'p2': _lfw_image_path(person_b, idx_b),
                    'label': 0,
                })
    df_pairs = pd.DataFrame(pairs_list)
else:
    # Fallback: build random pairs from the scanned dataset when the file is missing.
    print("Pairs file not found. Generatig random pairs for eval...")

    def generate_pairs(df, num_pairs=500):
        """Sample ``num_pairs // 2`` genuine and ``num_pairs // 2`` impostor pairs.

        Returns a DataFrame with columns ``p1``, ``p2`` (image paths) and
        ``label`` (1 = same person, 0 = different people).
        """
        counts = df['name'].value_counts()
        multi_img_people = counts[counts >= 2].index
        all_people = df['name'].unique()
        half = num_pairs // 2
        pairs = []

        # Genuine: two distinct images of one person.
        for _ in range(half):
            person = np.random.choice(multi_img_people)
            first, second = df[df['name'] == person]['path'].sample(2).tolist()
            pairs.append({'p1': first, 'p2': second, 'label': 1})

        # Impostor: one image each from two different people.
        for _ in range(half):
            person_a, person_b = np.random.choice(all_people, 2, replace=False)
            img_a = df[df['name'] == person_a]['path'].sample(1).iloc[0]
            img_b = df[df['name'] == person_b]['path'].sample(1).iloc[0]
            pairs.append({'p1': img_a, 'p2': img_b, 'label': 0})

        return pd.DataFrame(pairs)

    df_pairs = generate_pairs(df_lfw)

print(f"Evaluated on {len(df_pairs)} pairs.")

def evaluate_baseline(df_pairs):
    """Score image pairs with unprotected embeddings and report ROC / EER.

    For every row, both images are embedded (each distinct path only once)
    and compared by cosine similarity. Pairs where either embedding is
    ``None`` are skipped. The ROC curve is plotted and the EER printed.

    Args:
        df_pairs: DataFrame with columns ``p1``, ``p2`` (image paths) and
            ``label`` (1 = genuine, 0 = impostor).

    Returns:
        The score threshold at the point where |FNR - FPR| is smallest.
    """
    embedding_by_path = {}

    def embed(path):
        # Memoise per path so an image that appears in many pairs is embedded once.
        if path not in embedding_by_path:
            embedding_by_path[path] = get_embedding(path)
        return embedding_by_path[path]

    scores, labels = [], []
    for _, pair in tqdm(df_pairs.iterrows(), total=len(df_pairs)):
        first, second = embed(pair['p1']), embed(pair['p2'])
        if first is None or second is None:
            continue
        scores.append(cosine_similarity([first], [second])[0][0])
        labels.append(pair['label'])

    fpr, tpr, thresholds = roc_curve(labels, scores)
    roc_auc = auc(fpr, tpr)

    # EER operating point: where false-negative and false-positive rates are closest.
    eer_index = np.nanargmin(np.absolute((1 - tpr) - fpr))
    eer_threshold = thresholds[eer_index]
    EER = fpr[eer_index]

    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Baseline ROC')
    plt.legend(loc="lower right")
    plt.show()

    print(f"Baseline EER: {EER:.4f} at Threshold: {eer_threshold:.4f}")
    return eer_threshold

# Run Baseline Eval (Uncomment to run automatically separately)
# baseline_threshold = evaluate_baseline(df_pairs.sample(200)) # Sample for speed

Loading pairs from pairsDevTest.txt
Evaluated on 1000 pairs.


## 5. Security Layer 1: Cancelable Biometrics (BioHashing)
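Transforming embeddings using a seed-based permutation of their dimensions. Because the same permutation is applied to both templates, the cosine similarity between two embeddings protected with the same seed is identical to that of the raw embeddings; a template protected with a different seed no longer lines up.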

In [5]:
def protect_template_biohash(embedding, seed):
    """Return ``embedding`` with its entries reordered by a seed-derived permutation.

    Args:
        embedding: 1-D array-like supporting index-array lookup (e.g. a
            NumPy array), or ``None``.
        seed: Value convertible to ``int``; the same seed always yields the
            same permutation for a given embedding length.

    Returns:
        The permuted embedding, or ``None`` when ``embedding`` is ``None``.
    """
    if embedding is None:
        return None
    # A private RandomState produces exactly the permutation that
    # ``np.random.seed(seed); np.random.permutation(n)`` would, but leaves the
    # global NumPy generator untouched. Reseeding the global generator here
    # would make every later ``df.sample()`` / ``np.random.choice()`` repeat
    # the same draws after each template protection.
    rng = np.random.RandomState(int(seed))
    perm = rng.permutation(len(embedding))
    return embedding[perm]

def verify_biohash(probe, ref, seed, threshold):
    """Compare a raw probe embedding with a stored protected template.

    The probe is protected with ``seed`` and then compared to ``ref`` by
    cosine similarity.

    Args:
        probe: Raw probe embedding, or ``None``.
        ref: Stored protected template, or ``None``.
        seed: Seed handed to ``protect_template_biohash`` for the probe.
        threshold: Minimum similarity that counts as a match.

    Returns:
        ``(score, matched)``; ``(0.0, False)`` when either input is ``None``.
    """
    if probe is not None and ref is not None:
        protected_probe = protect_template_biohash(probe, seed)
        similarity = cosine_similarity([protected_probe], [ref])[0][0]
        return similarity, similarity >= threshold
    return 0.0, False

## 6. Security Layer 2: Fuzzy Commitment Scheme
Using Error Correcting Codes (Reed-Solomon) to bind a key to the face data.

**Steps:**
1. **Binarization**: Convert the float embedding to a binary vector by thresholding each dimension at the embedding's median (Feature Vector $F$).
2. **Enrollment**:
   - Generate random Key $K$.
   - ECC Encode $K$ to get Codeword $C$.
   - Calculate Helper Data $H = F \oplus C$ (XOR).
   - Store $Hash(K)$ and $H$.
3. **Authentication**:
   - Compute Feature $F'$ from probe.
   - Retrieve $H$.
   - Calculate Candidate Codeword $C' = F' \oplus H = F' \oplus (F \oplus C) = (F' \oplus F) \oplus C = E \oplus C$ (where $E$ is error).
   - ECC Decode $C'$ to retrieve $K'$.
   - Verify $Hash(K') == Hash(K)$.

In [6]:
# Binarization of Embedding
def binarize_embedding(embedding):
    """Binarize an embedding by thresholding it at its own median.

    Args:
        embedding: Numeric array, or ``None``.

    Returns:
        Integer array of the same shape holding 1 where the value is
        strictly greater than the median and 0 elsewhere; ``None`` when
        ``embedding`` is ``None``.
    """
    if embedding is None:
        return None
    return np.greater(embedding, np.median(embedding)).astype(int)

# Fuzzy Commitment Class
class FuzzyCommitment:
    """Fuzzy commitment: bind a random key to a binary feature vector.

    Enrollment encodes a fresh random key with Reed-Solomon, XORs the
    codeword bits with the leading feature bits and keeps only that helper
    data plus a digest of the key. Authentication XORs the helper with the
    probe bits, decodes the result and compares key digests.

    Args:
        key_len: Length in bytes of the random key generated per enrollment.
        error_correction_bytes: Number of Reed-Solomon error-correction
            bytes passed to ``reedsolo.RSCodec``.
    """

    def __init__(self, key_len=16, error_correction_bytes=16):
        self.rsc = reedsolo.RSCodec(error_correction_bytes)
        self.key_len = key_len

    @staticmethod
    def _hash_key(key):
        """Return the SHA-256 hex digest of ``key`` (``bytes`` or ``bytearray``).

        The built-in ``hash()`` is not usable here: it raises ``TypeError``
        for the ``bytearray`` the decoder returns and is not a cryptographic
        digest.
        """
        return hashlib.sha256(bytes(key)).hexdigest()

    def enroll(self, feature_vector):
        """Commit a fresh random key to ``feature_vector``.

        Args:
            feature_vector: Sequence of 0/1 values, at least as long as the
                codeword in bits (``8 * (key_len + error_correction_bytes)``).

        Returns:
            ``(key_hash, helper)`` where ``key_hash`` is a hex digest string
            and ``helper`` is a uint8 bit array of codeword length, or
            ``(None, None)`` when the feature vector is missing or too short.
        """
        if feature_vector is None:
            return None, None

        # 1. Generate random key and 2. encode it to a codeword.
        key = os.urandom(self.key_len)
        codeword = self.rsc.encode(key)
        codeword_bits = np.unpackbits(np.frombuffer(bytes(codeword), dtype=np.uint8))

        if len(feature_vector) < len(codeword_bits):
            return None, None

        # Only the leading bits of the feature vector take part in the commitment.
        feat_bits = np.asarray(feature_vector[:len(codeword_bits)], dtype=np.uint8)

        # 3. Helper data (XOR); it is stored in place of the feature bits.
        helper = np.bitwise_xor(feat_bits, codeword_bits)
        return self._hash_key(key), helper

    def authenticate(self, helper, probe_vector, stored_hash):
        """Check whether ``probe_vector`` opens the commitment.

        Args:
            helper: Helper bit array returned by ``enroll``.
            probe_vector: Sequence of 0/1 values from the probe sample.
            stored_hash: Key digest returned by ``enroll``.

        Returns:
            ``True`` when the decoded key's digest equals ``stored_hash``;
            ``False`` when the probe is too short, decoding fails, or the
            digests differ.
        """
        # 1. Align the probe with the helper length.
        codeword_len = len(helper)
        if len(probe_vector) < codeword_len:
            return False
        probe_bits = np.asarray(probe_vector[:codeword_len], dtype=np.uint8)

        # 2. XOR to recover the (possibly noisy) codeword.
        noisy_bits = np.bitwise_xor(probe_bits, np.asarray(helper, dtype=np.uint8))
        noisy_codeword = np.packbits(noisy_bits).tobytes()

        # 3. Decode; too many errors means the probe does not match.
        try:
            decoded = self.rsc.decode(noisy_codeword)
        except reedsolo.ReedSolomonError:
            return False
        # The original code unpacked a 3-tuple; a bare message is accepted
        # too in case the installed reedsolo returns only the message.
        decoded_key = decoded[0] if isinstance(decoded, tuple) else decoded

        # 4. Verify the digest (constant-time comparison of the hex strings).
        return isinstance(stored_hash, str) and hmac.compare_digest(
            self._hash_key(decoded_key), stored_hash
        )

# Initialize global FC scheme
fc_system = FuzzyCommitment(key_len=16, error_correction_bytes=16)
# Note: 16 bytes key + 16 bytes EC = 32 bytes = 256 bits. Well within 512-bit embedding.
# FuzzyCommitment slices the feature vector to the codeword length, so only the
# first 256 binarized bits take part in the commitment.

## 7. Interactive System (Updated)

In [7]:
# Global State Update
DB = {}  # in-memory enrollment store: user name -> record dict built by enroll()
SYSTEM_SEED = 12345  # single seed used for every user's BioHash permutation
LOGS = []  # not referenced by any code visible in this notebook

def enroll(name, img, modes):
    """Enroll a user under the selected protection modes (Gradio callback).

    Args:
        name: User ID typed in the UI; surrounding whitespace is ignored.
        img: Face image in any form ``get_embedding`` accepts.
        modes: Collection of selected modes; ``"BioHash"`` and/or
            ``"FuzzyCommitment"``.

    Returns:
        ``(status_message, info_dict, dropdown_update)``. On failure the
        info dict is empty and the dropdown update is a no-op; on success
        the update carries the refreshed list of enrolled user names.
    """
    # Cheap validation first, so bad form input never triggers face detection.
    # An empty ID is rejected because authenticate() treats an empty name as
    # "no user selected", which would make such a record unreachable.
    name = (name or "").strip()
    if not name:
        return "Please enter a user ID", {}, gr.update()
    if not modes:
        return "Please select a security mode", {}, gr.update()

    raw = get_embedding(img)
    if raw is None:
        return "Face fail", {}, gr.update()

    record = {'img': img}
    info = {}

    if "BioHash" in modes:
        tmpl = protect_template_biohash(raw, SYSTEM_SEED)
        record['biohash'] = tmpl
        info['BioHash'] = str(tmpl.shape)

    if "FuzzyCommitment" in modes:
        binary = binarize_embedding(raw)
        key_hash, helper = fc_system.enroll(binary)
        if key_hash is None:
            return "Enroll fail (dim error)", {}, gr.update()
        record['fc'] = {'hash': key_hash, 'helper': helper}
        info['FC_Helper'] = len(helper)

    # Re-enrolling an existing name replaces the previous record.
    DB[name] = record
    return f"Enrolled {name} with {modes}", info, gr.update(choices=list(DB.keys()))

def authenticate(name, img, threshold):
    """Verify a probe image against an enrolled user (Gradio callback).

    Every factor stored in the user's record (BioHash and/or fuzzy
    commitment) is checked, and the overall decision is ``True`` as soon as
    any one of them succeeds.

    Args:
        name: Claimed identity; must be a key of ``DB``.
        img: Probe image in any form ``get_embedding`` accepts.
        threshold: Cosine-similarity threshold for the BioHash check.

    Returns:
        ``(message, 0.0, decision)``. The middle element is always ``0.0``.
    """
    if not name:
        return "Please select a user", 0.0, False
    if name not in DB:
        return "User not found", 0.0, False

    record = DB[name]
    raw = get_embedding(img)
    if raw is None:
        return "Face fail", 0.0, False

    # One (message, decision) entry per factor present in the record.
    outcomes = []

    if 'biohash' in record:
        score, matched = verify_biohash(raw, record['biohash'], SYSTEM_SEED, threshold)
        outcomes.append((f"BioHash: {'Match' if matched else 'Fail'} ({score:.3f})", matched))

    if 'fc' in record:
        fc_rec = record['fc']
        unlocked = fc_system.authenticate(fc_rec['helper'], binarize_embedding(raw), fc_rec['hash'])
        outcomes.append((f"FC: {'Success' if unlocked else 'Fail'}", unlocked))

    final_str = " | ".join(message for message, _ in outcomes)
    final_bool = any(decision for _, decision in outcomes)

    return final_str, 0.0, final_bool

# Setup Gradio
# Three tabs: Enroll (store a protected template), Authenticate (verify a
# probe against an enrolled user) and Evaluation (compare the methods on a
# random subset of df_pairs).
with gr.Blocks() as app:
    gr.Markdown("# Secure Face Login (BioHash & Fuzzy Commitment)")

    with gr.Tab("Enroll"):
        gr.Markdown("Select a person from the dataset or upload your own image.")
        with gr.Row():
            with gr.Column():
                # Limit dropdown to top 200 for performance
                top_candidates = enrolled_candidates[:200]
                en_dropdown = gr.Dropdown(choices=top_candidates, label="Select Person from LFW", filterable=True)
                en_img = gr.Image(type="filepath", label="Face Image")
                if not top_candidates: print("Warning: Candidate list is empty!")
            with gr.Column():
                en_name = gr.Textbox(label="User ID (Name)")
                en_mode = gr.CheckboxGroup(["BioHash", "FuzzyCommitment"], label="Security Modes", value=["BioHash"])
                en_btn = gr.Button("Enroll", variant="primary")

        en_out = gr.JSON(label="Enrollment Info")
        en_res = gr.Textbox(label="Status")

        def on_select_person(name):
            """Prefill the image and user ID from the selected dataset person."""
            if not name: return None, ""
            imgs = get_images_of_person(name)
            if not imgs: return None, name
            return imgs[0], name

        en_dropdown.change(on_select_person, en_dropdown, [en_img, en_name])
        # The Enroll button is wired exactly once, below the Authenticate tab,
        # because its third output is the `au_name` dropdown created there.
        # Registering it here as well (with a throwaway Dropdown as output)
        # would run enroll() twice per click and render a stray dropdown.

    with gr.Tab("Authenticate"):
        with gr.Row():
            # Dropdown for enrolled users (starts empty)
            au_name = gr.Dropdown(label="Claimed Identity (Enrolled Users)", choices=[], interactive=True)
            au_img = gr.Image(type="filepath")
            au_thresh = gr.Slider(0.0, 1.0, 0.6, label="BioHash Threshold")
            au_btn = gr.Button("Login", variant="primary")
        au_res = gr.Textbox(label="Result")
        # Sinks for the score and boolean decision that authenticate() returns.
        au_score = gr.Number(label="Score", visible=False)
        au_decision = gr.State()
        au_btn.click(authenticate, [au_name, au_img, au_thresh], [au_res, au_score, au_decision])

    # Link Enroll button to Auth Dropdown update (needs the au_name object defined above)
    en_btn.click(enroll, [en_name, en_img, en_mode], [en_res, en_out, au_name])

    with gr.Tab("Evaluation"):
        gr.Markdown("## Comparative Performance Evaluation")
        ev_modes = gr.CheckboxGroup(["Baseline", "BioHash", "FuzzyCommitment"], label="Select Methods to Evaluate", value=["Baseline"])
        ev_btn = gr.Button("Run Comparative Evaluation")
        ev_out_text = gr.Markdown(label="Results Summary")
        ev_plot = gr.Plot(label="ROC Curves")

        def _embed_pairs(pairs_df):
            """Embed each distinct image once; return usable (e1, e2, label) triples."""
            embeddings = {}
            triples = []
            for _, row in pairs_df.iterrows():
                for path in (row['p1'], row['p2']):
                    if path not in embeddings:
                        embeddings[path] = get_embedding(path)
                e1, e2 = embeddings[row['p1']], embeddings[row['p2']]
                if e1 is not None and e2 is not None:
                    triples.append((e1, e2, row['label']))
            return triples

        def _roc_stats(labels, scores):
            """Return (fpr, tpr, auc, eer) for one set of similarity scores."""
            fpr, tpr, _ = roc_curve(labels, scores)
            fnr = 1 - tpr
            eer = fpr[np.nanargmin(np.absolute(fnr - fpr))]
            return fpr, tpr, auc(fpr, tpr), eer

        def run_comparative_eval(modes):
            """Evaluate the selected methods on a random subset of ``df_pairs``.

            Args:
                modes: Selected method names ("Baseline", "BioHash",
                    "FuzzyCommitment").

            Returns:
                ``(markdown_summary, matplotlib_figure)``.
            """
            # Subset for speed
            subset = df_pairs.sample(min(200, len(df_pairs)))
            # Embeddings are shared by all modes instead of recomputed per mode.
            triples = _embed_pairs(subset) if modes else []
            labels = [label for _, _, label in triples]

            results_text = "### Evaluation Results\n"
            fig = plt.figure(figsize=(8, 6))
            ax = fig.add_subplot(111)

            # 1. Baseline: cosine similarity of the raw embeddings.
            if "Baseline" in modes and triples:
                scores = [cosine_similarity([e1], [e2])[0][0] for e1, e2, _ in triples]
                fpr, tpr, roc_auc, eer = _roc_stats(labels, scores)
                ax.plot(fpr, tpr, label=f'Baseline (AUC={roc_auc:.2f}, EER={eer:.2f})')
                results_text += f"- **Baseline**: EER = {eer:.4f}, AUC = {roc_auc:.4f}\n"

            # 2. BioHash: both templates permuted with the system seed.
            if "BioHash" in modes and triples:
                scores = [
                    cosine_similarity(
                        [protect_template_biohash(e1, SYSTEM_SEED)],
                        [protect_template_biohash(e2, SYSTEM_SEED)],
                    )[0][0]
                    for e1, e2, _ in triples
                ]
                fpr, tpr, roc_auc, eer = _roc_stats(labels, scores)
                ax.plot(fpr, tpr, linestyle='--', label=f'BioHash (AUC={roc_auc:.2f}, EER={eer:.2f})')
                results_text += f"- **BioHash**: EER = {eer:.4f}, AUC = {roc_auc:.4f}\n"

            # 3. Fuzzy Commitment: enroll with the first image, authenticate with the second.
            if "FuzzyCommitment" in modes:
                accepted_gen = total_gen = 0
                accepted_imp = total_imp = 0
                for e1, e2, label in triples:
                    key_hash, helper = fc_system.enroll(binarize_embedding(e1))
                    if key_hash is None:
                        continue
                    success = fc_system.authenticate(helper, binarize_embedding(e2), key_hash)
                    if label == 1:
                        total_gen += 1
                        accepted_gen += bool(success)
                    else:
                        total_imp += 1
                        accepted_imp += bool(success)

                gar = accepted_gen / total_gen if total_gen > 0 else 0
                far = accepted_imp / total_imp if total_imp > 0 else 0
                results_text += f"- **Fuzzy Commitment**: GAR = {gar:.2%}, FAR = {far:.2%}\n"

            ax.plot([0, 1], [0, 1], 'k--')
            ax.set_xlabel('False Positive Rate')
            ax.set_ylabel('True Positive Rate')
            ax.set_title('ROC Curve Comparison')
            # Only draw a legend when at least one labelled ROC curve exists.
            if ax.get_legend_handles_labels()[0]:
                ax.legend()
            # Drop the figure from pyplot's registry so repeated runs do not
            # accumulate open figures; the Figure object itself is still returned.
            plt.close(fig)
            return results_text, fig

        ev_btn.click(run_comparative_eval, ev_modes, [ev_out_text, ev_plot])

# NOTE(review): share=True also publishes the app on a public gradio.live URL.
app.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://d18faea5a70f459bb2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


