# Cell 1: Imports and Configuration

This cell sets up the Python environment and defines constants for the facial recognition model.

- **Imports**: Libraries for numerical computation (`numpy`), MLX for model building and training (`mlx.core`, `mlx.nn`, `mlx.optimizers`), dataset loading (`sklearn.datasets`), and image processing (`PIL`, `cv2`).
- **Constants**:
  - Image size: $IMG\_SIZE = (224, 224)$, the target dimensions for input images.
  - Number of triplets: $NUM\_TRIPLETS = 1000$, the number of training examples for triplet loss.
  - Epochs: $EPOCHS = 10$, the number of full passes over the training triplets.
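  - Learning rate: $LEARNING\_RATE = 0.001$, the step size $\eta$ passed to the optimizer. The training cell uses Adam, which adapts each step using running estimates of the gradient moments; for plain gradient descent the update rule would be $w_{t+1} = w_t - \eta \nabla L(w_t)$ with $\eta = 0.001$.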
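  - Margin: $MARGIN = 0.2$, used in triplet loss to enforce separation: $L = \max(0, d(a, p) - d(a, n) + m)$ with $m = 0.2$, where $a$, $p$, $n$ are the anchor, positive, and negative embeddings and $d$ is the squared Euclidean distance between them.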
  - Threshold: $THRESHOLD = 0.6$, used for recognition: a query embedding $e$ is matched to the closest stored embedding $e_k$ only if $d(e, e_k) < \theta$, with $\theta = 0.6$; otherwise the face is reported as unknown.

In [60]:
import os
import numpy as np
import mlx.core as mx
import mlx.nn as nn
import mlx.optimizers as optim
import random
from sklearn.datasets import fetch_lfw_people
from PIL import Image
import cv2

# Configuration
# Hyperparameters shared by the cells below; each one is the default of the
# function named in its comment.
IMG_SIZE = (224, 224)  # target size handed to cv2.resize in preprocess_image
NUM_TRIPLETS = 1000  # default triplet count for generate_triplets
EPOCHS = 10  # default number of passes over the triplets in train_model
LEARNING_RATE = 0.001  # default learning rate given to optim.Adam in train_model
MARGIN = 0.2  # default margin in triplet_loss
THRESHOLD = 0.6  # default squared-distance cutoff in recognize_face

print("Configuration set. Using sklearn's LFW dataset.")

Configuration set. Using sklearn's LFW dataset.


In [61]:
def preprocess_image(img):
    """Resize an image to IMG_SIZE and scale its pixel values to [0, 1].

    Args:
        img: NumPy array of shape (H, W) (grayscale) or (H, W, C).

    Returns:
        Floating-point NumPy array resized to IMG_SIZE. Grayscale input is
        expanded to three channels first, so it comes back as (H, W, 3).
    """
    # img is a numpy array (H, W) or (H, W, C) from sklearn
    if img.ndim == 2:  # Grayscale to RGB
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    face = cv2.resize(img, IMG_SIZE)

    # Integer images are 0-255 encoded and always need rescaling. Float images
    # may already be in [0, 1] (recent scikit-learn releases return LFW pixels
    # that way -- NOTE(review): confirm for the installed version); dividing
    # those by 255 again would squash every pixel to ~[0, 0.004].
    if np.issubdtype(face.dtype, np.integer) or face.max() > 1.0:
        return face / 255.0  # Normalize to [0, 1]
    return face

# Test preprocessing
# NOTE: load_dataset() below calls fetch_lfw_people again with the same arguments.
lfw_data = fetch_lfw_people(min_faces_per_person=2, resize=0.4, download_if_missing=True)
sample_img = lfw_data.images[0]  # Get first image (grayscale; presumably 50x37 at resize=0.4 -- TODO confirm via sample_img.shape)
sample_face = preprocess_image(sample_img)
print("Sample face shape:", sample_face.shape)

Sample face shape: (224, 224, 3)


In [67]:
def load_dataset():
    """Fetch LFW through scikit-learn and bucket the preprocessed faces per identity.

    Returns:
        dict mapping the stringified target id to the list of preprocessed
        images for that identity; identities with a single image are dropped.
    """
    dataset = fetch_lfw_people(min_faces_per_person=2, resize=0.4, download_if_missing=True)

    faces_by_id = {}
    for raw_img, person_id in zip(dataset.images, dataset.target):
        key = str(person_id)  # target id, stringified, is the dictionary key
        processed = preprocess_image(raw_img)
        faces_by_id.setdefault(key, []).append(processed)

    # min_faces_per_person should already guarantee this; re-check anyway.
    kept = {}
    for key, faces in faces_by_id.items():
        if len(faces) > 1:
            kept[key] = faces

    print(f"Loaded {len(kept)} people with images.")
    return kept

# Load the dataset
# Later cells (triplet generation, database building) read `image_dict`.
image_dict = load_dataset()

Loaded 1680 people with images.


In [68]:
def generate_triplets(image_dict, num_triplets=NUM_TRIPLETS):
    """Generate (anchor, positive, negative) triplets.

    Args:
        image_dict: mapping of identity -> sequence of images.
        num_triplets: number of triplets to draw; values <= 0 yield [].

    Returns:
        List of (anchor, positive, negative) tuples. Anchor and positive are
        two distinct images of one identity; negative is an image of a
        different identity.

    Raises:
        ValueError: if no identity has at least two images, or fewer than two
            identities have any image (no negative could be drawn).
    """
    if num_triplets <= 0:
        return []

    # Any identity with an image can be a negative; anchors need a second
    # image to pair with as the positive.
    negative_pool = [name for name, images in image_dict.items() if len(images) >= 1]
    anchor_pool = [name for name in negative_pool if len(image_dict[name]) >= 2]
    if not anchor_pool:
        raise ValueError("generate_triplets needs at least one identity with two or more images.")
    if len(negative_pool) < 2:
        raise ValueError("generate_triplets needs at least two identities with images.")

    pool_index = {name: idx for idx, name in enumerate(negative_pool)}

    triplets = []
    for _ in range(num_triplets):
        anchor_person = random.choice(anchor_pool)
        anchor, positive = random.sample(image_dict[anchor_person], 2)

        # Draw uniformly among the *other* identities without rebuilding a
        # filtered list each time: pick from n-1 slots and skip the anchor's.
        pick = random.randrange(len(negative_pool) - 1)
        if pick >= pool_index[anchor_person]:
            pick += 1
        negative = random.choice(image_dict[negative_pool[pick]])

        triplets.append((anchor, positive, negative))

    return triplets

def to_mlx_arrays(triplets):
    """Turn each triplet of images into a triplet of MLX arrays with a leading batch axis."""
    def _batched(image):
        # Prepend a length-1 batch dimension before handing the data to MLX.
        return mx.array(image[None, ...])

    converted = []
    for anchor, positive, negative in triplets:
        converted.append((_batched(anchor), _batched(positive), _batched(negative)))
    return converted

# Generate and convert triplets
# `triplets` holds NumPy images; `mlx_triplets` holds the same data as MLX
# arrays with a batch dimension and is what the training cell consumes.
triplets = generate_triplets(image_dict)
mlx_triplets = to_mlx_arrays(triplets)
print(f"Generated {len(triplets)} triplets.")

Generated 1000 triplets.


In [69]:
class FaceCNN(nn.Module):
    """Small convolutional network that maps a face image to a unit-length embedding.

    Three conv -> ReLU -> 2x2 max-pool stages are followed by a single linear
    layer; the output is L2-normalized.

    Args:
        embedding_dim: size of the output embedding (default 128).
        img_size: spatial size of the input images (default IMG_SIZE). Used
            only to size the linear layer.
    """

    def __init__(self, embedding_dim=128, img_size=IMG_SIZE):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1, 1)
        self.conv3 = nn.Conv2d(64, 128, 3, 1, 1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # The padded 3x3 convs keep the spatial size; each of the three
        # stride-2 pools halves it, so 224 -> 28 for the default IMG_SIZE.
        flat_features = 128 * (img_size[0] // 8) * (img_size[1] // 8)
        self.fc = nn.Linear(flat_features, embedding_dim)  # embedding layer

    def __call__(self, x):
        """Return L2-normalized embeddings for a batch of images."""
        x = self.pool(nn.relu(self.conv1(x)))
        x = self.pool(nn.relu(self.conv2(x)))
        x = self.pool(nn.relu(self.conv3(x)))
        x = x.reshape(x.shape[0], -1)  # Flatten
        x = self.fc(x)
        # L2 normalize. The squared norm is clamped so an all-zero embedding
        # does not divide by zero and turn the loss into NaN.
        squared_norm = mx.sum(x * x, axis=-1, keepdims=True)
        return x / mx.sqrt(mx.maximum(squared_norm, 1e-12))

# Initialize model
# `model` is the instance trained and queried by the cells below.
model = FaceCNN()
print("Model initialized.")

Model initialized.


In [72]:
def triplet_loss(anchor, positive, negative, margin=MARGIN):
    """Mean hinge loss over squared anchor-positive vs. anchor-negative distances.

    Each distance is the sum of squared differences along the last axis. The
    per-sample value is max(d_pos - d_neg + margin, 0), averaged over all samples.
    """
    to_positive = anchor - positive
    to_negative = anchor - negative
    d_pos = mx.sum(mx.square(to_positive), axis=-1)
    d_neg = mx.sum(mx.square(to_negative), axis=-1)
    hinge = mx.maximum(d_pos - d_neg + margin, 0)
    return mx.mean(hinge)

def train_model(model, triplets, epochs=EPOCHS, learning_rate=LEARNING_RATE):
    """Train the model with triplet loss, one triplet per optimizer step.

    Args:
        model: nn.Module producing embeddings; updated in place.
        triplets: non-empty sequence of (anchor, positive, negative) MLX arrays.
            The sequence itself is not reordered.
        epochs: number of passes over the triplets.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        List with the average loss of each epoch.

    Raises:
        ValueError: if `triplets` is empty.
    """
    if not triplets:
        raise ValueError("train_model needs at least one triplet.")

    optimizer = optim.Adam(learning_rate=learning_rate)

    def compute_loss(anchor, positive, negative):
        """Triplet loss of the current model on one triplet."""
        return triplet_loss(model(anchor), model(positive), model(negative))

    # Built once: nn.value_and_grad differentiates compute_loss with respect
    # to the model's trainable parameters.
    loss_and_grad = nn.value_and_grad(model, compute_loss)

    # Shuffle a private copy so the caller's list keeps its order.
    schedule = list(triplets)

    epoch_losses = []
    for epoch in range(epochs):
        random.shuffle(schedule)
        total_loss = 0.0
        for anchor, positive, negative in schedule:
            loss, grads = loss_and_grad(anchor, positive, negative)
            optimizer.update(model, grads)
            # MLX is lazy: force the parameter/optimizer update to run now.
            mx.eval(model.parameters(), optimizer.state)
            total_loss += loss.item()
        avg_loss = total_loss / len(schedule)
        epoch_losses.append(avg_loss)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss:.4f}")

    return epoch_losses

print("Training functions defined.")

Training functions defined.


In [None]:
# Train `model` on the MLX triplets built above; train_model prints the
# average loss once per epoch.
print("Starting training...")
train_model(model, mlx_triplets)
print("Training completed.")

Starting training...


In [33]:
def get_embedding(model, image_path):
    """Generate embedding for an image.

    Args:
        model: callable mapping a batched MLX image array to embeddings.
        image_path: path to an image file (str or os.PathLike), or an
            already-loaded NumPy image array of shape (H, W) or (H, W, C).

    Returns:
        The embedding with the batch axis squeezed out, or None if
        preprocessing yields None.

    Raises:
        OSError: if a path is given and the file cannot be opened or decoded.
    """
    if isinstance(image_path, (str, os.PathLike)):
        # preprocess_image works on pixel arrays, so decode the file first.
        # Grayscale matches the single-channel LFW images used for training
        # (NOTE(review): files are not cropped the way sklearn crops LFW).
        with Image.open(image_path) as pil_img:
            pixels = np.asarray(pil_img.convert("L"), dtype=np.float32)
    else:
        pixels = image_path

    face = preprocess_image(pixels)
    if face is None:
        return None
    batch = mx.array(face[None, ...])  # add the batch dimension
    return model(batch).squeeze()

def build_database(model, image_dict, max_images_per_person=5):
    """Build a database of embeddings for known people.

    Args:
        model: callable mapping a batched MLX image array to embeddings.
        image_dict: mapping of person -> list of preprocessed image arrays
            (as produced by load_dataset).
        max_images_per_person: at most this many images per person are used.

    Returns:
        dict mapping each person to the mean of their image embeddings.
        People without images are left out.
    """
    database = {}
    for person, images in image_dict.items():
        selected = images[:max_images_per_person]
        if not selected:
            continue
        # The faces in image_dict are already preprocessed, so they go to the
        # model directly; one batched forward pass per person.
        batch = mx.array(np.stack(selected))
        centroid = mx.mean(model(batch), axis=0)
        # MLX is lazy: evaluate now so the database holds values rather than
        # an ever-growing computation graph.
        mx.eval(centroid)
        database[person] = centroid
    return database

def recognize_face(model, image_path, database, threshold=THRESHOLD):
    """Recognize a face from an image.

    Args:
        model: embedding model passed through to get_embedding.
        image_path: image input passed through to get_embedding.
        database: mapping of name -> reference embedding.
        threshold: maximum squared distance for a match.

    Returns:
        The name of the closest database entry if its squared distance is
        below `threshold`; "Unknown" if it is not or the database is empty;
        "No face detected" if no embedding could be computed.
    """
    embedding = get_embedding(model, image_path)
    if embedding is None:
        return "No face detected"
    if not database:
        # Nothing to compare against; min() below would raise on an empty dict.
        return "Unknown"
    distances = {name: mx.sum(mx.square(embedding - emb)).item() for name, emb in database.items()}
    closest = min(distances, key=distances.get)
    return closest if distances[closest] < threshold else "Unknown"

print("Inference functions defined.")

Inference functions defined.


In [34]:
# Build database
# NOTE: needs `model` and `image_dict` from the earlier cells in the same
# kernel session; run the notebook top to bottom before executing this cell.
print("Building database...")
database = build_database(model, image_dict)

# Test recognition
# TODO: the path below is a placeholder and must point to an existing image.
test_image_path = "path_to_test_image.jpg"  # Replace with a real image path
result = recognize_face(model, test_image_path, database)
print(f"Recognized as: {result}")

Building database...


NameError: name 'image_dict' is not defined