<a href="https://colab.research.google.com/github/RahmaMissaoui/Facial_Rwcognition/blob/main/Facial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Homework 2: Facial Recognition Using CNN**
**Group Members**: Rahma Missaoui & Khettabi Fadila Meissoune   
**Date**: January 10 20026

## **Objective**
This work aims to train a **convolutional neural network** (CNN) that predicts the *boundary* of a face in an image.

Instead of classifying images, the model performs a `regression` process and outputs four connected values ​​representing the face's `position`.

## **Dataset**

It makes use of the **WIDER FACE** dataset.
To keep things simple, only the `first face` in each picture is taken into account. Multiple-face images are not fully utilized.

## **Models compared**
1. CNN from Scratch - Simple 3-layer convolutional network
2. VGG16 - Pre-trained on ImageNet, fine-tuned for face detection
3. ResNet50 - Pre-trained on ImageNet, fine-tuned for face detection

### Imports

In [14]:
import kagglehub
from pathlib import Path
import tensorflow as tf
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG16, ResNet50
import matplotlib.pyplot as plt

### Download dataset

In [15]:
# Download WIDER FACE dataset
path_str = kagglehub.dataset_download("iamprateek/wider-face-a-face-detection-dataset")
path = Path(path_str)

# Define paths
img_base_dir = path / 'WIDER_train' / 'WIDER_train' / 'images'
annot_file = path / 'wider_face_annotations' / 'wider_face_split' / 'wider_face_train_bbx_gt.txt'

print("Images folder exists:", img_base_dir.exists())
print("Annotations file exists:", annot_file.exists())

Using Colab cache for faster access to the 'wider-face-a-face-detection-dataset' dataset.
Images folder exists: True
Annotations file exists: True


In [16]:
# Configuration
IMG_SIZE = 224
BATCH_SIZE = 16

### Parse annotations


    Parse WIDER FACE annotation file.
    Format can have empty lines and special cases.
    Structure: image_path, num_faces, then bbox lines (x y w h + additional attributes)


In [17]:
def parse_wider_face(annot_file, img_dir, max_samples=None):

    samples = []

    with open(annot_file, 'r') as f:
        lines = [line.strip() for line in f.readlines() if line.strip()]  # Remove empty lines

    i = 0
    while i < len(lines):
        # Read image filename (contains .jpg)
        img_name = lines[i]
        i += 1

        if i >= len(lines):
            break

        # Try to read number of faces
        try:
            num_faces = int(lines[i])
            i += 1
        except ValueError:
            # If can't parse as int, might be another image name
            continue

        # If there are faces, extract the first one
        if num_faces > 0 and i < len(lines):
            try:
                parts = lines[i].split()
                if len(parts) >= 4:
                    x, y, w, h = map(int, parts[:4])
                    # Only add valid bounding boxes (positive width/height)
                    if w > 0 and h > 0:
                        samples.append((img_name, x, y, w, h))
            except (ValueError, IndexError):
                pass

        # Skip remaining face annotations for this image
        i += num_faces

        if max_samples and len(samples) >= max_samples:
            break

    return samples

In [18]:
# Load 5000 samples for training
samples = parse_wider_face(annot_file, img_base_dir, max_samples=5000)

# Split: 80% training, 20% validation
train_samples, val_samples = train_test_split(samples, test_size=0.2, random_state=42)
print(f"Training samples: {len(train_samples)}")
print(f"Validation samples: {len(val_samples)}")

Training samples: 4000
Validation samples: 1000


## PREPROCESSING

    Load image and normalize both image and bounding box.
    
    Process:
    1. Load image and resize to 224x224
    2. Normalize pixel values to [0, 1]
    3. Normalize box coordinates to [0, 1]
    





In [19]:
def load_and_preprocess(sample, img_dir):

    img_name, x, y, w, h = sample
    img_path = img_dir / img_name

    # Load image with error checking
    image = cv2.imread(str(img_path))

    # Check if image loaded successfully
    if image is None:
        raise FileNotFoundError(f"Could not load image: {img_path}")

    # Convert color space and resize
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    H, W, _ = image.shape
    image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    image = image / 255.0  # Normalize to [0, 1]

    # Normalize bounding box coordinates
    bbox = np.array([
        x / W,           # x_min
        y / H,           # y_min
        (x + w) / W,     # x_max
        (y + h) / H      # y_max
    ], dtype=np.float32)

    return image.astype(np.float32), bbox

### DATA AUGMENTATION

In [20]:
# Apply augmentation only to training data
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomBrightness(0.2),
    tf.keras.layers.RandomContrast(0.2),
    tf.keras.layers.RandomZoom(0.1)
])

### CREATE DATASETS

In [21]:
def train_generator():
    for s in train_samples:
        img, bbox = load_and_preprocess(s, img_base_dir)
        img = data_augmentation(img)
        yield img, bbox

def val_generator():
    for s in val_samples:
        img, bbox = load_and_preprocess(s, img_base_dir)
        yield img, bbox

In [22]:

# Create TensorFlow datasets
train_ds = tf.data.Dataset.from_generator(
    train_generator,
    output_signature=(
        tf.TensorSpec(shape=(IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(4,), dtype=tf.float32)
    )
).shuffle(512).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

val_ds = tf.data.Dataset.from_generator(
    val_generator,
    output_signature=(
        tf.TensorSpec(shape=(IMG_SIZE, IMG_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(4,), dtype=tf.float32)
    )
).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

### BUILD MODELS

MODEL 1 : CNN FROM SCRATCH

In [23]:
def build_scratch_model():
    """Simple CNN with 3 convolutional layers"""
    return tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(224,224,3)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(64, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(128, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(4, activation='sigmoid')  # Output: 4 bbox coordinates
    ])

In [None]:
scratch_model = build_scratch_model()
scratch_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss="mse")

scratch_history = scratch_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5
)

MODEL 2: VGG16

In [25]:
def build_vgg16_model():
    """VGG16 pre-trained on ImageNet, adapted for bounding box regression"""
    base = VGG16(weights="imagenet", include_top=False, input_shape=(224,224,3))
    base.trainable = False  # Freeze pre-trained layers

    x = tf.keras.layers.Flatten()(base.output)
    x = tf.keras.layers.Dense(512, activation="relu")(x)
    output = tf.keras.layers.Dense(4, activation="sigmoid")(x)
    return tf.keras.Model(base.input, output)

In [None]:
vgg_model = build_vgg16_model()
vgg_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss="mse")

vgg_history = vgg_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5
)

Epoch 1/5
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3121s[0m 12s/step - loss: 0.0769 - val_loss: 0.0640
Epoch 2/5
[1m 31/250[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m36:49[0m 10s/step - loss: 0.0522

MODEL 3: RESNET50

In [None]:
def build_resnet_model():
    """ResNet50 pre-trained on ImageNet, adapted for bounding box regression"""
    base = ResNet50(weights="imagenet", include_top=False, input_shape=(224,224,3))
    base.trainable = False  # Freeze pre-trained layers

    x = tf.keras.layers.GlobalAveragePooling2D()(base.output)
    x = tf.keras.layers.Dense(512, activation="relu")(x)
    output = tf.keras.layers.Dense(4, activation="sigmoid")(x)
    return tf.keras.Model(base.input, output)

In [None]:
resnet_model = build_resnet_model()
resnet_model.compile(optimizer=tf.keras.optimizers.Adam(1e-4), loss="mse")

resnet_history = resnet_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5
)

### EVALUATE AND COMPARE MODELS

In [None]:
scratch_loss = scratch_model.evaluate(val_ds)
vgg_loss = vgg_model.evaluate(val_ds)
resnet_loss = resnet_model.evaluate(val_ds)

print(f"\nCNN from Scratch - Loss: {scratch_loss:.6f}")
print(f"VGG16 - Loss: {vgg_loss:.6f}")
print(f"ResNet50 - Loss: {resnet_loss:.6f}")

In [None]:
# Plot comparison
plt.figure(figsize=(10, 6))
models = ['CNN from Scratch', 'VGG16', 'ResNet50']
losses = [scratch_loss, vgg_loss, resnet_loss]
plt.bar(models, losses, color=['#1f77b4', '#ff7f0e', '#2ca02c'])
plt.ylabel('Validation Loss (MSE)')
plt.title('Model Comparison - Lower is Better')
plt.grid(axis='y', alpha=0.3)
plt.show()

### VISUALIZE PREDICTIONS FROM ALL MODELS

In [None]:
def predict_and_draw_box(model, sample):
    """Predict face location and draw bounding box on image"""
    img, true_bbox = load_and_preprocess(sample, img_base_dir)

    # Predict bounding box
    pred_bbox = model.predict(np.expand_dims(img, 0), verbose=0)[0]

    # Convert normalized coordinates to pixels
    h, w = IMG_SIZE, IMG_SIZE

    # Ground truth box (GREEN)
    x1_true = int(true_bbox[0] * w)
    y1_true = int(true_bbox[1] * h)
    x2_true = int(true_bbox[2] * w)
    y2_true = int(true_bbox[3] * h)

    # Predicted box (RED)
    x1_pred = int(pred_bbox[0] * w)
    y1_pred = int(pred_bbox[1] * h)
    x2_pred = int(pred_bbox[2] * w)
    y2_pred = int(pred_bbox[3] * h)

    # Draw both boxes on image
    img_with_box = (img * 255).astype(np.uint8).copy()
    cv2.rectangle(img_with_box, (x1_true, y1_true), (x2_true, y2_true), (0, 255, 0), 2)  # Green = Ground Truth
    cv2.rectangle(img_with_box, (x1_pred, y1_pred), (x2_pred, y2_pred), (255, 0, 0), 2)  # Red = Prediction

    return img_with_box

In [None]:
# Show 5 sample images with predictions from all 3 models
num_samples = 5
fig, axes = plt.subplots(num_samples, 3, figsize=(15, num_samples * 5))
fig.suptitle('Face Detection Comparison - Green=Truth, Red=Prediction', fontsize=16)


In [None]:
for i, sample in enumerate(val_samples[:num_samples]):
    # CNN from Scratch
    result_scratch = predict_and_draw_box(scratch_model, sample)
    axes[i, 0].imshow(result_scratch)
    axes[i, 0].set_title(f'Sample {i+1} - CNN from Scratch')
    axes[i, 0].axis('off')

    # VGG16
    result_vgg = predict_and_draw_box(vgg_model, sample)
    axes[i, 1].imshow(result_vgg)
    axes[i, 1].set_title(f'Sample {i+1} - VGG16')
    axes[i, 1].axis('off')

    # ResNet50
    result_resnet = predict_and_draw_box(resnet_model, sample)
    axes[i, 2].imshow(result_resnet)
    axes[i, 2].set_title(f'Sample {i+1} - ResNet50')
    axes[i, 2].axis('off')

plt.tight_layout()
plt.show()