#### 📦 Import Libraries

Imports necessary Python modules such as PyTorch, torchvision, PIL, and other utilities for deep learning and image processing.


In [1]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

#### ⚙️ Device Setup & Reproducibility

Here we configure the device (CPU/GPU) for training and set manual seeds to ensure reproducibility of results.


In [2]:
# Seed every RNG this notebook imports so shuffling and augmentation are
# repeatable from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)  # torch generator (per the PyTorch docs, covers all devices)
np.random.seed(SEED)     # NumPy's legacy global generator

# Prefer the GPU when one is visible to PyTorch; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cuda


#### 🧾 Custom Dataset Loader

This defines a custom PyTorch `Dataset` class (`FaceRecognitionDataset`) that:
- Loads both clean and distorted face images.
- Assigns class labels based on folder names.
- Applies appropriate transformations during loading.


In [3]:
# Dataset class
class FaceRecognitionDataset(Dataset):
    """Identity-classification dataset built from one folder per person.

    Layout read by the constructor::

        root_dir/<person>/<image>                 # clean images
        root_dir/<person>/distortion/<image>      # distorted variants (optional)

    Every image (clean or distorted) of a person gets the same integer label.
    Labels are assigned by enumerating the *sub-directories* of ``root_dir`` in
    sorted order, so they are always the contiguous range
    ``0 .. len(id_to_label) - 1`` even when ``root_dir`` also contains stray
    files.

    Attributes:
        root_dir: The directory passed to the constructor.
        transform: Optional callable applied to each loaded PIL image.
        data: ``np.ndarray`` of image file paths.
        labels: ``np.ndarray`` of integer labels, parallel to ``data``.
        id_to_label: Mapping from integer label to person folder name.
    """

    # Lower-case file extensions that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root_dir, transform=None):
        """Index all images under ``root_dir`` without opening them.

        Args:
            root_dir: Directory containing one sub-directory per person.
            transform: Optional callable applied to each image in
                ``__getitem__``.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.data = []
        self.labels = []
        self.id_to_label = {}

        # Keep only directories BEFORE enumerating: enumerating every entry and
        # skipping non-directories would leave gaps in the label range, and a
        # label could then be >= len(id_to_label).
        person_folders = sorted(
            name for name in os.listdir(root_dir)
            if os.path.isdir(os.path.join(root_dir, name))
        )
        for idx, person_folder in enumerate(person_folders):
            person_folder_path = os.path.join(root_dir, person_folder)
            self.id_to_label[idx] = person_folder

            # Clean images first, then the optional 'distortion' sub-folder.
            image_dirs = (
                person_folder_path,
                os.path.join(person_folder_path, 'distortion'),
            )
            for image_dir in image_dirs:
                for image_path in self._list_images(image_dir):
                    self.data.append(image_path)
                    self.labels.append(idx)

        self.data = np.array(self.data)
        self.labels = np.array(self.labels, dtype=np.int64)

    @classmethod
    def _list_images(cls, folder):
        """Return sorted paths of the image files directly inside ``folder``.

        Returns an empty list when ``folder`` is not a directory. Sub-folders
        are never descended into.
        """
        if not os.path.isdir(folder):
            return []
        image_paths = []
        # Sorted so the sample order does not depend on the file system.
        for name in sorted(os.listdir(folder)):
            path = os.path.join(folder, name)
            if os.path.isfile(path) and name.lower().endswith(cls.IMAGE_EXTENSIONS):
                image_paths.append(path)
        return image_paths

    def __len__(self):
        """Number of indexed images (clean plus distorted)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, label)`` where ``image`` is the RGB image after
            ``transform`` (if any) and ``label`` is a ``torch.long`` scalar
            tensor.
        """
        img_path = self.data[idx]
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made, instead of leaving it to garbage collection.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return image, label



#### 🎨 Image Transformations

Image augmentations for training and normalization for validation/testing are applied using `torchvision.transforms`. These include resizing, horizontal flipping, normalization, etc.


In [4]:
# Preprocessing pipelines: both resize to 224x224 and apply the same per-channel
# normalization; only the training pipeline adds random flip/rotation.
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Deterministic pipeline used for validation and embedding extraction.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

#### 🗃️ Dataset Loading

We load the training dataset using the custom class and wrap it in a shuffled dataloader (batch size 32). The validation directory is only recorded here; it is read later, during embedding-based evaluation.


In [5]:

# Build the split paths with os.path.join so the separator matches the host OS
# (a literal backslash only works as a path separator on Windows).
train_dir = os.path.join("Task_B", "train")
val_dir = os.path.join("Task_B", "val")

# Load train dataset
train_dataset = FaceRecognitionDataset(train_dir, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# One output class per person folder found in the training split.
num_classes = len(train_dataset.id_to_label)
print(f"Number of classes (train): {num_classes}")

Number of classes (train): 877


#### 🧠 Model Architecture: ResNet-50

- Load a pretrained ResNet-50 model.
- Freeze all layers except `layer4` and the final fully connected (`fc`) layer.
- Replace the last `fc` layer with one suited for our number of classes (i.e., number of persons).
#### 🧮 Loss Function, Optimizer & LR Scheduler

- Use CrossEntropyLoss with label smoothing for stable training.
- The AdamW optimizer (learning rate `1e-4`, library-default weight decay) updates only the unfrozen parameters.
- Learning rate scheduling using `StepLR`.


In [6]:
# Start from torchvision's pretrained ResNet-50.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before changing it.
model = models.resnet50(pretrained=True)

# Fine-tune only parameters whose name mentions layer4 or fc; freeze the rest.
trainable_tags = ('layer4', 'fc')
for param_name, param in model.named_parameters():
    param.requires_grad = any(tag in param_name for tag in trainable_tags)

# Swap the classifier head for one sized to this dataset, then move to device.
head_in_features = model.fc.in_features
model.fc = nn.Linear(head_in_features, num_classes)
model = model.to(device)

# Loss, optimizer (trainable parameters only) and step-wise LR decay.
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_params, lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

In [7]:
# Sanity check: print any entry names that appear in both split directories.
train_names, val_names = set(os.listdir(train_dir)), set(os.listdir(val_dir))
print("Overlap:", train_names.intersection(val_names))

Overlap: set()


#### 🚀 Training the Model

This section trains the modified ResNet-50 model for a fixed number of epochs:
- Tracks training loss and accuracy.
- Updates the model weights using backpropagation.


In [8]:
# Training loop: one optimizer step per batch, one scheduler step per epoch.
num_epochs = 10
best_val_acc = 0.0     # initialised here; not updated anywhere in this loop
best_model_wts = None  # initialised here; not updated anywhere in this loop

for epoch in range(num_epochs):
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0

    for batch_images, batch_labels in train_loader:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Index of the largest logit per row is the predicted class.
        predicted = logits.max(dim=1)[1]
        hits += (predicted == batch_labels).sum().item()
        seen += batch_labels.size(0)
        loss_sum += batch_loss.item()

    scheduler.step()

    # Loss is the mean of per-batch losses; accuracy is per-sample.
    epoch_loss = loss_sum / len(train_loader)
    epoch_acc = hits / seen
    print(f"Epoch {epoch+1}/{num_epochs} Train Loss: {epoch_loss:.4f} Accuracy: {epoch_acc:.4f}")


Epoch 1/10 Train Loss: 4.9673 Accuracy: 0.2665
Epoch 2/10 Train Loss: 2.9802 Accuracy: 0.5847
Epoch 3/10 Train Loss: 1.7365 Accuracy: 0.9127
Epoch 4/10 Train Loss: 1.3007 Accuracy: 0.9872
Epoch 5/10 Train Loss: 1.1948 Accuracy: 0.9957
Epoch 6/10 Train Loss: 1.1430 Accuracy: 0.9988
Epoch 7/10 Train Loss: 1.1226 Accuracy: 0.9996
Epoch 8/10 Train Loss: 1.1150 Accuracy: 0.9995
Epoch 9/10 Train Loss: 1.1092 Accuracy: 0.9998
Epoch 10/10 Train Loss: 1.1041 Accuracy: 0.9999


#### 💾 Save the Trained Model

Save the trained model’s state dictionary (`state_dict`) to disk for later inference or evaluation using `torch.save()`.


In [9]:
# Persist only the weights (state dict), not the whole module object.
trained_weights = model.state_dict()
torch.save(trained_weights, "face_recognition_model.pt")

#### 🔍 Embedding Extraction Model

A new class `FaceEmbeddingExtractor` is defined:
- Uses ResNet-50 without the final classification layer.
- Extracts 2048-dimensional feature vectors (embeddings) for each image.
- Normalizes embeddings for cosine similarity.


In [10]:
class FaceEmbeddingExtractor(nn.Module):
    """Turn a classifier into an embedding network by dropping its last child.

    The remaining child modules of ``base_model`` are chained in an
    ``nn.Sequential`` stored as ``self.features``. The modules are reused, not
    copied, so they share parameters with ``base_model``.
    """

    def __init__(self, base_model):
        """Args:
            base_model: ``nn.Module`` whose final child module is discarded.
        """
        super().__init__()
        backbone_layers = list(base_model.children())
        del backbone_layers[-1:]  # discard the final child (the classifier head)
        self.features = nn.Sequential(*backbone_layers)

    def forward(self, x):
        """Return one L2-normalized embedding row per input sample."""
        feature_maps = self.features(x)
        # Collapse everything after the batch dimension into one vector.
        flat = feature_maps.flatten(start_dim=1)
        # Unit-length rows, so dot products equal cosine similarities.
        return F.normalize(flat, p=2, dim=1)

#### 🧠 Prepare Reference Embeddings (Validation Set)

Extract clean image embeddings from the validation set.
These embeddings are used later for cosine similarity comparison against distorted query images.


In [11]:
# Wrap the fine-tuned network (minus its classifier) as the embedding model,
# place it on the working device, and switch it to inference mode.
embedding_model = FaceEmbeddingExtractor(model)
embedding_model = embedding_model.to(device)
embedding_model.eval()


FaceEmbeddingExtractor(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
          

In [None]:
def extract_embeddings_from_folder(folder_path):
    """Embed every image file located directly inside ``folder_path``.

    Only regular files with a .jpg/.jpeg/.png extension (case-insensitive) are
    read; sub-directories are not descended into. Relies on the notebook
    globals ``val_transform``, ``device`` and ``embedding_model``.

    Args:
        folder_path: Directory to scan.

    Returns:
        ``(embeddings, images)``: ``embeddings`` is the row-wise concatenation
        of the per-image model outputs (moved to CPU), or ``torch.empty(0)``
        when no image was found; ``images`` is the list of file paths in the
        same order as the rows.
    """
    embeddings = []
    images = []
    # Sorted so the row order does not depend on the file system.
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        if not (os.path.isfile(file_path) and file.lower().endswith(('.jpg', '.jpeg', '.png'))):
            continue
        # Close the file handle as soon as the tensor has been produced.
        with Image.open(file_path) as raw_image:
            img = val_transform(raw_image.convert("RGB")).unsqueeze(0).to(device)
        with torch.no_grad():
            emb = embedding_model(img)
        embeddings.append(emb.cpu())
        images.append(file_path)
    if embeddings:
        embeddings = torch.cat(embeddings, dim=0)
    else:
        embeddings = torch.empty(0)
    return embeddings, images

In [13]:
# Prepare val reference embeddings (clean images only)
val_persons = sorted(os.listdir(val_dir))
val_refs = {}          # person folder name -> tensor of clean-image embeddings
val_refs_labels = []   # persons that have at least one clean image
val_refs_names = []    # same content as val_refs_labels

for person_folder in val_persons:
    person_folder_path = os.path.join(val_dir, person_folder)
    if not os.path.isdir(person_folder_path):
        continue

    # The helper only reads image files located directly inside the folder, so
    # the 'distortion' sub-folder is excluded automatically. Call it once and
    # keep its result rather than embedding every image a second time.
    clean_embeddings, clean_image_paths = extract_embeddings_from_folder(person_folder_path)

    # For a person with no clean image the helper returns torch.empty(0).
    val_refs[person_folder] = clean_embeddings
    if clean_image_paths:
        val_refs_labels.append(person_folder)
        val_refs_names.append(person_folder)

#### 🧪 Evaluate on Distorted Images (Validation Set)

For each distorted image:
- Extract its embedding.
- Compare with all reference embeddings using cosine similarity.
- Classify the image as belonging to the person with the highest similarity.
- Compute accuracy on distorted validation images.


In [14]:
# Nearest-reference identification of every distorted validation image:
# the prediction is the person owning the most similar clean embedding.
image_suffixes = ('.jpg', '.jpeg', '.png')
correct = 0
total = 0

for person_folder in val_persons:
    distortion_folder = os.path.join(val_dir, person_folder, 'distortion')
    if os.path.isdir(distortion_folder):
        distorted_names = [
            name for name in os.listdir(distortion_folder)
            if name.lower().endswith(image_suffixes)
        ]
        for name in distorted_names:
            query = Image.open(os.path.join(distortion_folder, name)).convert("RGB")
            query = val_transform(query).unsqueeze(0).to(device)
            with torch.no_grad():
                query_emb = embedding_model(query).cpu()

            # Track the best per-person maximum similarity seen so far.
            best_score, best_person = -1, None
            for candidate, candidate_embs in val_refs.items():
                if candidate_embs.shape[0] != 0:
                    score = F.cosine_similarity(query_emb, candidate_embs).max().item()
                    if score > best_score:
                        best_score, best_person = score, candidate

            total += 1
            if best_person == person_folder:
                correct += 1

if total > 0:
    accuracy = correct / total
else:
    accuracy = 0
print(f"Validation Verification Accuracy: {accuracy:.4f}")

Validation Verification Accuracy: 0.9990


#### 📊 Final Evaluation with Metrics (Validation Set and Training Set)

Using `sklearn.metrics`, we evaluate:
- Accuracy
- Precision
- Recall
- F1 Score

This gives a deeper understanding of model performance, beyond just accuracy.


In [15]:

# Load trained model and setup embedding model.
# map_location remaps the saved tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load("face_recognition_model.pt", map_location=device))
embedding_model = FaceEmbeddingExtractor(model).to(device)
embedding_model.eval()

# --------- Utility to extract reference embeddings (clean images) ---------
def extract_clean_references(persons, directory):
    """Embed the clean (non-distorted) images of each person folder.

    Only image files located directly inside ``directory/<person>`` are read;
    the 'distortion' sub-folder is a directory and is therefore never picked
    up. Relies on the notebook globals ``val_transform``, ``device`` and
    ``embedding_model``.

    Args:
        persons: Iterable of entry names inside ``directory``; entries that
            are not directories are skipped.
        directory: Root directory of the split.

    Returns:
        ``(refs, ref_names)``: ``refs`` maps each person folder name to a
        tensor with one embedding row per clean image (``torch.empty(0)`` when
        the folder has none); ``ref_names`` lists the persons that have at
        least one clean image.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    refs = {}
    ref_names = []
    for person_folder in persons:
        folder_path = os.path.join(directory, person_folder)
        if not os.path.isdir(folder_path):
            continue
        clean_embeddings = []
        # Sorted so the row order does not depend on the file system.
        for f in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, f)
            if not (os.path.isfile(file_path) and f.lower().endswith(image_suffixes)):
                continue
            # Close the file handle as soon as the tensor has been produced.
            with Image.open(file_path) as raw_image:
                img = val_transform(raw_image.convert("RGB")).unsqueeze(0).to(device)
            with torch.no_grad():
                emb = embedding_model(img)
            clean_embeddings.append(emb.cpu())
        if clean_embeddings:
            refs[person_folder] = torch.cat(clean_embeddings, dim=0)
            ref_names.append(person_folder)
        else:
            refs[person_folder] = torch.empty(0)
    return refs, ref_names

# --------- Evaluation Function Using Cosine Similarity ---------
def evaluate_embeddings(reference_embeddings, reference_labels, query_paths, true_labels, tag):
    """Classify each query image by its most similar reference and print metrics.

    For every query, the cosine similarity to each person's reference
    embeddings is computed; the person owning the single most similar
    reference becomes the prediction. Accuracy and macro-averaged
    precision/recall/F1 are printed; nothing is returned. Relies on the
    notebook globals ``val_transform``, ``device`` and ``embedding_model``.

    Args:
        reference_embeddings: Mapping from label to a tensor of reference
            embeddings; entries whose first dimension is 0 are ignored.
        reference_labels: Not used by this function; kept so existing calls
            keep working.
        query_paths: Image file paths to classify.
        true_labels: Ground-truth labels, parallel to ``query_paths``.
        tag: Name of the evaluated split, used in the printed header.
    """
    all_preds = []
    all_trues = []

    for path, true_label in zip(query_paths, true_labels):
        # Close the file handle as soon as the tensor has been produced.
        with Image.open(path) as raw_image:
            img = val_transform(raw_image.convert("RGB")).unsqueeze(0).to(device)
        with torch.no_grad():
            emb = embedding_model(img).cpu()

        # Start below any reachable similarity so a usable reference always
        # yields a prediction; a -1 sentinel could leave pred_label as None.
        max_sim = float('-inf')
        pred_label = None
        for label, emb_list in reference_embeddings.items():
            if emb_list.shape[0] == 0:
                continue
            sims = F.cosine_similarity(emb, emb_list)
            max_s = sims.max().item()
            if max_s > max_sim:
                max_sim = max_s
                pred_label = label

        all_preds.append(pred_label)
        all_trues.append(true_label)

    acc = accuracy_score(all_trues, all_preds)
    prec = precision_score(all_trues, all_preds, average='macro', zero_division=0)
    rec = recall_score(all_trues, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_trues, all_preds, average='macro', zero_division=0)

    print(f"\n📊 Evaluation Metrics on {tag}:")
    print(f"  - Accuracy : {acc:.4f}")
    print(f"  - Precision: {prec:.4f}")
    print(f"  - Recall   : {rec:.4f}")
    print(f"  - F1-Score : {f1:.4f}")

# --------- Prepare reference embeddings ---------
# Entry names of each split directory, in sorted order.
train_persons, val_persons = (
    sorted(os.listdir(split_dir)) for split_dir in (train_dir, val_dir)
)

# Clean-image reference embeddings per split.
train_refs, train_ref_names = extract_clean_references(train_persons, train_dir)
val_refs, val_ref_names = extract_clean_references(val_persons, val_dir)

# --------- Prepare distorted query sets ---------
def collect_distorted_queries(persons, root_dir):
    """Gather the distorted images of each person together with their labels.

    Args:
        persons: Iterable of person folder names under ``root_dir``.
        root_dir: Root directory of the split.

    Returns:
        ``(query_paths, true_labels)``: paths of every .jpg/.png/.jpeg file
        (case-insensitive) found in ``root_dir/<person>/distortion``, and the
        owning person folder name for each path. Persons without a
        'distortion' directory contribute nothing.
    """
    image_suffixes = ('.jpg', '.png', '.jpeg')
    query_paths, true_labels = [], []
    for person_folder in persons:
        distortion_folder = os.path.join(root_dir, person_folder, 'distortion')
        if os.path.isdir(distortion_folder):
            matches = [
                os.path.join(distortion_folder, name)
                for name in os.listdir(distortion_folder)
                if name.lower().endswith(image_suffixes)
            ]
            query_paths.extend(matches)
            # One label per collected path.
            true_labels.extend([person_folder] * len(matches))
    return query_paths, true_labels

train_query_paths, train_true_labels = collect_distorted_queries(train_persons, train_dir)
val_query_paths, val_true_labels = collect_distorted_queries(val_persons, val_dir)

# --------- Run Evaluation ---------
# Same evaluation for both splits: training first, then validation.
evaluation_runs = (
    (train_refs, train_ref_names, train_query_paths, train_true_labels, "Training Set"),
    (val_refs, val_ref_names, val_query_paths, val_true_labels, "Validation Set"),
)
for run_refs, run_ref_names, run_paths, run_labels, run_tag in evaluation_runs:
    evaluate_embeddings(run_refs, run_ref_names, run_paths, run_labels, tag=run_tag)




📊 Evaluation Metrics on Training Set:
  - Accuracy : 1.0000
  - Precision: 1.0000
  - Recall   : 1.0000
  - F1-Score : 1.0000

📊 Evaluation Metrics on Validation Set:
  - Accuracy : 0.9990
  - Precision: 0.9989
  - Recall   : 0.9991
  - F1-Score : 0.9989
