In [None]:
import pandas as pd
import torch.nn as nn
import os
from sklearn.model_selection import train_test_split
from PIL import Image
from matplotlib import transforms
import torch
import torch.nn.functional as F

# Load The Dataset

### Paths

In [None]:
# Root folder of the extracted LFW download; every path below hangs off it.
dataset_path = "lfw"

# Per-person image folders live under a directory name that is repeated twice.
images_path = os.path.join(dataset_path, "lfw-deepfunneled", "lfw-deepfunneled")

# Pair lists, grouped by split. "match" files list pairs of the same person,
# "mismatch" files list pairs of different people.
# Train
match_dev_train_path = os.path.join(dataset_path, "matchpairsDevTrain.csv")
mismatch_dev_train_path = os.path.join(dataset_path, "mismatchpairsDevTrain.csv")

# Validation
match_dev_val_path = os.path.join(dataset_path, "matchpairsDevVal.csv")
mismatch_dev_val_path = os.path.join(dataset_path, "mismatchpairsDevVal.csv")

# Test
match_dev_test_path = os.path.join(dataset_path, "matchpairsDevTest.csv")
mismatch_dev_test_path = os.path.join(dataset_path, "mismatchpairsDevTest.csv")



## Load The Data

In [None]:
# Parse the development pair lists into DataFrames: matched pairs first, then
# mismatched pairs, each as (test, train).
match_dev_test_df, match_dev_train_df = (
    pd.read_csv(csv_path)
    for csv_path in (match_dev_test_path, match_dev_train_path)
)
mismatch_dev_test_df, mismatch_dev_train_df = (
    pd.read_csv(csv_path)
    for csv_path in (mismatch_dev_test_path, mismatch_dev_train_path)
)

: 

### Split Off a Validation Set

In [None]:
# Hold out a fraction of each development-train pair list for validation.
#
# The source frames are re-read from disk here instead of being taken from
# `match_dev_train_df` / `mismatch_dev_train_df`: this cell rebinds those very
# names, so splitting the in-memory frames would split an already-split
# training set (and shrink it further) every time the cell is re-run.
VAL_FRACTION = 0.2  # share of the train pairs moved to validation
SPLIT_SEED = 42     # fixed seed so the split is reproducible

match_dev_train_df, match_dev_val_df = train_test_split(
    pd.read_csv(match_dev_train_path),
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
)
mismatch_dev_train_df, mismatch_dev_val_df = train_test_split(
    pd.read_csv(mismatch_dev_train_path),
    test_size=VAL_FRACTION,
    random_state=SPLIT_SEED,
)

### Loading The Images

In [None]:
# Memo of already-transformed images. The loader functions consult it before
# opening a file and store the transformed image in it after loading.
images_memo = dict()

In [None]:
# Preprocessing applied to every image before it is cached.
#
# NOTE(review): the import cell brings in `transforms` from matplotlib, which
# does not provide Compose / Resize / ToTensor / Normalize. These are
# torchvision.transforms APIs, so the import presumably should be
# `from torchvision import transforms` - confirm and fix the import cell.
#
# NOTE(review): this pipeline yields 3-channel 250x250 tensors, whereas the
# SiameseCNN defined later in the notebook is dimensioned for 1-channel
# 105x105 input unless configured otherwise - confirm which is intended.
_preprocess_steps = [
    # Resize to match model input
    transforms.Resize((250, 250)),
    # Converts to [C, H, W] tensor and scales to [0, 1]
    transforms.ToTensor(),
    # Normalize with ImageNet mean/std if using pretrained model
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocess_steps)

def _load_image(person, image_num):
    """Return the transformed image for one person/number, loading it at most once.

    Args:
        person: Person name; used as the sub-folder of ``images_path`` and as
            the file-name prefix.
        image_num: Image number for that person (anything ``int()`` accepts,
            e.g. a pandas/numpy integer scalar).

    Returns:
        The result of ``transform`` applied to the RGB image. The value is
        cached in ``images_memo`` under the key ``(person, image_num)``.

    Raises:
        FileNotFoundError: If the expected image file does not exist.
    """
    # Plain int keeps the cache key and the zero-padded file name consistent
    # regardless of which integer type the caller passes in.
    image_num = int(image_num)
    key = (person, image_num)
    if key not in images_memo:
        # LFW stores each image as "<person>/<person>_<4-digit number>.jpg".
        image_path = os.path.join(images_path, person, f"{person}_{image_num:04d}.jpg")
        # The context manager closes the underlying file once we are done.
        with Image.open(image_path) as raw_image:
            images_memo[key] = transform(raw_image.convert('RGB'))
    return images_memo[key]


def load_mismatch_images(person1, image1, person2, image2):
    """Load one image each of two different people.

    Args:
        person1: Name of the first person.
        image1: Image number of the first person's picture.
        person2: Name of the second person.
        image2: Image number of the second person's picture.

    Returns:
        Tuple ``(first, second)`` of transformed images, served from
        ``images_memo`` when already loaded.
    """
    return _load_image(person1, image1), _load_image(person2, image2)


def load_match_images(person, image1, image2):
    """Load two different images of the same person.

    Args:
        person: Name of the person.
        image1: Image number of the first picture.
        image2: Image number of the second picture.

    Returns:
        Tuple ``(first, second)`` of transformed images, served from
        ``images_memo`` when already loaded.
    """
    return _load_image(person, image1), _load_image(person, image2)

In [None]:
def make_minibatches(df, batch_size, is_match=True):
    """Yield consecutive minibatches of image pairs built from ``df``.

    Args:
        df: DataFrame of pairs. Rows are read via the columns
            ``person``/``image1``/``image2`` when ``is_match`` is true and
            ``person1``/``image1``/``person2``/``image2`` otherwise.
            NOTE(review): verify these names against the CSV headers.
        batch_size: Maximum number of rows per batch; the last batch may be
            smaller.
        is_match: Whether the rows describe same-person pairs (label 1) or
            different-person pairs (label 0).

    Yields:
        ``(first_images, second_images, labels)`` where the image entries are
        the loaders' outputs stacked with ``torch.stack`` and ``labels`` is a
        tensor holding one label per row.
    """
    def _pair_for(row):
        # Pick the loader that fits the row layout of this pair list.
        if is_match:
            return load_match_images(row['person'], row['image1'], row['image2'])
        return load_mismatch_images(
            row['person1'], row['image1'], row['person2'], row['image2']
        )

    pair_label = 1 if is_match else 0

    for start in range(0, len(df), batch_size):
        chunk = df.iloc[start:start + batch_size]
        pairs = [_pair_for(row) for _, row in chunk.iterrows()]

        firsts = [pair[0] for pair in pairs]
        seconds = [pair[1] for pair in pairs]
        targets = [pair_label] * len(pairs)

        yield torch.stack(firsts), torch.stack(seconds), torch.tensor(targets)

## Initializing The Network

In [None]:
class SiameseCNN(nn.Module):
    """Convolutional embedding branch: four conv stages followed by a sigmoid-activated linear layer.

    With the default arguments the layer shapes are the same as a fixed
    1-channel, 105x105 network with a 4096-dimensional output. The size of the
    linear layer's input is derived from ``input_size`` rather than hard-coded,
    so other image sizes and channel counts can be used. For example, the
    3-channel 250x250 tensors produced by this notebook's ``transform`` need
    ``SiameseCNN(in_channels=3, input_size=250)``.

    Args:
        in_channels: Number of channels of the input images.
        input_size: Spatial size of the input images, either a single int
            (square images) or a ``(height, width)`` pair.
        embedding_dim: Size of the output vector.

    Raises:
        RuntimeError: Raised by PyTorch during construction if ``input_size``
            is too small for the convolution/pooling stack.
    """

    def __init__(self, in_channels=1, input_size=105, embedding_dim=4096):
        super().__init__()

        # Shape comments below are for the default 1x105x105 input.
        self.cnn = nn.Sequential(
            nn.Conv2d(in_channels, 64, kernel_size=10),  # -> (64, 96, 96)
            nn.ReLU(),
            nn.MaxPool2d(2),                             # -> (64, 48, 48)

            nn.Conv2d(64, 128, kernel_size=7),           # -> (128, 42, 42)
            nn.ReLU(),
            nn.MaxPool2d(2),                             # -> (128, 21, 21)

            nn.Conv2d(128, 128, kernel_size=4),          # -> (128, 18, 18)
            nn.ReLU(),
            nn.MaxPool2d(2),                             # -> (128, 9, 9)

            nn.Conv2d(128, 256, kernel_size=4),          # -> (256, 6, 6)
            nn.ReLU()
        )

        # Measure the flattened feature count with a dummy pass so the linear
        # layer always fits the configured input (256 * 6 * 6 for the defaults).
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        with torch.no_grad():
            probe = self.cnn(torch.zeros(1, in_channels, height, width))
        flat_features = probe.flatten(1).shape[1]

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_features, embedding_dim),
            nn.Sigmoid()  # As used in the original paper
        )

    def forward(self, x):
        """Map a batch of images ``(N, in_channels, H, W)`` to embeddings ``(N, embedding_dim)``."""
        x = self.cnn(x)
        x = self.fc(x)
        return x
