# Facial Similarity with Siamese network and LFW dataset

In [1]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms

In [4]:
class LFWDataset(Dataset):
    """Pairs of face images read from an LFW-style pairs file.

    Each non-blank row after the header line describes one pair:

    * ``name n1 n2`` -- images ``n1`` and ``n2`` of the same person (label 1)
    * ``name1 n1 name2 n2`` -- images of two different people (label 0)

    Image paths are built as ``<img_folder>/<name>/<name>_<NNNN>.jpg``.

    Args:
        pairs_file: Path to the text file listing the pairs.
        img_folder: Root directory holding one sub-directory per person.
        transform: Optional callable applied to each loaded PIL image.

    Attributes are populated eagerly in ``__init__``: ``image_pairs`` is a
    list of ``(path1, path2)`` tuples and ``labels`` the parallel list of
    0/1 integers. Images themselves are only opened in ``__getitem__``.
    """

    def __init__(self, pairs_file, img_folder, transform=None):
        self.pairs_file = pairs_file
        self.img_folder = img_folder
        self.transform = transform
        self.image_pairs, self.labels = self._load_pairs()

    def _image_path(self, person, index):
        """Return the path of image number ``index`` belonging to ``person``."""
        return os.path.join(self.img_folder, person, f"{person}_{int(index):04d}.jpg")

    def _load_pairs(self):
        """Parse ``self.pairs_file`` into image-path pairs and labels.

        Returns:
            A ``(image_pairs, labels)`` tuple of equal-length lists.

        Raises:
            ValueError: If a non-blank row has neither 3 nor 4 fields, or an
                image number is not an integer.
        """
        image_pairs = []
        labels = []

        with open(self.pairs_file, 'r') as f:
            # The first line is a header (pair counts), not a pair.
            next(f, None)
            for line_no, line in enumerate(f, start=2):
                fields = line.split()
                if not fields:
                    # Tolerate blank lines, e.g. a trailing newline at EOF.
                    continue

                if len(fields) == 3:
                    person, first, second = fields
                    img1 = self._image_path(person, first)
                    img2 = self._image_path(person, second)
                    label = 1
                elif len(fields) == 4:
                    person1, first, person2, second = fields
                    img1 = self._image_path(person1, first)
                    img2 = self._image_path(person2, second)
                    label = 0
                else:
                    raise ValueError(
                        f"{self.pairs_file}:{line_no}: expected 3 or 4 fields, "
                        f"got {len(fields)}: {line.strip()!r}"
                    )

                image_pairs.append((img1, img2))
                labels.append(label)

        return image_pairs, labels

    def _load_image(self, path):
        """Open ``path`` and return it as an RGB PIL image."""
        # convert() returns a fully loaded copy, so the file handle opened by
        # Image.open can be closed as soon as the ``with`` block exits.
        with Image.open(path) as img:
            return img.convert("RGB")

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.image_pairs)

    def __getitem__(self, index):
        """Return ``(img1, img2, label)`` for the pair at ``index``.

        Both images go through ``self.transform`` when one was supplied;
        otherwise they are returned as PIL images. ``label`` is a float32
        scalar tensor: 1.0 for a same-person pair, 0.0 otherwise.
        """
        path1, path2 = self.image_pairs[index]
        img1 = self._load_image(path1)
        img2 = self._load_image(path2)

        if self.transform is not None:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        label = torch.tensor(self.labels[index], dtype=torch.float32)
        return img1, img2, label

# Locations of the pairs list and the image tree (machine-specific paths).
LFW_PAIRS_FILE = '/home/mehran/Documents/Datasets/pairsDevTrain.txt'
LFW_IMAGE_ROOT = '/home/mehran/Documents/Datasets/lfw-deepfunneled'

# Preprocessing applied to each image: resize to 128x128, then to a tensor.
transform = transforms.Compose(
    [
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ]
)

# Dataset of image pairs listed in the pairs file.
lfw_dataset = LFWDataset(
    pairs_file=LFW_PAIRS_FILE,
    img_folder=LFW_IMAGE_ROOT,
    transform=transform,
)

# Loader over the dataset: batches of 32 with shuffling enabled.
train_loader = torch.utils.data.DataLoader(
    lfw_dataset,
    batch_size=32,
    shuffle=True,
)
