In [1]:
!pip install facenet-pytorch

Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.5.3


In [2]:
from google.colab import drive
# Mount Google Drive inside the Colab VM; the dataset paths configured further down
# all live under this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [3]:
from facenet_pytorch import InceptionResnetV1

In [4]:
# Backbone: InceptionResnetV1 initialised from the VGGFace2 weights.
# NOTE(review): with classify=True / num_classes=2 the network presumably returns 2-way
# logits rather than face embeddings; the Siamese wrapper and contrastive loss later in
# this notebook compute distances on whatever this model outputs — confirm that is intended.
mdl = InceptionResnetV1(
    pretrained='vggface2',
    classify=True,
    num_classes=2,
)
print(mdl)

  0%|          | 0.00/107M [00:00<?, ?B/s]

InceptionResnetV1(
  (conv2d_1a): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2a): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_2b): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (maxpool_3a): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2d_3b): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU()
  )
  (conv2d_4a): 

Training Data

In [5]:
import pandas as pd
from glob import glob #for finding files recursively
from collections import defaultdict

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.utils
import torchvision.datasets as dset

from torch import optim
from torch.utils.data import DataLoader,Dataset
from torchvision.models import *
from torchvision.datasets import ImageFolder
from torch.autograd import Variable

In [6]:
# Root of the dataset on the mounted Drive; the CSV and the image folder both live in it.
dataset_root = "/content/drive/MyDrive/recognizing-faces-in-the-wild"
relationshipsCSV = f"{dataset_root}/train_relationships.csv"
train_images_folder = f"{dataset_root}/train/"

# Hold-out selector: anything whose path / person id contains this substring goes to the
# validation split. It can be changed; this value was picked at random.
val_set = "F09"

In [7]:
# Every .jpg located exactly two directory levels below the training folder
# (layout: <train_images_folder>/<dir>/<dir>/<file>.jpg).
availble_images = glob(f"{train_images_folder}*/*/*.jpg")

In [8]:
# Person id of every image, built from the two directory names that precede the file name
# ("<grandparent dir>/<parent dir>"). There is one entry per image, so ids repeat.
all_ppl = ["/".join(image_path.split("/")[-3:-1]) for image_path in availble_images]

In [9]:
# Make the dataset folder the working directory: the dataset class and the ImageFolder
# defined later in this notebook use the relative path "train/", which only resolves from here.
%cd /content/drive/MyDrive/recognizing-faces-in-the-wild

/content/drive/MyDrive/recognizing-faces-in-the-wild


In [10]:
def _person_key(image_path):
    """Return "<grandparent dir>/<parent dir>" of an image path, used as the person id."""
    parts = image_path.split("/")
    return parts[-3] + "/" + parts[-2]


def _group_images_by_person(image_paths):
    """Map each person id to the list of its image paths, preserving input order."""
    grouped = defaultdict(list)
    for image_path in image_paths:
        grouped[_person_key(image_path)].append(image_path)
    return grouped


# Split the images in a single pass: a path goes to validation when the val_set substring
# occurs anywhere in it, otherwise to training.
train_images, val_images = [], []
for image_path in availble_images:
    (val_images if val_set in image_path else train_images).append(image_path)

# person id -> image paths, for the training and the validation split respectively
train_person_to_images_map = _group_images_by_person(train_images)
val_person_to_images_map = _group_images_by_person(val_images)

In [11]:
# Read the relationship table and turn its 'p1' / 'p2' columns into a list of (p1, p2) tuples.
relationships = pd.read_csv(relationshipsCSV)
relationship_pairs = list(zip(relationships['p1'], relationships['p2']))

# Data cleaning: keep only pairs where both people have at least one image on disk.
# all_ppl holds one entry per image, so membership is tested against a set built once
# instead of scanning that list twice for every pair.
known_people = set(all_ppl)
relationship_pairs = [
    pair for pair in relationship_pairs
    if pair[0] in known_people and pair[1] in known_people
]

In [12]:
# Partition the cleaned pairs in one pass: a pair belongs to validation when the val_set
# substring occurs in its first person id, otherwise to training. Order is preserved.
train, val = [], []
for pair in relationship_pairs:
    (val if val_set in pair[0] else train).append(pair)

In [13]:
import random

class trainingDataset(Dataset):
    """Pairs of face images labelled as related (1) or non-related (0).

    Each index selects one entry of ``relationships``. A random photo of that entry's
    first person is paired with either a random photo of a person related to them
    (label 1) or a random photo of a person who is not related to them (label 0).
    Sampling happens on every access, so the same index yields different pairs.

    This process could be optimized by choosing all the train pairs up front and then
    adding some negative samples.

    Args:
        imageFolderDataset: object exposing ``imgs``, a sequence whose items start with
            an image path shaped ``.../<family>/<member>/<file>``; it is the pool that
            negative examples are drawn from.
        relationships: list of ``(person_a, person_b)`` id tuples (the "train" or "val"
            list); ids have the form ``<family>/<member>``.
        transform: optional callable applied to both images before they are returned.
    """

    # Number of random draws attempted before falling back to an exhaustive search
    # when looking for a negative example.
    _MAX_REJECTIONS = 100

    def __init__(self, imageFolderDataset, relationships, transform=None):
        self.imageFolderDataset = imageFolderDataset
        self.relationships = relationships  # choose either train or val dataset to use
        self.transform = transform

    @staticmethod
    def _person_of(image_path):
        """Return the ``<family>/<member>`` id encoded in an image path."""
        parts = image_path.split("/")
        return parts[-3] + "/" + parts[-2]

    @staticmethod
    def _load_image(image_path):
        """Open an image, force it to 3-channel RGB and release the file handle."""
        # The conversion guards against files that are not already RGB (e.g. grayscale),
        # which would otherwise produce tensors with a different channel count.
        with Image.open(image_path) as image_file:
            return image_file.convert("RGB")

    def _related_people(self, person):
        """Return everyone paired with ``person`` in ``self.relationships``.

        One entry is produced per matching tuple, so a partner that appears in several
        tuples is proportionally more likely to be drawn.
        """
        return [
            pair[1] if pair[0] == person else pair[0]
            for pair in self.relationships
            if pair[0] == person or pair[1] == person
        ]

    def _sample_unrelated_image(self, person, related_people):
        """Return the path of a random image of someone not related to ``person``.

        ``person`` themself is excluded as well: a second photo of the same individual
        must never be served as a "non-related" example.

        Raises:
            ValueError: if every image in the pool belongs to ``person`` or a relative.
        """
        excluded = set(related_people)
        excluded.add(person)
        pool = self.imageFolderDataset.imgs

        # Rejection sampling is cheap because almost every image is eligible.
        for _ in range(self._MAX_REJECTIONS):
            candidate_path = random.choice(pool)[0]
            if self._person_of(candidate_path) not in excluded:
                return candidate_path

        # Unlucky streak or a tiny pool: enumerate the eligible images instead of
        # looping forever.
        eligible = [entry[0] for entry in pool if self._person_of(entry[0]) not in excluded]
        if not eligible:
            raise ValueError("no unrelated image available for " + person)
        return random.choice(eligible)

    def __getitem__(self, index):
        """Return ``(img0, img1, label)`` for the relationship at ``index``.

        ``label`` is an int8 scalar tensor: 1 when the two people are related, 0 when
        they are not. When returned through a DataLoader the images are batched along a
        new leading dimension.
        """
        # First image: a random photo of the first person of the selected relationship.
        img0_info = self.relationships[index][0]
        img0_path = random.choice(glob("train/" + img0_info + "/*.jpg"))

        related_people = self._related_people(img0_info)

        # Randomly choose whether to use a positive (1) or a negative (0) example.
        if not related_people:  # in this case, this should never happen.
            choose_positive_example = 0
        else:
            choose_positive_example = random.randint(0, 1)

        if choose_positive_example == 1:
            # Positive: a random photo of a randomly chosen related person.
            img1_info = random.choice(related_people)
            img1_path = random.choice(glob("train/" + img1_info + "/*.jpg"))
        else:
            # Negative: a random photo of somebody who is neither this person nor related.
            img1_path = self._sample_unrelated_image(img0_info, related_people)

        img0 = self._load_image(img0_path)
        img1 = self._load_image(img1_path)

        if self.transform is not None:
            img0 = self.transform(img0)
            img1 = self.transform(img1)

        label = torch.tensor(choose_positive_example, dtype=torch.int8)
        return img0, img1, label

    def __len__(self):
        # One sample per relationship entry; this fixes the number of items per epoch.
        return len(self.relationships)

In [14]:
# Input-pipeline constants.
BATCH_SIZE=64
IMG_SIZE=100

# Index of every image below the relative "train/" folder.
folder_dataset = dset.ImageFolder(root='train/')

# Resize every image to IMG_SIZE x IMG_SIZE, then convert it to a tensor.
pair_transform = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
])

#Training set and training loader
trainset = trainingDataset(
    imageFolderDataset=folder_dataset,
    relationships=train,
    transform=pair_transform,
)

Define loss function and optimizer

In [15]:
#import torch.optim as optim

# NOTE(review): both names are rebound in the training cell later in this notebook
# (a ContrastiveLoss instance and a new Adam over the Siamese wrapper) before any
# optimisation step runs, so within the visible cells these two assignments do not
# influence training.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(mdl.parameters(), lr=0.001)

In [16]:
#data loader
# Batches the samples produced by trainset, with shuffling enabled.
# NOTE(review): batch_size is hard-coded to 32 here, while BATCH_SIZE=64 is defined next to
# trainset and is not used in the visible cells — confirm which value is intended.
train_loader = DataLoader(trainset, batch_size=32, shuffle=True)

Siamese Model

In [17]:
class SiameseNetwork(nn.Module):
    """Applies one shared backbone to both members of an input pair.

    Args:
        pretrained_model: module used as the shared branch; it is stored as ``model``
            and therefore registered as a sub-module.
    """

    def __init__(self, pretrained_model):
        super().__init__()
        self.model = pretrained_model

    def forward_one(self, x):
        """Run a single input through the shared backbone and return its output."""
        branch_output = self.model(x)
        return branch_output

    def forward(self, input1, input2):
        """Return ``(backbone(input1), backbone(input2))``, evaluated in that order."""
        first = self.forward_one(input1)
        second = self.forward_one(input2)
        return first, second

In [18]:
class ContrastiveLoss(nn.Module):
    """Contrastive loss for pairs labelled 1 = related, 0 = non-related.

    For a pair at Euclidean distance ``d``:

    * related pairs (label 1) contribute ``d**2`` and are pulled together;
    * non-related pairs (label 0) contribute ``max(margin - d, 0)**2`` and are pushed
      apart until they are at least ``margin`` away from each other.

    This matches the label convention of the pair dataset in this notebook, where
    1 means related.

    Args:
        margin: distance beyond which a non-related pair no longer adds to the loss.
    """

    def __init__(self, margin=2.0):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss of a batch as a scalar tensor.

        Args:
            output1, output2: ``(batch, dim)`` outputs of the two branches.
            label: ``(batch,)`` or ``(batch, 1)`` tensor of 0/1 values; any numeric dtype.
        """
        # One distance per pair, shape (batch,). Keeping the distance and the label
        # one-dimensional avoids a (batch, 1) * (batch,) product, which would silently
        # broadcast to a (batch, batch) matrix and mix every label with every pair.
        euclidean_distance = F.pairwise_distance(output1, output2)
        related = label.reshape(-1).to(euclidean_distance.dtype)

        pull_together = related * euclidean_distance.pow(2)
        push_apart = (1 - related) * torch.clamp(self.margin - euclidean_distance, min=0.0).pow(2)
        return torch.mean(pull_together + push_apart)

Finetune Model

In [19]:
from PIL import Image

In [20]:
# #move to gpu
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print("Using device:", device)

# model_on_gpu = mdl.to(device)
# # Modify the DataLoader to move data to GPU
# for batch in trainset:
#     input1, input2, labels = batch
#     # Move the batch to GPU
#     input1 = input1.to(device)
#     input2 = input2.to(device)
#     labels = labels.to(device)
#     # labels = labels.to(device)

In [None]:
# Train on the GPU when the runtime has one, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Create the Siamese network around the shared backbone and place it on the device.
# NOTE(review): the backbone was built with classify=True, so the pair distance is
# presumably computed on its classifier outputs rather than on embeddings — confirm.
siamese_net = SiameseNetwork(mdl).to(device)

# Define the contrastive loss
criterion = ContrastiveLoss()

# Define the optimizer (e.g., Adam)
optimizer = optim.Adam(siamese_net.parameters(), lr=0.001)

# Training loop
num_epochs = 10
num_batches = len(train_loader)
siamese_net.train()  # make sure layers such as batch norm are in training mode
for epoch in range(num_epochs):
    total_loss = 0.0
    for img1, img2, label in train_loader:
        # Inputs and labels must live on the same device as the model.
        img1 = img1.to(device)
        img2 = img2.to(device)
        label = label.to(device)

        optimizer.zero_grad()
        output1, output2 = siamese_net(img1, img2)
        loss = criterion(output1, output2, label)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # max(..., 1) keeps the report from dividing by zero when the loader is empty.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/max(num_batches, 1)}")
