<a href="https://colab.research.google.com/github/AvivBGU/DeepLearning_Assignment2/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports & Constants

<font size="4">Imports </font>

Install Pytorch

In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
!pip3 install torch torchsummary
!pip3 install -q gdown

In [None]:
import torch
import os
import torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler
import zipfile
import requests
import numpy as np
import torch.utils.data as data
import time
import matplotlib.pyplot as plt
import random
import copy
import gdown
from torchsummary import summary
from collections import defaultdict
from torchvision import transforms
from glob import glob
from PIL import Image

os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

print("Using torch", torch.__version__)

<font size="4">Constants</font>

In [None]:
# Paths are resolved relative to the directory the notebook is run from.
current_working_directory = os.getcwd()
DATA_BASE_DIRECTORY: str = os.path.join(current_working_directory, 'data')
TRAIN_TEST_DIVISION_LOCATION = os.path.join(current_working_directory, 'training_set_division')
# Archived (Wayback Machine) copies of the LFW train/test pair lists.
TRAINING_SET_URL='https://web.archive.org/web/20241214060505/https://vis-www.cs.umass.edu/lfw/pairsDevTrain.txt'
TEST_SET_URL='https://web.archive.org/web/20241214070147/https://vis-www.cs.umass.edu/lfw/pairsDevTest.txt#expand'
MAX_PIXEL_VALUE: float = 255.0 # Divisor used to scale pixel values into [0, 1]
IMAGE_SIZE: tuple[int, int] = (105, 105) # Size every image is resized to before entering the network
BATCH_SIZE: int = 128
IMAGE_MODE: str = 'L' # PIL mode the images are expected to have (greyscale)
DEVICE_TO_USE: torch.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
RANDOM_SEED: int = 10 # For reproducibility
TRAINING_VALIDATION_DIVISION: float = 0.1 # Fraction of the training pairs moved to the validation set
MAX_EPOCHS_FOR_TRAINING: int = 200
PATIANCE_FACTOR: float = 1e-4 # Validation-loss threshold used by the early-stopping check
ALLOWED_PATIANCE_ITERATIONS: int = 5 # Early-stopping patience, in epochs
EARLY_STOP: bool = True
DEFAULT_LR = 1e-2
DEFAULT_MOMEMENTUM = 0.5
DEFAULT_L2_REGULARIZATION_STRENGTH = 1e-4
# Passed to MultiplicativeLR as lr_lambda: the factor the learning rate is multiplied by on every scheduler step.
DEFAULT_LEARNING_RATE_DECAY_FUNC = lambda epoch: 0.99

print(f'Using device: {DEVICE_TO_USE}')
random.seed(RANDOM_SEED) # Using a set seed to achieve reproducibility
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Acquiring & Handling Data

<font size="6">Acquiring_Data</font>

In [None]:
def download_images_from_drive(file_id: str, download_to_path: str, extract_zip_to: str = DATA_BASE_DIRECTORY) -> str:
    """
    Downloads the zipped image dataset from Google Drive and extracts it.

    If the expected image directory already exists nothing is downloaded.

    Args:
        file_id: Google Drive id of the zip archive to download.
        download_to_path: Local path the archive is saved to.
        extract_zip_to: Directory the archive is extracted into.

    Returns:
        The path `<extract_zip_to>/lfw2/lfw2`. The layout of the archive is
        assumed to be known in advance; it is not verified after extraction.

    Raises:
        RuntimeError: If gdown reports that no file was downloaded.
    """
    file_location: str = os.path.join(extract_zip_to, 'lfw2', 'lfw2')
    if os.path.exists(file_location):
        print(f"Dataset already downloaded to {file_location}")
        return file_location

    # Use the id supplied by the caller instead of a hard-coded one.
    downloaded_zip_name = gdown.download(id=file_id, output=download_to_path)
    if not downloaded_zip_name:
        # Fail with a clear message instead of an obscure zipfile error.
        raise RuntimeError(f"Could not download Google Drive file with id: {file_id}")
    print(downloaded_zip_name)
    os.makedirs(extract_zip_to, exist_ok=True)
    with zipfile.ZipFile(downloaded_zip_name, 'r') as zip_ref:
        zip_ref.extractall(extract_zip_to)
    print(f"Dataset extracted to {extract_zip_to}")
    return file_location

# Fetch the dataset archive into the working directory (skipped when it was already extracted)
# and keep the path of the extracted image root for the following cells.
updated_dir_location: str = download_images_from_drive(
    file_id="1p1wjaqpTh_5RHfJu4vUh8JJCdKwYMHCp",
    download_to_path=os.path.join(os.getcwd(), 'dataset.zip')
    )

<font size="4">Preprocessing function</font>

<font size="4">Loading file paths to memory</font>

In [None]:
def loads_files_paths_to_memory(base_directory: str, image_format: str = '.jpg') -> dict[str, dict[int, str]]:
    """
    Scans the sub-directories of `base_directory` and indexes the image files found.

    Each sub-directory name is used as the person name. The image index is the
    number after the last underscore of the file name, once the extension has
    been removed.

    Args:
        base_directory: Directory whose sub-directories hold the images.
        image_format: File extension (including the dot) of the images to index.

    Returns:
        A mapping person name -> {image index -> full path of the image}.

    Raises:
        ValueError: If two files of one person share an index, if a file name
            does not end with a number, or if no sub-directory was found.
    """
    import warnings  # Local import so the cell does not depend on the notebook's import cell.

    images: dict[str, dict[int, str]] = dict()
    images_loaded: int = 0
    for root, _subdirs, files in os.walk(base_directory):
        if root == base_directory:
            continue
        person_name: str = root.split(os.sep)[-1]
        person_images: dict[int, str] = images.setdefault(person_name, dict())
        for file in files:
            if not file.endswith(image_format):
                # Skip files of another type instead of aborting the whole scan.
                warnings.warn(f"File {file} is not a {image_format} file. Continuing...")
                continue
            # removesuffix drops exactly the extension; rstrip would strip any trailing
            # characters contained in the extension, which can eat into the index.
            stripped_image: str = file.removesuffix(image_format)
            image_index: int = int(stripped_image.split('_')[-1])
            if image_index in person_images:
                raise ValueError(f"Index: {image_index} collision for: {person_name}")
            person_images[image_index] = os.path.join(root, file)
            images_loaded += 1
    if len(images) < 1:
        raise ValueError(f"No images were found in {base_directory}, aborting...")
    print(f"People scanned: {len(images)}")
    print(f"Images loaded: {images_loaded}")
    return images

# Mapping person name -> {image index -> image path}, used later to resolve the pairs to files.
loaded_images: dict[str, dict[int, str]] = loads_files_paths_to_memory(updated_dir_location)

<font size="4">Organizing According to train-test</font>

<font size="4">Get train-test division and parse it</font>

In [None]:
def get_train_test_division(url_to_use: str, save_location: str, file_name: str) -> str:
    """
    Gets the train test division from the wayback machine and saves it locally.

    Args:
        url_to_use: URL the division file is downloaded from.
        save_location: Directory the file is stored in (created when missing).
        file_name: Name of the stored file.

    Returns:
        Full path of the local file. When that file already exists it is reused
        and no request is made.

    Raises:
        ValueError: If the server does not answer with status code 200.
    """
    full_file_location: str = os.path.join(save_location, file_name)
    if os.path.exists(full_file_location):
        # Already downloaded.
        print(f'{full_file_location} was already downloaded, continuing...')
        return full_file_location
    os.makedirs(save_location, exist_ok=True) # Creates the dirs listed in the save location, no error if they already exist.
    # Without a timeout an unresponsive server would block the notebook forever.
    url_response = requests.get(url_to_use, timeout=60)
    if url_response.status_code != 200:
        raise ValueError("Cannot get train test division")
    with open(full_file_location, "w") as file:
        file.write(url_response.text)
    return full_file_location

# Download (or reuse) the pair lists of the training and test sets.
training_data_location: str = get_train_test_division(TRAINING_SET_URL, TRAIN_TEST_DIVISION_LOCATION, "training_file.tsv")
test_data_location: str = get_train_test_division(TEST_SET_URL, TRAIN_TEST_DIVISION_LOCATION, 'test_file.tsv')

In [None]:
def parse_train_test_txt(data_location: str) -> list[tuple[tuple[str, int], tuple[str, int], bool]]:
    """
    Parses a tab separated pairs file into a list of labelled examples.

    Lines with 3 fields (`person, index, index`) describe two images of the
    same person and get the label 1. Lines with 4 fields
    (`person, index, person, index`) describe two different people and get the
    label 0. Every other line (such as the leading count) is ignored.

    Raises:
        ValueError: If `data_location` does not exist, or an index is not a number.
    """
    if not os.path.exists(data_location):
        raise ValueError(f"Data division not found in: {data_location}.")
    with open(data_location, "r") as pairs_file:
        raw_lines: list[str] = pairs_file.read().split('\n')

    parsed_pairs: list[tuple[tuple[str, int], tuple[str, int], bool]] = []
    for raw_line in raw_lines:
        fields: list[str] = raw_line.split('\t')
        if len(fields) == 3:
            # Positive example: one person, two image indices.
            name, first_index, second_index = fields
            parsed_pairs.append(((name, int(first_index)), (name, int(second_index)), 1))
        elif len(fields) == 4:
            # Negative example: two people, one image index each.
            first_name, first_index, second_name, second_index = fields
            parsed_pairs.append(((first_name, int(first_index)), (second_name, int(second_index)), 0))
    return parsed_pairs

# Each entry is ((person, image index), (person, image index), label) where the label is 1 for the same person and 0 otherwise.
training_set: list[tuple[tuple[str, int], tuple[str, int], bool]] = parse_train_test_txt(training_data_location)
test_set: list[tuple[tuple[str, int], tuple[str, int], bool]] = parse_train_test_txt(test_data_location)

<font size="4">Divide training set to positive and negative examples to achieve correct division in validation set</font>

In [None]:
# Split the training pairs by label (index 2 of every example): 1 = same person, 0 = different people.
positive_training_examples: list[tuple[tuple[str, int], tuple[str, int], bool]] = [training_example for training_example in training_set if training_example[2] == 1]
negative_training_examples: list[tuple[tuple[str, int], tuple[str, int], bool]] = [training_example for training_example in training_set if training_example[2] == 0]

<font size="5">Analyzing the datasets</font>

<font size="4">Auxiliary functions for analysis</font>

In [None]:
def calculate_number_of_unique_examples(set_of_pairs: list[tuple[tuple[str, int], tuple[str, int], bool]]) -> dict:
    """
    Collects, for every person appearing in the pairs, the set of image indices used.

    Returns:
        A defaultdict mapping person name -> set of image indices.
    """
    images_by_person: dict = defaultdict(set)
    for left_side, right_side, _ in set_of_pairs:
        # Both sides of a pair contribute an image, whatever the label is.
        for person_name, image_index in (left_side, right_side):
            images_by_person[person_name].add(image_index)
    return images_by_person


def divide_numbers_to_buckets(list_of_ints: list[int], list_of_buckets: list[int]) -> tuple[dict[int, int], list, list]:
    """
    Counts how often every number occurs and lists the numbers outside the bucket range.

    Returns:
        A tuple of three items:
        - a dict mapping each number to its number of occurrences (outliers included),
        - the numbers greater than the largest bucket,
        - the numbers smaller than the smallest bucket.
    """
    lowest_bucket: int = min(list_of_buckets)
    highest_bucket: int = max(list_of_buckets)
    occurrences: dict[int, int] = {}
    above_range: list = []
    below_range: list = []
    for value in list_of_ints:
        if value > highest_bucket:
            above_range.append(value)
        if value < lowest_bucket:
            below_range.append(value)
        occurrences[value] = occurrences.get(value, 0) + 1
    return occurrences, above_range, below_range
    
        
def print_data_regarding_dataset(dataset: list[tuple[tuple[str, int], tuple[str, int], bool]], name_of_dataset: str):
    """
    Prints summary statistics of a set of pairs.

    Reported are the number of pairs (total, positive, negative), the number of
    distinct people, and the average / minimum / maximum number of distinct
    images used per person, together with a histogram of those per-person
    counts and the counts that fall outside the range 1 to 8.
    """
    positive_count: int = len([example for example in dataset if example[2] == 1])
    negative_count: int = len([example for example in dataset if example[2] == 0])
    print(f"Number of pair in {name_of_dataset}: {len(dataset)}")
    print(f"Number of positive pairs in {name_of_dataset}: {positive_count}")
    print(f"Number of negative pairs in {name_of_dataset}: {negative_count}")

    images_per_person: dict = calculate_number_of_unique_examples(dataset)
    class_count: int = len(images_per_person)
    images_used_counts: list = [len(image_indices) for image_indices in images_per_person.values()]
    mean_images_per_class: float = sum(images_used_counts) / class_count
    fewest_images = min(images_used_counts)
    most_images = max(images_used_counts)
    histogram, too_large, too_small = divide_numbers_to_buckets(images_used_counts, list_of_buckets=list(range(1, 9)))
    ordered_histogram: dict = dict(sorted(histogram.items()))

    print(f"Number of unique classes in {name_of_dataset}: {class_count}")
    print(f"Average number of unique examples per class in {name_of_dataset}: {mean_images_per_class}")
    print(f"Min number of examples per class in {name_of_dataset}: {fewest_images}")
    print(f"Max number of examples per class in {name_of_dataset}: {most_images}")
    print(f"Number of examples for each class divided to buckets in {name_of_dataset}: {ordered_histogram}")
    print(f"Number of overshooting outliers in {name_of_dataset}: {too_large}")
    print(f"Number of undershooting outliers in {name_of_dataset}: {too_small}")

<font size="4">Analyzing Training Set</font>

In [None]:
print("Printing training set prior to dividing it to validation and testing.")
print_data_regarding_dataset(training_set, "Training set")

<font size="4">Getting validation set in such a way to make sure that it contains the same number of positive and negative examples.</font>

In [None]:
samples_to_select: int = int(len(training_set)*TRAINING_VALIDATION_DIVISION)
samples_per_class: int = samples_to_select // 2
# Making sure the validation set contains an equal amount of positive and negative examples.
validation_set: list[tuple[tuple[str, int], tuple[str, int], bool]] = random.sample(positive_training_examples, samples_per_class)
validation_set.extend(random.sample(negative_training_examples, samples_per_class))
# A set makes every membership test O(1); the examples are tuples of hashable values.
held_out_examples: set = set(validation_set)
training_set: list[tuple[tuple[str, int], tuple[str, int], bool]] = [sample for sample in training_set if sample not in held_out_examples]
print(f'training_set_size: {len(training_set)}')
print(f'validation_set_size: {len(validation_set)}')
print(f'test_set_size: {len(test_set)}')

<font size="4">Analyzing Training Set</font>

In [None]:
print_data_regarding_dataset(training_set, "Training Set")

<font size="4">Analyzing Validation Set</font>

In [None]:
print_data_regarding_dataset(validation_set, "Validation Set")

<font size="4">Analyzing Test Set</font>

In [None]:
print_data_regarding_dataset(test_set, "Test Set")

<font size="5">Loading images to datasets</font>

In [None]:
def _open_loaded_image(image_path: str) -> Image.Image:
    """Opens an image, reads its pixel data and releases the underlying file."""
    with Image.open(image_path) as image:
        image.load()
    return image


def load_images(images_file_paths_dict: dict[str, dict[int, str]],
                examples_list: list[tuple[tuple[str, int], tuple[str, int], bool]]) -> tuple[list[tuple], list[bool]]:
    """
    Loads the images referenced by the examples to memory.

    Args:
        images_file_paths_dict: Mapping person name -> {image index -> image path}.
        examples_list: Examples of the form ((person, index), (person, index), label).

    Returns:
        Two lists of equal length:
        - the image pairs. Both images of a pair are resized to IMAGE_SIZE and
          converted to tensors when at least one of them has a different size;
          otherwise both are kept as PIL images.
        - the label of every pair.

    Raises:
        ValueError: If one of the images is not in IMAGE_MODE.
        KeyError: If an example references a person or index that was not scanned.
    """
    data_to_ret: list = list()
    labels_to_ret: list[bool] = list() # Returned labels, true if same person, false otherwise.
    transform = transforms.Compose([ # In case we need/want transform the inputs.
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
    ]) # Important to note, if the input is transformed, then it's normalized.

    for example in examples_list:
        first_person, first_image_index = example[0]
        second_person, second_image_index = example[1]
        is_same = example[2]
        first_image = _open_loaded_image(images_file_paths_dict[first_person][first_image_index])
        second_image = _open_loaded_image(images_file_paths_dict[second_person][second_image_index])
        if (first_image.mode != IMAGE_MODE) or (second_image.mode != IMAGE_MODE):
            raise ValueError(
                f"Expected image mode {IMAGE_MODE!r}, got {first_image.mode!r} and {second_image.mode!r}."
            )
        if (first_image.size != IMAGE_SIZE) or (second_image.size != IMAGE_SIZE):
            # Resizing instead of throwing error. Both images are transformed as soon as
            # one of them needs it, so the two halves of a pair always share size and type.
            first_image = transform(first_image)
            second_image = transform(second_image)
        data_to_ret.append((first_image, second_image))
        labels_to_ret.append(is_same)
    return data_to_ret, labels_to_ret

# Load the image pairs and labels of every split.
training_data, training_labels = load_images(loaded_images, training_set)
validation_data, validation_labels = load_images(loaded_images, validation_set)
test_data, test_labels = load_images(loaded_images, test_set)

<font size="4">Converting images to arrays to allow the usage of the PyTorch DataLoader</font>

In [None]:
def convert_images_to_array(image_tuple_list: list[tuple[Image.Image, Image.Image]]) -> list[np.ndarray]:
    """
    Converts every image of every pair to a numpy array scaled to at most 1.

    An array whose maximum is greater than 1 is divided by MAX_PIXEL_VALUE;
    an array whose maximum is at most 1 is kept as it is.

    Returns:
        A list with one `(first_array, second_array)` tuple per input pair.
    """
    def _as_scaled_array(image) -> np.ndarray:
        pixels = np.array(image)
        if pixels.max() > 1:
            return pixels / MAX_PIXEL_VALUE
        return pixels

    return [
        (_as_scaled_array(first_image), _as_scaled_array(second_image))
        for first_image, second_image in image_tuple_list
    ]

# Array versions of every split, scaled by convert_images_to_array.
arrayed_training_data = convert_images_to_array(training_data)
arrayed_validation_data = convert_images_to_array(validation_data)
arrayed_test_data = convert_images_to_array(test_data)

In [None]:
class SiameseDataset(data.Dataset):
    """
    Dataset of image pairs and their labels.

    Every item is a tuple `(first_image, second_image, label)` of float32
    tensors. An image without a channel axis (2 dimensions) gets one added in
    front, independently of the other image of the pair.
    """
    def __init__(self, image_pairs: list[tuple], labels: list[int]):
        self.image_pairs = image_pairs
        self.labels = labels

    def __len__(self):
        return len(self.image_pairs)

    @staticmethod
    def _to_channel_first_tensor(image) -> torch.Tensor:
        """Converts one image to a float32 tensor that has a leading channel axis."""
        tensor = torch.tensor(np.array(image), dtype=torch.float32)
        if tensor.dim() == 2: # Adding color channel in case original picture didn't have it.
            tensor = tensor.unsqueeze(0)
        return tensor

    def __getitem__(self, idx):
        img1, img2 = self.image_pairs[idx]
        # Each image is handled on its own: a pair mixing a 2-D and a 3-D image
        # must not get an extra axis on the image that already has its channel.
        img1 = self._to_channel_first_tensor(img1)
        img2 = self._to_channel_first_tensor(img2)
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        return img1, img2, label

# Every split is built from its array version so that training, validation and test
# inputs all go through the same scaling (the validation split previously used the raw images).
training_dataset: data.Dataset = SiameseDataset(arrayed_training_data, training_labels)
validation_dataset: data.Dataset = SiameseDataset(arrayed_validation_data, validation_labels)
test_dataset: data.Dataset = SiameseDataset(arrayed_test_data, test_labels)
training_loader: data.DataLoader = torch.utils.data.DataLoader(training_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
validation_loader: data.DataLoader = torch.utils.data.DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
# The test loader yields one pair at a time, in order.
test_loader: data.DataLoader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, pin_memory=True)

# Neural Network definition

<font size="6">Creating Network</font>

In [None]:
class ModularSiameseNetwork(nn.Module):
    def __init__(self,
                 first_conv_layer: tuple[int, int, int, torch.nn.modules.pooling.MaxPool2d | None],
                 other_layers: list[(int, int, torch.nn.modules.pooling.MaxPool2d | None)],
                 input_size: list[int, int, int], # channels, height, width
                 fully_connected_layer_size: int,
                 apply_batchnorm: bool = False):
        super().__init__()

        self.main_network_block = nn.Sequential()
        first_layer_in_channel, first_layer_out_channel, first_layer_kernel_size, first_layer_use_max_pool = first_conv_layer
        self.main_network_block.append(
            nn.Conv2d(in_channels=first_layer_in_channel,
                      out_channels=first_layer_out_channel,
                      kernel_size=first_layer_kernel_size)
        )
        if apply_batchnorm:
            self.main_network_block.append(
                nn.BatchNorm2d(first_layer_out_channel)
            )
        self.main_network_block.append(
            nn.ReLU(),
        )
        if first_layer_use_max_pool:
            self.main_network_block.append(
                first_layer_use_max_pool
            )
        prev_layer_output: int = first_layer_out_channel
        for current_layer in other_layers:
            current_layer_output_channels, current_layer_kernel_size, max_pool = current_layer
            self.main_network_block.append(
                nn.Conv2d(prev_layer_output,
                          current_layer_output_channels,
                          kernel_size=current_layer_kernel_size)
            )
            if apply_batchnorm:
                self.main_network_block.append(
                    nn.BatchNorm2d(current_layer_output_channels)
                )
            self.main_network_block.append(
                nn.ReLU()
            )
            if max_pool: # If maxpooling should be used.
              self.main_network_block.append(
                  max_pool
              )
            prev_layer_output = current_layer_output_channels

        with torch.no_grad(): # Dynamically calculate the size of the layer according to expected input.
            dummy = torch.zeros(1, *input_size)
            dummy_out = self.main_network_block(dummy)
    
            flattened_size = dummy_out.view(1, -1).size(1)

        self.fully_connected_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flattened_size, fully_connected_layer_size,),
            nn.ReLU()
        )

        self.output_layer = nn.Sequential(
            nn.Linear(fully_connected_layer_size, 1),
            nn.Sigmoid()
        )

        # === Initialization ===
        with torch.no_grad():
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.normal_(m.weight, mean=0.0, std=1e-2)
                    nn.init.normal_(m.bias, mean=0.5, std=1e-2)
                elif isinstance(m, nn.Linear):
                    nn.init.normal_(m.weight, mean=0.0, std=1e-2)
                    nn.init.normal_(m.bias, mean=0.5, std=2e-1)

    def forward_once(self, input):
        network_block_output = self.main_network_block(input)
        fully_connected_layer_output = self.fully_connected_layer(network_block_output)
        return fully_connected_layer_output

    def forward(self, input1, input2):
        output1 = self.forward_once(input1)
        output2 = self.forward_once(input2)
        return self.output_layer(torch.abs(output1 - output2)) # L1 distance.

A class designed to encapsulate all the data and variables required to run training on a training set, as well as to use intelligent scheduling.

In [None]:
class SiameseNetworkWithScheduler:
    """
    Bundles a siamese network with its loss, optimizer and learning-rate scheduler.

    Args:
        network: The network to train; it is moved to DEVICE_TO_USE.
        criterion: Name of the loss. Only "BCE" is supported.
        optimizer_to_use: "SGD" or "ADAM".
        optimizer_params: Optional overrides read with the keys "initial_lr",
            "l2_regularition_strength" and (SGD only) "momentum".
        scheduler_params: Optional overrides read with the key "learning_rate_decay_func".
        use_scheduler: When False the learning rate is multiplied by 1 on every step.
        print_summary: Whether the configuration and a network summary are printed.

    Raises:
        ValueError: If the criterion or the optimizer is not supported.
    """
    def __init__(self, network: ModularSiameseNetwork, 
                 criterion: str,
                 optimizer_to_use: str = "SGD",
                 optimizer_params: dict | None = None,
                 scheduler_params: dict | None = None,
                 use_scheduler: bool = True,
                 print_summary: bool = True
                 ):
        self.inner_network = network.to(DEVICE_TO_USE)
        # Eval mode from the start, so that any forward pass performed while printing
        # the summary cannot update BatchNorm statistics.
        self.inner_network.eval()
        if criterion == "BCE":
            self.criterion = nn.BCELoss()
        else:
            raise ValueError(f"Criterion: {criterion} not supported")
        # The defaults are resolved before anything reads the dictionaries; reading
        # them first crashed when the default None was used together with print_summary.
        if not optimizer_params:
            optimizer_params = dict()
        if not scheduler_params:
            scheduler_params = dict()
        initial_lr = optimizer_params.get("initial_lr", DEFAULT_LR)
        l2_regularition_strength = optimizer_params.get("l2_regularition_strength", DEFAULT_L2_REGULARIZATION_STRENGTH)
        if print_summary:
            print(f'Optimizer to use: {optimizer_to_use}')
            print(f'Criterion to use: {criterion}')
            print(f'Initial_LR: {initial_lr}')
            print(f'L2_Regularization_Strength: {l2_regularition_strength}')
            summary(self.inner_network.main_network_block, (1, *IMAGE_SIZE), batch_size=128)
        self.optimizer_as_str = optimizer_to_use
        if optimizer_to_use == "SGD":
            momentum = optimizer_params.get("momentum", DEFAULT_MOMEMENTUM)
            optimizer = torch.optim.SGD(
                network.parameters(),
                lr=initial_lr,             # learning rate
                momentum=momentum,       # momentum
                weight_decay=l2_regularition_strength  # L2 regularization
            )
        elif optimizer_to_use == "ADAM":
            optimizer = torch.optim.Adam(
                network.parameters(),
                lr=initial_lr,             # learning rate
                weight_decay=l2_regularition_strength  # L2 regularization
            )
        else:
            raise ValueError(f"Optimizer: {optimizer_to_use} not supported")
        self.optimizer = optimizer
        if use_scheduler:
            learning_rate_decay_func = scheduler_params.get("learning_rate_decay_func", DEFAULT_LEARNING_RATE_DECAY_FUNC)
            self.inner_scheduler = lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=learning_rate_decay_func)
        else:
            self.inner_scheduler = lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lambda epoch: 1) # Does nothing, by design to make the flow more coherent.
        self.inner_network.eval() # Making sure it's not mutated accidently.

    @classmethod
    def calculate_loss(cls, model, loss_criterion, input_1, input_2, targets):
        """
        Moves one batch to DEVICE_TO_USE, runs the model and returns the loss.

        The targets get a trailing axis (`unsqueeze(1)`) before they are compared
        with the model output.
        """
        input1_in_device = input_1.to(DEVICE_TO_USE, non_blocking=True)
        input2_in_device = input_2.to(DEVICE_TO_USE, non_blocking=True)
        labels_in_device = targets.unsqueeze(1).to(DEVICE_TO_USE, non_blocking=True)
        output = model(input1_in_device, input2_in_device)
        loss = loss_criterion(output, labels_in_device)
        return loss

    def _average_loss(self, loader: data.DataLoader) -> float:
        """Returns the mean batch loss over `loader`, without tracking gradients."""
        total_loss = 0.0
        with torch.no_grad():
            for input_1, input_2, targets in loader:
                total_loss += self.calculate_loss(self.inner_network, self.criterion, input_1, input_2, targets).item()
        return total_loss / len(loader)

    def train(self,
              training_loader: data.DataLoader, 
              validation_loader: data.DataLoader, 
              use_early_stopping: bool = EARLY_STOP,
              patiance_factor: float = PATIANCE_FACTOR,
              patiance_in_epochs: int = ALLOWED_PATIANCE_ITERATIONS,
              max_epochs: int = MAX_EPOCHS_FOR_TRAINING
              ):
        """
        Trains the network and returns the loss history.

        The validation loss is measured after every epoch with the network in
        eval mode. With early stopping, an epoch counts as an improvement only
        when the validation loss drops by more than `patiance_factor` below the
        best value seen so far; training stops after `patiance_in_epochs`
        consecutive epochs without improvement and the weights of the best
        epoch are restored.

        Args:
            training_loader: Batches used for the optimization steps.
            validation_loader: Batches used to measure the validation loss.
            use_early_stopping: Whether training may stop before `max_epochs`.
            patiance_factor: Minimal decrease of the validation loss that counts
                as an improvement.
            patiance_in_epochs: Number of consecutive epochs without improvement
                that are tolerated.
            max_epochs: Upper bound on the number of epochs.

        Returns:
            `(training_loss_per_epoch, validation_loss_per_epoch)`. The first entry
            of each list is the loss measured before any training step.
        """
        training_loss_per_epoch: list[float] = list()
        validation_loss_per_epoch: list[float] = list()
        epochs_without_improvement: int = 0
        best_validation_loss: float = float("inf")
        best_model_state_dict: dict | None = None
        total_time_start = time.time()

        # Loss without any training.
        self.inner_network.eval()
        validation_loss_per_epoch.append(self._average_loss(validation_loader))
        training_loss_per_epoch.append(self._average_loss(training_loader))

        for epoch in range(max_epochs):
            self.inner_network.train()
            start_time = time.time()
            running_loss = 0.0
            for input1, input2, targets in training_loader:
                self.optimizer.zero_grad()
                loss = self.calculate_loss(self.inner_network, self.criterion, input1, input2, targets)
                running_loss += loss.item()
                loss.backward()
                self.optimizer.step()
            self.inner_scheduler.step()
            avg_loss = running_loss / len(training_loader)
            training_loss_per_epoch.append(avg_loss)

            # Validation runs in eval mode so that BatchNorm uses (and does not
            # update) its running statistics. It is recorded every epoch, so both
            # loss histories always have the same length.
            self.inner_network.eval()
            running_validation_loss = self._average_loss(validation_loader)
            validation_loss_per_epoch.append(running_validation_loss)

            performance_degraded: bool = False
            early_stop_triggered: bool = False
            if use_early_stopping:
                if best_validation_loss - running_validation_loss > patiance_factor:
                    best_validation_loss = running_validation_loss
                    best_model_state_dict = copy.deepcopy(self.inner_network.state_dict())
                    epochs_without_improvement = 0
                else:
                    epochs_without_improvement += 1
                    performance_degraded = True
                    if epochs_without_improvement >= patiance_in_epochs:
                        print("Early stopping due to no improvement was triggered.")
                        early_stop_triggered = True
            elapsed_time = time.time() - start_time

            print(f"Epoch [{epoch+1}/{max_epochs}] completed in {elapsed_time:.2f}s, Average Training Loss: {avg_loss:.4f}, Average Validation Loss: {running_validation_loss}")
            if performance_degraded:
                print(f"No significant improvement detected (At least {patiance_factor}). Patiance factor:{epochs_without_improvement}/{patiance_in_epochs}")
            if early_stop_triggered:
                print("Ending training early since no validation improvement triggered.")
                break

        # Loads the best model in the previous step since the model's performance might've degraded since the patiance factor.
        if best_model_state_dict is not None:
            self.inner_network.load_state_dict(best_model_state_dict)
            self.inner_network = self.inner_network.to(DEVICE_TO_USE)
        self.inner_network.eval()
        print(f"Finished training in: {time.time() - total_time_start}s.")
        return training_loss_per_epoch, validation_loss_per_epoch

    def eval(self, input1, input2):
        """Returns the network output for one batch, in eval mode and without gradients."""
        self.inner_network.eval()
        with torch.no_grad():
            return self.inner_network(
                input1.to(DEVICE_TO_USE, non_blocking=True),
                input2.to(DEVICE_TO_USE, non_blocking=True)
            )
    
    

<font size="4">A function meant to test the model by running the test data.</font>

In [None]:
def testing_the_model(
        model_to_test: SiameseNetworkWithScheduler,
        test_loader: data.DataLoader,
        number_of_samples_to_save: int = 5
        ) -> tuple[tuple[list, list], tuple[list, list]]:
    """
    Runs the model over the test data, prints loss and accuracy and keeps a few examples.

    A prediction is the model output thresholded at 0.5. Every sample is judged
    on its own, so the loader may use any batch size.

    Args:
        model_to_test: Wrapper providing `eval(input1, input2)` and `criterion`.
        test_loader: Loader yielding `(input1, input2, target)` batches.
        number_of_samples_to_save: Maximal number of examples kept per category.

    Returns:
        Two tuples:
        - `(correct same-person examples, correct different-people examples)`
        - `(incorrect same-person examples, incorrect different-people examples)`
        Every example is `(input1, input2, model output)`; the inputs keep a
        leading batch axis of size 1 and the output is a 0-dimensional tensor.
    """
    test_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    correct_positive_examples: list = list()
    correct_negative_examples: list = list()
    incorrect_positive_examples: list = list()
    incorrect_negative_examples: list = list()
    positive_examples: tuple[list, list] = (correct_positive_examples, correct_negative_examples)
    negative_examples: tuple[list, list] = (incorrect_positive_examples, incorrect_negative_examples)
    # (prediction is correct, the pair shows the same person) -> list collecting such examples
    examples_by_outcome: dict[tuple[bool, bool], list] = {
        (True, True): correct_positive_examples,
        (True, False): correct_negative_examples,
        (False, True): incorrect_positive_examples,
        (False, False): incorrect_negative_examples,
    }
    with torch.no_grad():
        for test_input1, test_input2, test_target in test_loader:
            test_input1 = test_input1.to(DEVICE_TO_USE, non_blocking=True)
            test_input2 = test_input2.to(DEVICE_TO_USE, non_blocking=True)
            test_target = test_target.unsqueeze(1).to(DEVICE_TO_USE, non_blocking=True)
            test_output = model_to_test.eval(test_input1, test_input2)
            batch_loss = model_to_test.criterion(test_output, test_target)
            test_loss += batch_loss.item()
            is_correct = (test_output > 0.5).float() == test_target
            correct_predictions += is_correct.sum().item()
            total_predictions += test_target.size(0)
            for sample_index in range(test_target.size(0)):
                same_face: bool = bool(test_target[sample_index].sum() >= 0.5)
                sample_is_correct: bool = bool(is_correct[sample_index].all())
                append_to: list = examples_by_outcome[(sample_is_correct, same_face)]
                if len(append_to) < number_of_samples_to_save:
                    # Slicing (instead of indexing) keeps the batch axis the display helper expects.
                    append_to.append((
                        test_input1[sample_index:sample_index + 1],
                        test_input2[sample_index:sample_index + 1],
                        test_output[sample_index].sum(),
                    ))
    test_loss /= len(test_loader)
    accuracy = correct_predictions / total_predictions
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy * 100:.2f}%")
    return positive_examples, negative_examples

Display an array as a picture. 

In [None]:
def display_data_loaded_to_loader(
        test_example_1,
        test_example_2,
        prediction: float = -1,
        expected_result: float = -1):
    """Show one image from each input side by side in a single figure.

    Element 0 along the first dimension of each input is taken, its leading
    dimension is squeezed and the result is drawn with a grey colormap. The
    figure title reports ``prediction`` and ``expected_result`` with three
    decimals.

    Args:
        test_example_1: Tensor shown in the left panel ("Image 1").
            Assumed to be shaped (batch, 1, H, W) -- TODO confirm with caller.
        test_example_2: Tensor shown in the right panel ("Image 2"); same
            assumed layout as ``test_example_1``.
        prediction: Value printed after "Prediction:"; must accept the
            ``.3f`` format spec.
        expected_result: Value printed after "Ground Truth:"; must accept the
            ``.3f`` format spec.
    """
    # Move both inputs to host memory first, then turn each into a 2-D array.
    host_tensors = [example.cpu() for example in (test_example_1, test_example_2)]
    images = [tensor[0].squeeze(0).numpy() for tensor in host_tensors]

    # One row, two columns: every panel gets the same treatment.
    figure, axes = plt.subplots(1, 2)
    for axis, image, panel_title in zip(axes, images, ('Image 1', 'Image 2')):
        axis.imshow(image, cmap='gray')
        axis.set_title(panel_title)
        axis.axis('off')

    caption = (
        f"Prediction: {prediction:.3f}, "
        f"Ground Truth: {expected_result:.3f}"
    )
    figure.suptitle(caption, fontsize=14, y=0.95)
    plt.tight_layout()
    plt.show()

In [None]:
def display_graphs_for_loss(training_loss_per_epoch: list[float], validation_loss_per_epoch: list[float]):
    """Plot the training and validation loss curves on one set of axes.

    Args:
        training_loss_per_epoch: Loss values drawn as the "Training loss" curve,
            one point per list entry.
        validation_loss_per_epoch: Loss values drawn as the "Validation loss"
            curve, one point per list entry.
    """
    plt.title("Training and validation loss over epochs")

    # Each entry is (legend label, series); a copy of the series is plotted.
    curves = (
        ("Training loss", training_loss_per_epoch),
        ("Validation loss", validation_loss_per_epoch),
    )
    for curve_label, losses in curves:
        plt.plot(losses[:], label=curve_label)

    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

<font size="6">Architecture Testing</font>

<font size="5">Testing and evaluating SGD optimizer with batchnorm</font>

<font size="4">Initializing</font>

In [None]:
# Build the Siamese backbone whose convolution stack matches the configured
# input resolution. Each layer tuple presumably reads
# (out_channels, kernel_size, pooling) with the first layer also carrying the
# input channel count -- TODO confirm against ModularSiameseNetwork.
if IMAGE_SIZE == (105, 105):
    model = ModularSiameseNetwork(
        first_conv_layer=(1, 64, 10, nn.MaxPool2d(2)),
        other_layers=[(128, 7, nn.MaxPool2d(2)),
                    (128, 4, nn.MaxPool2d(2)),
                    (256, 4, None)],
        input_size=[1, *IMAGE_SIZE],
        fully_connected_layer_size=4096,
        apply_batchnorm=True
    )
elif IMAGE_SIZE == (175, 175):
    # Larger inputs get one extra convolution stage and a wider first kernel.
    model = ModularSiameseNetwork(
        first_conv_layer=(1, 64, 20, nn.MaxPool2d(2)),
        other_layers=[(64, 10, nn.MaxPool2d(2)),
                    (128, 7, nn.MaxPool2d(2)),
                    (128, 4, nn.MaxPool2d(2)),
                    (256, 4, None)],
        input_size=[1, *IMAGE_SIZE],
        fully_connected_layer_size=4096,
        apply_batchnorm=True
    )
else:
    # Without this guard an unsupported size would leave `model` undefined,
    # or silently reuse a `model` left over from a previously executed cell.
    raise ValueError(
        f"Unsupported IMAGE_SIZE {IMAGE_SIZE}; expected (105, 105) or (175, 175)."
    )

# Wrap the freshly built backbone with a BCE criterion and an SGD optimizer.
complete_sgd_network = SiameseNetworkWithScheduler(
    network=model,
    criterion="BCE",
    optimizer_to_use="SGD",
    optimizer_params={'initial_lr': 1e-2}
)

<font size="4">Training</font>

In [None]:
# Train the SGD-configured network; train() returns two values that are
# unpacked as the training and validation loss histories plotted later.
# NOTE: the ADAM section further down assigns to the same two names, so
# running it overwrites these results.
training_loss_per_epoch, validation_loss_per_epoch = complete_sgd_network.train(
    training_loader=training_loader,
    validation_loader=validation_loader,
)

<font size="4">Evaluation</font>

In [None]:
# Evaluate the SGD network on the test loader and keep at most 4 sample pairs
# per category for the visualisation cells below.
# NOTE(review): the exact grouping inside the two returned values is decided
# by testing_the_model -- confirm before relying on their order.
positive_examples, negative_examples = testing_the_model(complete_sgd_network, test_loader, number_of_samples_to_save=4)

In [None]:
# Plot the loss histories recorded while training the SGD network.
display_graphs_for_loss(
    training_loss_per_epoch=training_loss_per_epoch,
    validation_loss_per_epoch=validation_loss_per_epoch,
)

In [None]:
# Show every saved sample in positive_examples: group 0 is titled with ground
# truth 1 and group 1 with ground truth 0.
# NOTE(review): assumes that labelling matches how testing_the_model fills the
# two groups -- confirm.
for group_index, ground_truth in ((0, 1), (1, 0)):
    for input1, input2, prediction in positive_examples[group_index]:
        display_data_loaded_to_loader(input1, input2, prediction, expected_result=ground_truth)

In [None]:
# Show every saved sample in negative_examples: group 0 is titled with ground
# truth 1 and group 1 with ground truth 0.
# NOTE(review): assumes that labelling matches how testing_the_model fills the
# two groups -- confirm.
for group_index, ground_truth in ((0, 1), (1, 0)):
    for input1, input2, prediction in negative_examples[group_index]:
        display_data_loaded_to_loader(input1, input2, prediction, expected_result=ground_truth)

<font size="5">Testing and evaluating ADAM optimizer with batchnorm</font>

<font size="4">Initializing</font>

In [None]:
# Build a fresh Siamese backbone for the ADAM experiment, using the
# convolution stack that matches the configured input resolution. Each layer
# tuple presumably reads (out_channels, kernel_size, pooling) with the first
# layer also carrying the input channel count -- TODO confirm against
# ModularSiameseNetwork.
if IMAGE_SIZE == (105, 105):
    model = ModularSiameseNetwork(
        first_conv_layer=(1, 64, 10, nn.MaxPool2d(2)),
        other_layers=[(128, 7, nn.MaxPool2d(2)),
                    (128, 4, nn.MaxPool2d(2)),
                    (256, 4, None)],
        input_size=[1, *IMAGE_SIZE],
        fully_connected_layer_size=4096,
        apply_batchnorm=True
    )
elif IMAGE_SIZE == (175, 175):
    # Larger inputs get one extra convolution stage and a wider first kernel.
    model = ModularSiameseNetwork(
        first_conv_layer=(1, 64, 20, nn.MaxPool2d(2)),
        other_layers=[(64, 10, nn.MaxPool2d(2)),
                    (128, 7, nn.MaxPool2d(2)),
                    (128, 4, nn.MaxPool2d(2)),
                    (256, 4, None)],
        input_size=[1, *IMAGE_SIZE],
        fully_connected_layer_size=4096,
        apply_batchnorm=True
    )
else:
    # Without this guard an unsupported size would silently reuse whatever
    # `model` an earlier cell left behind (e.g. the already-trained SGD
    # backbone), or fail later with a NameError.
    raise ValueError(
        f"Unsupported IMAGE_SIZE {IMAGE_SIZE}; expected (105, 105) or (175, 175)."
    )

# Wrap the freshly built backbone with a BCE criterion and an ADAM optimizer.
complete_adam_network = SiameseNetworkWithScheduler(
    network=model,
    criterion="BCE",
    optimizer_to_use="ADAM",
    optimizer_params={'initial_lr': 1e-4}
)

<font size="4">Training</font>

In [None]:
# Train the ADAM-configured network; train() returns two values that are
# unpacked as the training and validation loss histories plotted later.
# NOTE: this rebinds the names used by the SGD section above, so the SGD
# loss histories are no longer reachable after this cell runs.
training_loss_per_epoch, validation_loss_per_epoch = complete_adam_network.train(
    training_loader=training_loader,
    validation_loader=validation_loader,
)

<font size="4">Evaluation</font>

In [None]:
# Evaluate the ADAM network on the test loader and keep at most 4 sample pairs
# per category for the visualisation cells below (rebinds the names used by
# the SGD evaluation above).
# NOTE(review): the exact grouping inside the two returned values is decided
# by testing_the_model -- confirm before relying on their order.
positive_examples, negative_examples = testing_the_model(complete_adam_network, test_loader, number_of_samples_to_save=4)

In [None]:
# Plot the loss histories recorded while training the ADAM network.
display_graphs_for_loss(
    training_loss_per_epoch=training_loss_per_epoch,
    validation_loss_per_epoch=validation_loss_per_epoch,
)


In [None]:
# Show every saved sample in positive_examples: group 0 is titled with ground
# truth 1 and group 1 with ground truth 0.
# NOTE(review): assumes that labelling matches how testing_the_model fills the
# two groups -- confirm.
for group_index, ground_truth in ((0, 1), (1, 0)):
    for input1, input2, prediction in positive_examples[group_index]:
        display_data_loaded_to_loader(input1, input2, prediction, expected_result=ground_truth)


In [None]:
# Show every saved sample in negative_examples: group 0 is titled with ground
# truth 1 and group 1 with ground truth 0.
# NOTE(review): assumes that labelling matches how testing_the_model fills the
# two groups -- confirm.
for group_index, ground_truth in ((0, 1), (1, 0)):
    for input1, input2, prediction in negative_examples[group_index]:
        display_data_loaded_to_loader(input1, input2, prediction, expected_result=ground_truth)