In [1]:
from constants import DatasetPath
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import timm

import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader

from skimage import io
from sklearn.model_selection import train_test_split

from tqdm.notebook import tqdm
import cv2
import os

In [2]:
# Main dataset folder path (file names are appended to it by string concatenation)
DATA_DIR = DatasetPath.effectivePath

# HYPERPARAMETERS

# Number of triplets per batch
BATCH_SIZE = 128

# Learning rate handed to the optimizer
LR = 0.001

# Number of passes over the training set
EPOCHS = 50

# Device to run calculations on: prefer the GPU, but fall back to the CPU so the
# notebook still runs on machines without CUDA instead of failing on the first `.to(DEVICE)`.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
# Read the CSV that lists the image file names used to build the triplets
df = pd.read_csv(DATA_DIR + "input.csv")

# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible
train_df, valid_df = train_test_split(df, random_state = 42, test_size = 0.20)
print(len(train_df), len(valid_df), sep = "\n")

16000
4000


In [4]:
# Class to provide useful functions and prepare the data for NN use 
class APN_Dataset(Dataset):
    """
    Serves (anchor, positive, negative) image triplets as tensors ready for the network.

    Parameters
    ----------
        df (pd.DataFrame): one row per triplet; the columns `anchor`, `positive` and `negative`
            hold the image file names, relative to the dataset folder.
        data_dir (str, optional): prefix prepended to every file name. When omitted, the
            notebook-level `DATA_DIR` is used.
    """

    def __init__(self, df, data_dir = None):
        """
        Stores the DataFrame describing the triplets.

        Parameters
        ----------
            df (pd.DataFrame): the DataFrame containing the dataset.
            data_dir (str, optional): folder prefix for the image files (defaults to `DATA_DIR`).
        """
        self.df = df
        self.data_dir = data_dir

    def __len__(self):
        """
        Calculates the number of triplets in the dataset.

        Returns
        -------
            (int): number of rows of the underlying DataFrame.
        """
        return len(self.df)

    def _load_image(self, relative_path):
        """
        Reads one image from disk and converts it into a tensor.

        Parameters
        ----------
            relative_path (str): file name appended to the dataset folder prefix.

        Returns
        -------
            (torch.Tensor): the image with one extra leading axis, divided by 255.0.
        """
        # Resolved at call time so the global DATA_DIR is only required when no folder was passed in.
        root = DATA_DIR if self.data_dir is None else self.data_dir
        image = io.imread(root + relative_path)

        # Add a leading axis in front of the image axes.
        # NOTE(review): assumes 2-D, single-channel, 8-bit images (hence the 255.0) - confirm against the data.
        image = np.expand_dims(image, 0)

        return torch.from_numpy(image) / 255.0

    def __getitem__(self, idx):
        """
        Retrieves the images at the given row index.

        Parameters
        ----------
            idx (int): positional dataset row index.

        Returns
        -------
            A_img, P_img, N_img (torch.Tensor): the anchor, positive and negative images of row `idx`.
        """
        row = self.df.iloc[idx]

        return (
            self._load_image(row.anchor),
            self._load_image(row.positive),
            self._load_image(row.negative),
        )

In [5]:
# Wrap each split in its own APN_Dataset
trainset, validset = APN_Dataset(train_df), APN_Dataset(valid_df)

# Sanity check: the dataset sizes must match the split sizes
print(f"Size of trainset: {len(trainset)}")
print(f"Size of validset: {len(validset)}")

Size of trainset: 16000
Size of validset: 4000


In [6]:
# Batch both datasets; only the training data is reshuffled at every epoch
trainloader = DataLoader(dataset = trainset, batch_size = BATCH_SIZE, shuffle = True)
validloader = DataLoader(dataset = validset, batch_size = BATCH_SIZE, shuffle = False)

# Number of batches each loader yields per epoch
print(f"No. of batches in trainloader : {len(trainloader)}")
print(f"No. of batches in validloader : {len(validloader)}")

No. of batches in trainloader : 250
No. of batches in validloader : 63


In [7]:
class APN_Model(nn.Module):
    """
    Embedding network built on a timm `tf_efficientnetv2_b0` backbone whose classifier
    is replaced by a linear layer that outputs the embedding vector.
    """

    def __init__(self, emb_size = 512):
        """
        Builds the backbone and attaches the embedding head.

        Parameters
        ----------
        emb_size (int, optional): the size of the output embedding vector (default is 512).
        """
        super().__init__()

        # Backbone created without pretrained weights
        backbone = timm.create_model('tf_efficientnetv2_b0', pretrained = False)

        # Swap the classifier for a linear layer with the same input width and `emb_size` outputs
        head_inputs = backbone.classifier.in_features
        backbone.classifier = nn.Linear(in_features = head_inputs, out_features = emb_size)

        self.efficientnet = backbone

    def forward(self, images):
        """
        Runs a batch of images through the backbone.

        Parameters
        ----------
            images (torch.Tensor): a batch of images to process.

        Returns
        -------
            (torch.Tensor): the backbone output, i.e. one embedding of size `emb_size` per image.
        """
        return self.efficientnet(images)

In [None]:
# Replace the backbone's stem convolution with a 1-input-channel one so the network
# accepts greyscale Fourier-spectrum images instead of RGB.
model = APN_Model()
model.efficientnet.conv_stem = nn.Conv2d(
    in_channels = 1,
    out_channels = 32,
    kernel_size = 3,
    stride = 2,
    padding = 1,
    bias = False,
);

# Move all parameters to the configured device
model.to(DEVICE);

In [10]:
def train_fn(model, dataloader, optimizer, criterion):
    """
    Runs one training epoch over the triplets served by `dataloader`.

    Parameters
    ----------
        model (torch.nn.Module): the network being trained.
        dataloader (torch.utils.data.DataLoader): yields batches of (anchor, positive, negative) images.
        optimizer (torch.optim.Optimizer): updates the model's parameters after every batch.
        criterion (function): called as `criterion(anchor_embs, positive_embs, negative_embs)` to get the loss.

    Returns
    -------
        (float) : the mean of the per-batch losses.
    """

    # Training mode (e.g. dropout active)
    model.train()
    running_loss = 0.0

    for batch in tqdm(dataloader):
        # Each batch is a triplet of image tensors; move all three to the compute device.
        anchors, positives, negatives = (images.to(DEVICE) for images in batch)

        # The same network embeds the three images, one forward pass each.
        anchor_embs = model(anchors)
        positive_embs = model(positives)
        negative_embs = model(negatives)

        batch_loss = criterion(anchor_embs, positive_embs, negative_embs)

        # Drop the gradients left over from the previous batch, backpropagate, then update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(dataloader)

In [11]:
def eval_fn(model, dataloader, criterion):
    """
    Computes the mean loss of the model over the triplets served by `dataloader`, without training.

    Parameters
    ----------
        model (torch.nn.Module): the network being evaluated.
        dataloader (torch.utils.data.DataLoader): yields batches of (anchor, positive, negative) images.
        criterion (function): called as `criterion(anchor_embs, positive_embs, negative_embs)` to get the loss.

    Returns
    -------
        (float): the mean of the per-batch losses.
    """

    # Evaluation mode (e.g. dropout inactive)
    model.eval()
    running_loss = 0.0

    # No gradients are needed here, so tracking is switched off for the whole pass.
    with torch.no_grad():
        for batch in tqdm(dataloader):
            # Each batch is a triplet of image tensors; move all three to the compute device.
            anchors, positives, negatives = (images.to(DEVICE) for images in batch)

            # The same network embeds the three images, one forward pass each.
            anchor_embs = model(anchors)
            positive_embs = model(positives)
            negative_embs = model(negatives)

            batch_loss = criterion(anchor_embs, positive_embs, negative_embs)
            running_loss += batch_loss.item()

    return running_loss / len(dataloader)

In [12]:
# Loss and optimizer: triplet margin loss (PyTorch's default margin and norm degree, written out)
# and Adam over every parameter of the model.
criterion = nn.TripletMarginLoss(margin = 1.0, p = 2.0)
optimizer = torch.optim.Adam(params = model.parameters(), lr = LR)

In [None]:
# Checkpoint files, kept inside the dataset folder:
# the saved model weights ...
model_path = DATA_DIR + "best_model.pt"
# ... and the validation loss recorded with them, stored as plain text.
loss_path = DATA_DIR + "best_valid_loss.txt"

def save_checkpoint(best_valid_loss):
    """
    Writes the weights of the global `model` to `model_path` and the given loss to `loss_path`.

    Parameters
    ----------
        best_valid_loss (float): the validation loss to record alongside the saved weights.
    """
    weights = model.state_dict()
    torch.save(weights, model_path)

    # The loss is stored as text so that it can be parsed back with float()
    with open(loss_path, 'w') as loss_file:
        loss_file.write(str(best_valid_loss))

    print("CHECKPOINT_SAVED")

def load_checkpoint():
    """
    Restores the global `model` from `model_path` and reads the loss stored in `loss_path`.
    If either file is missing nothing is loaded, so training starts from scratch.

    Returns
    -------
        best_valid_loss (float): the stored validation loss, or infinity when no checkpoint was found
        (so that any finite validation loss counts as an improvement).
    """
    if os.path.exists(model_path) and os.path.exists(loss_path):
        print("CHECKPOINT_LOADED")
        # map_location remaps the stored tensors onto DEVICE, so a checkpoint written on a GPU
        # can also be restored on a CPU-only machine.
        model.load_state_dict(torch.load(model_path, map_location = DEVICE))
        with open(loss_path, 'r') as f:
            return float(f.read())

    print("UNABLE_TO_LOAD_CHECKPOINT")
    # `np.Inf` was removed in NumPy 2.0; `np.inf` is the same value and exists in every version.
    return np.inf

In [None]:
# Resume from the previously saved checkpoint, if there is one: this restores the model
# weights and yields the loss to beat (infinity when nothing was saved yet).
best_valid_loss = load_checkpoint()

for epoch in range(1, EPOCHS + 1):

    # One pass over the training data, then one over the validation data
    train_loss = train_fn(model, trainloader, optimizer, criterion)
    valid_loss = eval_fn(model, validloader, criterion)

    # Keep the weights only when the validation loss beats the best one seen so far
    if valid_loss < best_valid_loss:
        save_checkpoint(valid_loss)
        best_valid_loss = valid_loss

    # Per-epoch report
    print(f"EPOCHS : {epoch} train_loss : {train_loss} valid_loss : {valid_loss}")

In [14]:
# Function that generates the encoding vectors
def get_encoding_csv(model, anc_img_path):
    """
    Encodes every listed image with the model and returns the encodings next to the file names.

    Parameters
    ----------
        model (torch.nn.Module): the network producing the encodings.
        anc_img_path (pd.Series): image file names, relative to `DATA_DIR`.

    Returns
    -------
        df_enc (pd.DataFrame): first column the file names, followed by one column per encoding
        component; rows are numbered 0..n-1 in the order of `anc_img_path`.
    """
    encodings = []

    model.eval()

    with torch.no_grad():
        for image in tqdm(np.array(anc_img_path)):
            # The folder prefix is DATA_DIR, the notebook's dataset root
            # (the previously used DATASET_PATH is not defined anywhere in the notebook).
            A = io.imread(DATA_DIR + image)
            A = np.expand_dims(A, 0)
            # NOTE(review): the int32 cast is kept from the original; the result is then divided by 255.0
            # like in APN_Dataset - confirm that the images are 8-bit.
            A = torch.from_numpy(A.astype(np.int32)) / 255.0
            A = A.to(DEVICE)
            # unsqueeze(0) adds the batch axis: the model is fed one image at a time
            A_enc = model(A.unsqueeze(0))
            encodings.append(A_enc.squeeze().cpu().detach().numpy())

    encodings = pd.DataFrame(np.array(encodings))

    # pd.concat aligns rows on the index. Renumbering the names 0..n-1 keeps every name next to
    # its own encoding even when the input Series carries a non-default index (e.g. after a split).
    names = anc_img_path.reset_index(drop = True)
    df_enc = pd.concat([names, encodings], axis = 1)

    return df_enc

In [15]:
# Reload the model weights saved during training.
# The file lives in DATA_DIR (DATASET_PATH is not defined anywhere in the notebook);
# map_location lets a checkpoint saved on a GPU be restored on whatever DEVICE is in use.
model.load_state_dict(torch.load(DATA_DIR + 'best_model.pt', map_location = DEVICE))

# Build the database of feature vectors.
# NOTE(review): this encodes the anchors of the whole `df`, not only of the training split - confirm.
df_enc = get_encoding_csv(model, df['anchor'])

  0%|          | 0/20000 [00:00<?, ?it/s]

In [None]:
# Persist the freshly computed encodings in the working directory
df_enc.to_csv('database.csv', index = False)

# Read the database used for the searches from the dataset folder
# (DATA_DIR replaces DATASET_PATH, which is not defined anywhere in the notebook).
# NOTE(review): the file read back is 'embedded.csv', not the 'database.csv' written above,
# and it replaces `df_enc` - confirm that this is intended.
df_enc = pd.read_csv(DATA_DIR + 'embedded.csv')
df_enc.head()

In [18]:
def euclidean_dist(img_enc, anc_enc_arr):
    """
    Computes the squared Euclidean distance between two encodings.

    No square root is taken: the value is the dot product of the difference with its own
    transpose, which preserves the ordering of the distances.

    Parameters
    ----------
        img_enc (ndarray): encoding of the query image.
        anc_enc_arr (ndarray): encoding to compare against.

    Returns
    -------
        (ndarray): `np.dot(d, d.T)` with `d = img_enc - anc_enc_arr`
        (a 1x1 array when both inputs are single-row matrices).
    """
    delta = img_enc - anc_enc_arr
    return np.dot(delta, delta.T)

In [16]:
def getImageEmbeddings(img, model):
    """
    Encodes a single image with the model.

    Parameters
    ----------
        img (ndarray): the image to encode; it is divided by 255 before being fed to the model.
        model (torch.nn.Module): the network producing the encoding (switched to evaluation mode).

    Returns
    -------
        (ndarray): the model output for a batch containing only this image.
    """
    # Add a leading axis in front of the image axes, then scale
    scaled = torch.from_numpy(np.expand_dims(img, 0)) / 255
    model.eval()

    with torch.no_grad():
        # unsqueeze(0) adds the batch axis: the model receives a batch of one image
        batch = scaled.to(DEVICE).unsqueeze(0)
        output = model(batch)
        encoding = np.array(output.detach().cpu().numpy())

    return encoding

In [17]:
def searchInDatabase(img_enc, database):
    """
    Finds the database entry whose encoding is closest to the given one.

    Parameters
    ----------
        img_enc (ndarray): encoding of the query image, as a flat vector or a single-row matrix.
        database (pd.DataFrame): an `Anchor` column with the image names and, from the second
            column on, one column per encoding component.
            NOTE(review): the column name is capitalised here, while a table built from
            `df['anchor']` would carry a lowercase `anchor` column - confirm the schema.

    Returns
    -------
        the `Anchor` value of the row with the smallest (squared Euclidean) distance to `img_enc`.

    Raises
    ------
        ValueError: if the database has no rows.
    """
    anc_enc_arr = database.iloc[:, 1:].to_numpy(dtype = float)
    if anc_enc_arr.shape[0] == 0:
        raise ValueError("database is empty: there is no encoding to compare against")

    # One broadcasted subtraction replaces the per-row loop; the squared distance is enough
    # because only the position of the minimum matters.
    query = np.asarray(img_enc, dtype = float).reshape(1, -1)
    distance = np.sum((anc_enc_arr - query) ** 2, axis = 1)

    # Positional lookup: works whatever index the database carries.
    return database['Anchor'].iloc[int(np.argmin(distance))]