In [1]:
from constants import DatasetPath
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import timm

import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader

from skimage import io
from sklearn.model_selection import train_test_split

from tqdm.notebook import tqdm
import cv2
import os

In [2]:
DATASET_PATH = DatasetPath.effectivePath
INPUT_CSV_FILE = DATASET_PATH + "input.csv"

# Seed the best-validation-loss file only when it does not exist yet.
# Rewriting it unconditionally would reset the recorded best loss to infinity
# on every run, so a resumed session would load the saved weights but forget
# how good they were and overwrite them after the first epoch.
best_loss_file = DATASET_PATH + 'best_valid_loss.txt'
if not os.path.exists(best_loss_file):
    with open(best_loss_file, 'w') as file:
        file.write(str(np.inf))

# WARNING: tune the batch size to the memory you have available.
# For reference, the original author reports using 128 on a GPU with 6 GB of memory.
BATCH_SIZE = 32

# The learning rate is another hyperparameter; the original experiments were run with this value.
LR = 0.001

# The number of training epochs is a hyperparameter too; raise or lower it as needed.
EPOCHS = 5

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [14]:
# Load the triplet index table from the CSV file.
df = pd.read_csv(INPUT_CSV_FILE)

# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.
train_df, valid_df = train_test_split(df, random_state = 42, test_size = 0.20)

# Report the size of each split, then the shape of the full table.
print(len(train_df), len(valid_df), sep = "\n")
print(df.shape)

16000
4000
(20000, 3)


In [4]:
# Dataset that loads the images listed in the CSV and prepares them as input for the neural network.
class APN_Dataset(Dataset):
  """Anchor / positive / negative image triplets backed by a DataFrame.

  Each row of ``df`` must expose ``anchor``, ``positive`` and ``negative``
  attributes holding image paths that are appended to ``DATASET_PATH``.
  """

  def __init__(self, df):
    self.df = df

  def __len__(self):
    return len(self.df)

  def __getitem__(self, idx):
    """Return the ``(anchor, positive, negative)`` tensors for row ``idx``."""
    record = self.df.iloc[idx]

    triplet = []
    for rel_path in (record.anchor, record.positive, record.negative):
      pixels = io.imread(DATASET_PATH + rel_path)
      # Prepend an axis of length 1. Presumably the images are 2-D grayscale,
      # which makes this the channel axis torch expects first — TODO confirm.
      # (Earlier experiments, now removed, permuted 3-channel images with
      # permute(2, 0, 1) or scaled by 65536.0 instead.)
      pixels = np.expand_dims(pixels, 0)
      # Dividing by 255.0 yields a floating-point tensor.
      triplet.append(torch.from_numpy(pixels) / 255.0)

    anchor_t, positive_t, negative_t = triplet
    return anchor_t, positive_t, negative_t

In [5]:
# Wrap each split in the triplet dataset.
trainset, validset = APN_Dataset(train_df), APN_Dataset(valid_df)

print(f"Size of trainset: {len(trainset)}")
print(f"Size of validset: {len(validset)}")

Size of trainset: 16000
Size of validset: 4000


In [6]:
# Only the training batches are shuffled; validation batches keep the dataset order.
trainloader = DataLoader(dataset = trainset, batch_size = BATCH_SIZE, shuffle = True)
validloader = DataLoader(dataset = validset, batch_size = BATCH_SIZE)

print(f"No. of batches in trainloader : {len(trainloader)}")
print(f"No. of batches in validloader : {len(validloader)}")

No. of batches in trainloader : 500
No. of batches in validloader : 125


In [7]:
# Wrapper that builds the embedding network from a timm model definition.
class APN_Model(nn.Module):
  """timm 'tf_efficientnetv2_b0' backbone whose classifier is replaced by an embedding layer."""

  def __init__(self, emb_size = 512):
    """
    Parameters
    ----------
      emb_size (int): size of the embedding vector produced for each image.
    """
    super().__init__()

    # Create the architecture without loading pretrained weights (pretrained = False).
    backbone = timm.create_model('tf_efficientnetv2_b0', pretrained = False)
    # Swap the classification layer for a linear projection to emb_size features.
    backbone.classifier = nn.Linear(
      in_features = backbone.classifier.in_features,
      out_features = emb_size,
    )
    self.efficientnet = backbone

  def forward(self, images):
    """Return the embeddings the backbone computes for a batch of images."""
    return self.efficientnet(images)

In [None]:
# Small change to the network so it accepts grayscale images of the Fourier spectrum as input.
model = APN_Model()
# Replace the stem convolution with one that takes a single input channel.
model.efficientnet.conv_stem = nn.Conv2d(
    in_channels = 1,
    out_channels = 32,
    kernel_size = 3,
    stride = 2,
    padding = 1,
    bias = False,
)

model.to(DEVICE)

In [9]:
# Training function
def train_fn(model, dataloader, optimizer, criterion):
  """
  Run one training pass over ``dataloader`` and return the mean loss per batch.

  Parameters
  ----------
    model: network that maps a batch of images to embeddings.
    dataloader: iterable yielding (anchor, positive, negative) batches.
    optimizer: optimizer stepped once per batch.
    criterion: loss called as criterion(anchor_embs, positive_embs, negative_embs).

  Returns
  -------
    float: sum of the batch losses divided by the number of batches.
  """
  model.train()  # training mode (dropout on)
  running_loss = 0.0

  for batch in tqdm(dataloader, desc="Training Progress"):
    anchor, positive, negative = (tensor.to(DEVICE) for tensor in batch)

    # Three forward passes through the same network, one per triplet member.
    loss = criterion(model(anchor), model(positive), model(negative))

    # Clear old gradients, back-propagate, then update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

  return running_loss / len(dataloader)

In [10]:
# Evaluation function
def eval_fn(model, dataloader, criterion):
  """
  Compute the mean loss per batch over ``dataloader`` without updating the model.

  Parameters
  ----------
    model: network that maps a batch of images to embeddings.
    dataloader: iterable yielding (anchor, positive, negative) batches.
    criterion: loss called as criterion(anchor_embs, positive_embs, negative_embs).

  Returns
  -------
    float: sum of the batch losses divided by the number of batches.
  """
  model.eval()  # evaluation mode (dropout off)
  running_loss = 0.0

  # Gradients are not needed for validation.
  with torch.no_grad():
    for batch in tqdm(dataloader, desc = "Validation progress"):
      anchor, positive, negative = (tensor.to(DEVICE) for tensor in batch)
      loss = criterion(model(anchor), model(positive), model(negative))
      running_loss += loss.item()

  return running_loss / len(dataloader)

In [11]:
# Triplet loss as the criterion and Adam as the optimizer. For now, replacing Adam is not recommended.
criterion = nn.TripletMarginLoss()
optimizer = torch.optim.Adam(params = model.parameters(), lr = LR)

In [None]:
# Files used by save_checkpoint / load_checkpoint: the weights and the validation loss recorded with them.
model_path = f"{DATASET_PATH}best_model.pt"
loss_path = f"{DATASET_PATH}best_valid_loss.txt"

def save_checkpoint(best_valid_loss):
  """
  Store the weights of the global ``model`` in ``model_path`` and the given loss in ``loss_path``.

  Parameters
  ----------
    best_valid_loss (float): the best valid loss calculated among all training sessions.
  """
  weights = model.state_dict()
  torch.save(weights, model_path)

  # print(..., end="") writes str(best_valid_loss) with no trailing newline.
  with open(loss_path, 'w') as loss_file:
    print(best_valid_loss, end="", file=loss_file)

  print("CHECKPOINT_SAVED")


def load_checkpoint():
  """
  Loads the best weights into the global ``model`` and reads their valid loss from ``loss_path``.
  If either file is missing, nothing is loaded and the training starts from scratch.

  Returns
  -------
    best_valid_loss (float): the recorded best valid loss; ``np.inf`` when no
    checkpoint is available or the loss file does not contain a number.
  """
  if not (os.path.exists(model_path) and os.path.exists(loss_path)):
    print("UNABLE_TO_LOAD_CHECKPOINT")
    # np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0.
    return np.inf

  # map_location restores the tensors onto DEVICE even when the checkpoint
  # was saved from a different device (e.g. saved on GPU, resumed on CPU).
  model.load_state_dict(torch.load(model_path, map_location=DEVICE))

  with open(loss_path, 'r') as f:
    raw_loss = f.read().strip()

  # Report success only after the weights have actually been restored.
  print("CHECKPOINT_LOADED")

  try:
    return float(raw_loss)
  except ValueError:
    # An empty or corrupted loss file (e.g. an interrupted write) should not
    # abort the session: keep the loaded weights and treat the loss as unknown.
    print("INVALID_BEST_VALID_LOSS_FILE")
    return np.inf

In [13]:
# The actual training run, built from the functions defined in the previous cells.
# Start from the saved checkpoint when one exists.
best_valid_loss = load_checkpoint()

for i in range(EPOCHS):

  train_loss, valid_loss = (
    train_fn(model, trainloader, optimizer, criterion),
    eval_fn(model, validloader, criterion),
  )

  # Keep a checkpoint only when validation improves on the best loss so far.
  if best_valid_loss > valid_loss:
    save_checkpoint(valid_loss)
    best_valid_loss = valid_loss

  print(f"EPOCHS : {i+1} train_loss : {train_loss} valid_loss : {valid_loss}")

Training Progress:   0%|          | 0/500 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [15]:

# Train without resuming: any previously recorded best loss is ignored.
# np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0.
best_valid_loss = np.inf

for i in range(EPOCHS):

    train_loss = train_fn(model, trainloader, optimizer, criterion)
    valid_loss = eval_fn(model, validloader, criterion)

    if valid_loss < best_valid_loss:
        # Go through save_checkpoint so the weights and best_valid_loss.txt are
        # written together; saving only the weights would leave a stale loss
        # next to the new model for a later load_checkpoint() call.
        save_checkpoint(valid_loss)
        best_valid_loss = valid_loss
        print("SAVED_WEIGHTS_SUCCESS")

    print(f"EPOCHS : {i+1} train_loss : {train_loss} valid_loss : {valid_loss}")

Training Progress:   0%|          | 0/500 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Function that generates the encoding (embedding) vectors
def get_encoding_csv(model, anc_img_path):
  """
  Embed every listed image with ``model`` and return the paths next to their embeddings.

  Parameters
  ----------
    model: network used to compute the embeddings; it is switched to eval mode.
    anc_img_path (pd.Series): image paths, each appended to DATASET_PATH before reading.

  Returns
  -------
    df_enc (pd.DataFrame): the paths in the first column followed by one column
    per embedding component; rows keep the index of ``anc_img_path``.
  """
  encodings = []

  model.eval()

  with torch.no_grad():
    for image in tqdm(np.array(anc_img_path)):
      A = io.imread(DATASET_PATH + image)
      # Prepend an axis of length 1 (presumably the channel axis of a grayscale image — TODO confirm).
      A = np.expand_dims(A, 0)
      A = torch.from_numpy(A.astype(np.int32)) / 255.0
      A = A.to(DEVICE)
      # unsqueeze(0) adds the batch axis; squeeze() removes it again from the output.
      A_enc = model(A.unsqueeze(0))
      encodings.append(A_enc.squeeze().cpu().detach().numpy())

  # Give the embeddings the same index as the input paths. pd.concat(axis=1)
  # aligns on index labels, so a default 0..n-1 index would be paired with the
  # wrong rows (and padded with NaN) whenever the caller passes a Series whose
  # index is not a plain range, e.g. a column of a shuffled or filtered frame.
  encodings = pd.DataFrame(np.array(encodings), index = anc_img_path.index)
  df_enc = pd.concat([anc_img_path, encodings], axis = 1)

  return df_enc

In [None]:
# Reload the best weights once training is done. map_location restores the
# tensors onto DEVICE even if the file was saved from a different device.
model.load_state_dict(torch.load(DATASET_PATH + 'best_model.pt', map_location = DEVICE))

# Build the database of feature vectors for every anchor image listed in df
# (the full table, not only the training split).
df_enc = get_encoding_csv(model, df['anchor'])

  0%|          | 0/20000 [00:00<?, ?it/s]

In [None]:
# Write the computed encodings to 'database.csv' (relative to the current working directory).
df_enc.to_csv('database.csv', index = False)

# NOTE(review): this reads DATASET_PATH + 'embedded.csv', not the 'database.csv'
# written just above, so df_enc is replaced by the contents of a different file.
# Confirm which file is the intended embedding database.
df_enc = pd.read_csv(DATASET_PATH + 'embedded.csv')
df_enc.head()

In [None]:
def euclidean_dist(img_enc, anc_enc_arr):
    """
    Return the squared Euclidean distance between two encodings.

    No square root is taken: the result is the dot product of the difference
    with its own transpose. For two (1, D) arrays that is a (1, 1) array.
    """
    delta = img_enc - anc_enc_arr
    return np.dot(delta, delta.T)

In [None]:
def getImageEmbeddings(img, model):
    """
    Compute the embedding of a single image array with ``model``.

    The image gets a leading axis of length 1, is divided by 255, moved to
    DEVICE and passed through the model (in eval mode) as a batch of one.
    The model output is returned as a NumPy array.
    """
    scaled = torch.from_numpy(np.expand_dims(img, 0)) / 255
    model.eval()

    with torch.no_grad():
        # unsqueeze(0) adds the batch axis expected by the model.
        output = model(scaled.to(DEVICE).unsqueeze(0))
        embedding = np.array(output.detach().cpu().numpy())

    return embedding

In [None]:
def searchInDatabase(img_enc, database):
    """
    Return the 'anchor' entry of the database row whose encoding is closest to ``img_enc``.

    Closeness is the squared Euclidean distance, which ranks rows the same way
    as the true Euclidean distance.

    Parameters
    ----------
        img_enc (np.ndarray): query encoding, shaped (D,) or (1, D).
        database (pd.DataFrame): an 'anchor' column plus the encoding
            components in every column after the first.

    Returns
    -------
        The value of the 'anchor' column for the closest row.

    Raises
    ------
        ValueError: if ``database`` has no rows.
    """
    if len(database) == 0:
        raise ValueError("database is empty: no encodings to search")

    anc_enc_arr = database.iloc[:, 1:].to_numpy()
    query = np.asarray(img_enc).reshape(1, -1)

    # One vectorised pass instead of growing an array with np.append per row.
    diffs = anc_enc_arr - query
    sq_dist = (diffs * diffs).sum(axis=1)

    closest = int(np.argmin(sq_dist))

    # closest is a row position, so use .iloc: plain [] looks up index labels
    # and fails or returns the wrong row when the index is not 0..n-1.
    return database['anchor'].iloc[closest]