In [2]:
import os
import sys
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, PredefinedSplit, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from torchvision.models import resnet50
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import StratifiedKFold

# Add project path
project_path = os.path.abspath("../code")
sys.path.append(project_path)
from vipm_features import ResNet50FeatureExtractor

project_path = os.path.abspath("../code")  # Adatta il percorso a dove si trova il tuo progetto
sys.path.append(project_path)
project_path = os.path.abspath("../networks")  # Adatta il percorso a dove si trova il tuo progetto
sys.path.append(project_path)
from models import *
from vipm_features import *
import vipm_costants as CONST
from vipm_pipeline import *
from dataset import *
import torch

# Cap the number of CPU cores joblib/loky assumes are available.
# A plain Python global is invisible to loky: the value has to be exported
# through the process environment (as a string) to have any effect.
LOKY_MAX_CPU_COUNT = 16
os.environ["LOKY_MAX_CPU_COUNT"] = str(LOKY_MAX_CPU_COUNT)


In [6]:
# Load the image-name / label CSV file
def load_csv(csv_path):
    """Read a header-less two-column CSV and return its columns as lists.

    The first column is interpreted as the image name and the second as the
    label.

    Args:
        csv_path: Path (or any object accepted by ``pd.read_csv``) of the CSV.

    Returns:
        A tuple ``(image_names, labels)`` of two Python lists, in file order.
    """
    columns = ['image_name', 'label']
    table = pd.read_csv(csv_path, header=None, names=columns)
    image_names, labels = (table[column].tolist() for column in columns)
    return image_names, labels

# Load features and labels from a compressed NPZ archive
def load_features(npz_path):
    """Load the ``features`` and ``labels`` arrays stored in an ``.npz`` file.

    Args:
        npz_path: Path of the ``.npz`` archive to read.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays.

    Raises:
        KeyError: If the archive has no ``features`` or ``labels`` entry.
    """
    # np.load returns a lazy NpzFile that keeps the archive open; the context
    # manager closes it once both arrays have been read into memory.
    with np.load(npz_path) as data:
        features = data['features']
        labels = data['labels']
    return features, labels

# Paths: label CSVs of the three training-set variants and of the test set
csv_path_70 = '../dataset/filtered_small_with_cleaned_retrieval_augmented_70_labels.csv'
csv_path_80 = '../dataset/filtered_small_with_cleaned_retrieval_augmented_80_labels.csv'
csv_path_90 = '../dataset/filtered_small_with_cleaned_retrieval_augmented_90_labels.csv'
csv_test = '../dataset/val_info.csv'

# Image folders matching the CSVs above
indir_70 = '../dataset/filtered_small_with_cleaned_retrieval_augmented_70'
indir_80 = '../dataset/filtered_small_with_cleaned_retrieval_augmented_80'
indir_90 = '../dataset/filtered_small_with_cleaned_retrieval_augmented_90'
test_dir = '../dataset/val_set'
test_degraded_dir = '../dataset/val_set_degraded'

# Folder handed to the extractor as `outdir` (adjust to where the features live)
outdir = '../features'
os.makedirs(outdir, exist_ok=True)

# Single extractor instance shared by every split
extractor = ResNet50FeatureExtractor()


def _load_split(csv_path, indir, **extractor_kwargs):
    """Return ``(image_names, labels, features)`` for one dataset split.

    Names and labels come from ``load_csv``; the features are the first of the
    three values returned by ``extractor.get_features`` (the other two are
    ignored). Extra keyword arguments are forwarded to ``get_features``.
    """
    image_names, labels = load_csv(csv_path)
    labels = np.array(labels)
    features, _unused_a, _unused_b = extractor.get_features(
        csv=csv_path, indir=indir, outdir=outdir, normalize=True, **extractor_kwargs
    )
    return image_names, labels, features


# Training variants 70 / 80 / 90
image_names_70, labels_70, features_70 = _load_split(csv_path_70, indir_70)
image_names_80, labels_80, features_80 = _load_split(csv_path_80, indir_80)
image_names_90, labels_90, features_90 = _load_split(csv_path_90, indir_90)

# Test set
image_names_test, labels_test, features_test = _load_split(csv_test, test_dir)

# Degraded test set: same label CSV, different image folder, explicit feature file name
image_names_test_degraded, labels_test_degraded, features_test_degraded = _load_split(
    csv_test, test_degraded_dir, file_name='features_test_degraded_normalized.npz'
)

Caricamento delle feature da ../features\filtered_small_with_cleaned_retrieval_augmented_70_labels_resnet50_features_normalized.npz
Caricamento delle feature da ../features\filtered_small_with_cleaned_retrieval_augmented_80_labels_resnet50_features_normalized.npz
File non trovato. Estrazione delle feature...
Elaborazione immagine 1/11971 - Tempo rimanente stimato: 0.00 secondi
Elaborazione immagine 101/11971 - Tempo rimanente stimato: 309.57 secondi
Elaborazione immagine 201/11971 - Tempo rimanente stimato: 289.12 secondi
Elaborazione immagine 301/11971 - Tempo rimanente stimato: 284.93 secondi
Elaborazione immagine 401/11971 - Tempo rimanente stimato: 279.72 secondi
Elaborazione immagine 501/11971 - Tempo rimanente stimato: 277.06 secondi
Elaborazione immagine 601/11971 - Tempo rimanente stimato: 276.24 secondi
Elaborazione immagine 701/11971 - Tempo rimanente stimato: 276.30 secondi
Elaborazione immagine 801/11971 - Tempo rimanente stimato: 275.31 secondi
Elaborazione immagine 901/11

In [None]:
# Destination file and column layout of the results log
log_path = "results_log.csv"
log_columns = ["Dimension", "Model", "K", "Accuracy", "Top-5 Accuracy", "Top-10 Accuracy", "Loss"]

# One row per evaluated (dimension, model, K) combination is appended here
logs = []

# One configuration per training-set variant; the labels are kept wrapped in a
# single-element list, which is how the evaluation loop reads them back.
configurations = [
    {"dimension": dimension, "features": features, "labels": [labels]}
    for dimension, features, labels in (
        (70, features_70, labels_70),
        (80, features_80, labels_80),
        (90, features_90, labels_90),
    )
]

_FEATURE_DIM = 2048  # input size given to the networks and to ModelOptions(input_dim=...)
_NUM_CLASSES = 251   # second constructor argument of the networks (presumably the class count)


def _make_loader(features, labels, batch_size, shuffle):
    """Wrap a feature/label pair in a TensorDataset-backed DataLoader."""
    dataset = TensorDataset(torch.FloatTensor(features), torch.LongTensor(labels))
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


def _train_and_evaluate_network(model, X, y, X_test, y_test, X_test_degraded, y_test_degraded):
    """Train ``model`` on (X, y) and evaluate it on the clean and degraded test sets.

    Shared implementation behind the public ``train_and_evaluate_*`` functions.

    Args:
        model: Network instance to train (its parameters are given to Adam).
        X, y: Training features and labels; 10% is held out for validation.
        X_test, y_test: Clean test features and labels.
        X_test_degraded, y_test_degraded: Degraded test features and labels.

    Returns:
        An 8-tuple ``(top1, top5, top10, loss, top1_degraded, top5_degraded,
        top10_degraded, loss_degraded)``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    # Multiply the learning rate by 0.1 every 5 scheduler steps.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    options = ModelOptions(torch.nn.CrossEntropyLoss(), optimizer, scheduler, input_dim=_FEATURE_DIM)
    network = NeuralNetwork(model, options)

    # Fixed seed so the train/validation split is the same on every run.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

    batch_size = options.batch_size
    # Only the training data is shuffled; evaluation loaders keep a fixed
    # order so that batch-wise metrics do not depend on the RNG state.
    train_loader = _make_loader(X_train, y_train, batch_size, shuffle=True)
    val_loader = _make_loader(X_val, y_val, batch_size, shuffle=False)
    test_loader = _make_loader(X_test, y_test, batch_size, shuffle=False)
    test_degraded_loader = _make_loader(X_test_degraded, y_test_degraded, batch_size, shuffle=False)

    network.fit(train_loader, val_loader)

    # predict() yields six values; the last two (predictions and targets) are not needed here.
    loss, top1_accuracy, top5_accuracy, top10_accuracy, _, _ = network.predict(test_loader)
    (loss_degraded, top1_accuracy_degraded, top5_accuracy_degraded,
     top10_accuracy_degraded, _, _) = network.predict(test_degraded_loader)

    return (top1_accuracy, top5_accuracy, top10_accuracy, loss,
            top1_accuracy_degraded, top5_accuracy_degraded, top10_accuracy_degraded, loss_degraded)


def train_and_evaluate_neural_network_one_layer(X, y, X_test, y_test, X_test_degraded, y_test_degraded):
    """Train a ``OneLayerNetwork`` on (X, y) and evaluate it on both test sets.

    Returns:
        ``(top1, top5, top10, loss, top1_degraded, top5_degraded,
        top10_degraded, loss_degraded)``.
    """
    return _train_and_evaluate_network(
        OneLayerNetwork(_FEATURE_DIM, _NUM_CLASSES),
        X, y, X_test, y_test, X_test_degraded, y_test_degraded,
    )


def train_and_evaluate_neural_network_classifer(X, y, X_test, y_test, X_test_degraded, y_test_degraded):
    """Train a ``ClassifierNetwork`` on (X, y) and evaluate it on both test sets.

    Returns:
        ``(top1, top5, top10, loss, top1_degraded, top5_degraded,
        top10_degraded, loss_degraded)``.
    """
    return _train_and_evaluate_network(
        ClassifierNetwork(_FEATURE_DIM, _NUM_CLASSES),
        X, y, X_test, y_test, X_test_degraded, y_test_degraded,
    )


# Train on each configuration and evaluate on the clean and degraded test sets
def _top_k_accuracy(probabilities, classes, labels, k):
    """Fraction of samples whose true label is among the k most probable classes.

    Args:
        probabilities: 2-D array of per-class scores, one row per sample.
        classes: Class label associated with each column of ``probabilities``.
        labels: True label of each sample.
        k: Number of top-scoring classes to consider.
    """
    # argsort is ascending, so the last k columns hold the k highest scores.
    top_k_columns = np.argsort(probabilities, axis=1)[:, -k:]
    # Columns follow `classes`, not the raw label values: translate column
    # indices into labels before comparing them with the ground truth.
    top_k_labels = np.asarray(classes)[top_k_columns]
    return np.mean(np.any(top_k_labels == np.asarray(labels)[:, None], axis=1))


def _evaluate_knn(knn, features, labels):
    """Return ``(top-1, top-5, top-10)`` accuracy of a fitted KNN on one dataset."""
    # Computed once and reused for both top-k metrics.
    probabilities = knn.predict_proba(features)
    accuracy = knn.score(features, labels)
    top_5_accuracy = _top_k_accuracy(probabilities, knn.classes_, labels, 5)
    top_10_accuracy = _top_k_accuracy(probabilities, knn.classes_, labels, 10)
    return accuracy, top_5_accuracy, top_10_accuracy


# (display name, training function) for each neural model to try
network_trainers = [
    ("One Layer", train_and_evaluate_neural_network_one_layer),
    ("Classifier", train_and_evaluate_neural_network_classifer),
]

# (display name, features, labels) for each evaluation set
evaluation_sets = [
    ("Test Set", features_test, labels_test),
    ("Test Set Degraded", features_test_degraded, labels_test_degraded),
]

for config in configurations:
    dim = config["dimension"]
    print(f"Iniziando configurazione per dimensione: {dim}")

    current_features = config["features"]
    current_labels = config["labels"][0]

    # Neural models
    for model_name, train_and_evaluate in network_trainers:
        print(f"  Prova con modello {model_name}")
        (top1_accuracy, top5_accuracy, top10_accuracy, loss,
         top1_accuracy_degraded, top5_accuracy_degraded,
         top10_accuracy_degraded, loss_degraded) = train_and_evaluate(
            current_features, current_labels,
            features_test, labels_test,
            features_test_degraded, labels_test_degraded,
        )
        print(f"    Test Set {model_name}: Accuracy={top1_accuracy}, Top-5 Accuracy={top5_accuracy}, Top-10 Accuracy={top10_accuracy}, Loss={loss}")
        print(f"    Test Set Degraded {model_name}: Accuracy={top1_accuracy_degraded}, Top-5 Accuracy={top5_accuracy_degraded}, Top-10 Accuracy={top10_accuracy_degraded}, Loss={loss_degraded}")
        # K is not meaningful for the neural models, so it is logged as 0.
        logs.append([dim, f"Test Set {model_name}", 0, top1_accuracy, top5_accuracy, top10_accuracy, loss])
        logs.append([dim, f"Test Set Degraded {model_name}", 0, top1_accuracy_degraded, top5_accuracy_degraded, top10_accuracy_degraded, loss_degraded])

    # KNN with several neighbourhood sizes
    for k in [5, 10, 25, 50, 100, 150]:
        print(f"    Valutazione per K={k}")

        # Fit on the features of the current configuration only
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(current_features, current_labels)

        for set_name, set_features, set_labels in evaluation_sets:
            accuracy, top_5_accuracy, top_10_accuracy = _evaluate_knn(knn, set_features, set_labels)
            # KNN has no loss: log NaN so every row matches the seven log columns.
            logs.append([dim, f"{set_name} KNN", k, accuracy, top_5_accuracy, top_10_accuracy, np.nan])
            print(f"      {set_name} KNN: Accuracy={accuracy}, Top-5 Accuracy={top_5_accuracy}, Top-10 Accuracy={top_10_accuracy}")

# Save the collected results to CSV
log_df = pd.DataFrame(logs, columns=log_columns)
log_df.to_csv(log_path, index=False)
print(f"Log salvato in {log_path}")

Iniziando configurazione per dimensione: 70
  Prova con modello One Layer
Epoch 1/100:
  Train Loss: 5.5749, Train Accuracy: 0.77%
  Val Loss: 5.5238, Val Accuracy: 0.34%
Epoch 2/100:
  Train Loss: 5.1660, Train Accuracy: 1.58%
  Val Loss: 5.3921, Val Accuracy: 1.59%
Epoch 3/100:
  Train Loss: 4.8733, Train Accuracy: 2.57%
  Val Loss: 4.9475, Val Accuracy: 1.71%
Epoch 4/100:
  Train Loss: 4.6740, Train Accuracy: 3.64%
  Val Loss: 4.7922, Val Accuracy: 3.70%
Epoch 5/100:
  Train Loss: 4.4874, Train Accuracy: 5.34%
  Val Loss: 4.5034, Val Accuracy: 5.30%
Epoch 6/100:
  Train Loss: 4.3470, Train Accuracy: 6.22%
  Val Loss: 4.2989, Val Accuracy: 7.35%
Epoch 7/100:
  Train Loss: 4.3105, Train Accuracy: 7.27%
  Val Loss: 4.2284, Val Accuracy: 7.69%
Epoch 8/100:
  Train Loss: 4.2776, Train Accuracy: 7.45%
  Val Loss: 4.1911, Val Accuracy: 9.00%
Epoch 9/100:
  Train Loss: 4.2426, Train Accuracy: 7.76%
  Val Loss: 4.1684, Val Accuracy: 8.83%
Epoch 10/100:
  Train Loss: 4.2361, Train Accuracy: 7