In [4]:
import os
import sys
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, PredefinedSplit, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from torchvision.models import resnet50
from torch.utils.data import DataLoader
from torch.utils.data import DataLoader, TensorDataset

from sklearn.model_selection import StratifiedKFold

# Add project path
project_path = os.path.abspath("../code")
sys.path.append(project_path)
from vipm_features import ResNet50FeatureExtractor

project_path = os.path.abspath("../code")  # Adatta il percorso a dove si trova il tuo progetto
sys.path.append(project_path)
project_path = os.path.abspath("../networks")  # Adatta il percorso a dove si trova il tuo progetto
sys.path.append(project_path)
from models import *
from vipm_features import *
import vipm_costants as CONST
from vipm_pipeline import *
from dataset import *
import torch

# Configure environment variable
# Binding a Python global alone does not affect any library: the value has to
# be exported to the process environment to be visible as an environment
# variable. The module-level name is kept for any code that reads it directly.
LOKY_MAX_CPU_COUNT = 16
os.environ["LOKY_MAX_CPU_COUNT"] = str(LOKY_MAX_CPU_COUNT)


In [2]:
# Load the CSV file
def load_csv(csv_path):
    """Read a header-less two-column CSV and return its columns as lists.

    The file is parsed with the column names ``image_name`` and ``label``
    (in that order).

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A tuple ``(image_names, labels)`` of two Python lists.
    """
    columns = ['image_name', 'label']
    frame = pd.read_csv(csv_path, names=columns, header=None)
    image_names, labels = (frame[column].tolist() for column in columns)
    return image_names, labels

# Load the compressed NPZ file
def load_features(npz_path):
    """Load the ``features`` and ``labels`` arrays stored in an ``.npz`` archive.

    The archive is opened with a context manager so the underlying file handle
    is released as soon as both arrays have been read (``np.load`` on an
    ``.npz`` returns a lazily-reading archive object that otherwise stays open).

    Args:
        npz_path: Path of the ``.npz`` file; it must contain the keys
            ``features`` and ``labels``.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays.

    Raises:
        KeyError: If either key is missing from the archive.
    """
    with np.load(npz_path) as archive:
        # Indexing materialises each array in memory, so both remain valid
        # after the archive is closed.
        features = archive['features']
        labels = archive['labels']
    return features, labels

# Paths
csv_path = '../dataset/train_small.csv'   # (image_name, label) rows of the small training set
csv_unlabeled = '../dataset/train_unlabeled.csv'
csv_test = '../dataset/val_info.csv'  # used below as the test-set listing

indir = '../dataset/train_set'  # Adjust to where the images are stored
test_dir = '../dataset/val_set'
test_degraded_dir = '../dataset/val_set_degraded'

outdir = '../features'  # Adjust to where the features are stored
os.makedirs(outdir, exist_ok=True)

# Load the retrieved features
# NOTE(review): the 20 / 40 / 80 suffixes presumably denote how many unlabeled
# samples were retrieved per query or class — confirm against the retrieval code.

# 20
npz_path = os.path.join(outdir, 'features_unlabeled_retrived.npz')
features_20, labels_20 = load_features(npz_path)

# 20 cleaned
npz_path = os.path.join(outdir, 'features_unlabeled_retrived_cleaned.npz')
features_20_cleaned, labels_20_cleaned = load_features(npz_path)

# 40
npz_path = os.path.join(outdir, 'features_unlabeled_retrived_40.npz')
features_40, labels_40 = load_features(npz_path)

# 40 cleaned
npz_path = os.path.join(outdir, 'features_unlabeled_retrived_cleaned_40.npz')
features_40_cleaned, labels_40_cleaned = load_features(npz_path)

# 80
npz_path = os.path.join(outdir, 'features_unlabeled_retrived_80.npz')
features_80, labels_80 = load_features(npz_path)

# 80 cleaned
npz_path = os.path.join(outdir, 'features_unlabeled_retrived_cleaned_80.npz')
features_80_cleaned, labels_80_cleaned = load_features(npz_path)

# Filtered features of the small training set
npz_path = os.path.join(outdir, 'features_small_filtered.npz')
features_small_filtered, labels_small_filtered = load_features(npz_path)

# Load image names and labels of the small training set from the CSV, then get
# the matching ResNet50 features (only the first of get_features' three return
# values is used here).
image_names, labels_small = load_csv(csv_path)
labels_small = np.array(labels_small)
extractor = ResNet50FeatureExtractor()
features_small, _, _ = extractor.get_features(csv=csv_path, indir=indir, outdir=outdir, normalize=True)

# Test set
image_names_test, labels_test = load_csv(csv_test)
labels_test = np.array(labels_test)
features_test, _, _ = extractor.get_features(csv=csv_test, indir=test_dir, outdir=outdir, normalize=True)

# Test set degraded
# Uses the same CSV listing as the clean test set; only the image directory and
# the explicit feature file name differ.
image_names_test_degraded, labels_test_degraded = load_csv(csv_test)
labels_test_degraded = np.array(labels_test_degraded)
features_test_degraded, _, _ = extractor.get_features(csv=csv_test, indir=test_degraded_dir, outdir=outdir, normalize=True, file_name='features_test_degraded_normalized.npz')

Caricamento delle feature da ../features\train_small_resnet50_features_normalized.npz
Caricamento delle feature da ../features\val_info_resnet50_features_normalized.npz
Caricamento delle feature da ../features\features_test_degraded_normalized.npz


In [5]:
# Destination file and column layout of the results log; `logs` collects one
# row (a list matching `log_columns`) per evaluated model / test set.
log_path = "results_log.csv"
log_columns = ["Dimension", "Model", "K", "Accuracy", "Top-5 Accuracy", "Top-10 Accuracy", "Loss"]
logs = []

# Dimension / feature combinations: one entry per retrieval size, pairing the
# raw retrieved features and labels with their cleaned counterparts.
configurations = [
    {
        "dimension": dimension,
        "features": raw_features,
        "labels": raw_labels,
        "cleaned_features": cleaned_features,
        "cleaned_labels": cleaned_labels,
    }
    for dimension, raw_features, raw_labels, cleaned_features, cleaned_labels in (
        (20, features_20, labels_20, features_20_cleaned, labels_20_cleaned),
        (40, features_40, labels_40, features_40_cleaned, labels_40_cleaned),
        (80, features_80, labels_80, features_80_cleaned, labels_80_cleaned),
    )
]

def _build_loader(X, y, batch_size, shuffle):
    """Wrap features and labels in a DataLoader yielding (float, long) tensor pairs."""
    dataset = TensorDataset(torch.FloatTensor(X), torch.LongTensor(y))
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


def _train_and_evaluate(model, X, y, X_test, y_test, X_test_degraded, y_test_degraded, input_dim=2048):
    """Train `model` on (X, y) and evaluate it on the clean and degraded test sets.

    Shared implementation of the two public ``train_and_evaluate_*`` helpers,
    which differ only in the network they instantiate.

    Args:
        model: Network to train; its parameters are optimised with Adam
            (lr=0.01) under a StepLR schedule (step_size=5, gamma=0.1).
        X, y: Training features and labels; 10% is held out for validation
            with a fixed ``random_state=42``.
        X_test, y_test: Features and labels of the clean test set.
        X_test_degraded, y_test_degraded: Features and labels of the degraded
            test set.
        input_dim: Forwarded to ``ModelOptions`` as ``input_dim``.

    Returns:
        ``(top1, top5, top10, loss, top1_degraded, top5_degraded,
        top10_degraded, loss_degraded)`` taken from ``NeuralNetwork.predict``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    options = ModelOptions(torch.nn.CrossEntropyLoss(), optimizer, scheduler, input_dim=input_dim)
    network = NeuralNetwork(model, options)

    # Fixed seed so every model / configuration sees the same train/val split.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

    batch_size = options.batch_size
    # Only the training data is shuffled; validation and test loaders keep the
    # original sample order so evaluation is deterministic.
    train_loader = _build_loader(X_train, y_train, batch_size, shuffle=True)
    val_loader = _build_loader(X_val, y_val, batch_size, shuffle=False)
    test_loader = _build_loader(X_test, y_test, batch_size, shuffle=False)
    test_degraded_loader = _build_loader(X_test_degraded, y_test_degraded, batch_size, shuffle=False)

    network.fit(train_loader, val_loader)

    # predict() also returns the predictions and targets; only the aggregate
    # metrics are reported by this helper.
    loss, top1_accuracy, top5_accuracy, top10_accuracy, _, _ = network.predict(test_loader)
    (loss_degraded, top1_accuracy_degraded, top5_accuracy_degraded,
     top10_accuracy_degraded, _, _) = network.predict(test_degraded_loader)

    return (top1_accuracy, top5_accuracy, top10_accuracy, loss,
            top1_accuracy_degraded, top5_accuracy_degraded, top10_accuracy_degraded, loss_degraded)


def train_and_evaluate_neural_network_one_layer(X, y, X_test, y_test, X_test_degraded, y_test_degraded,
                                                input_dim=2048, num_classes=251):
    """Train a ``OneLayerNetwork`` and report metrics on both test sets.

    Args:
        X, y: Training features and labels (split 90/10 into train/validation).
        X_test, y_test: Clean test features and labels.
        X_test_degraded, y_test_degraded: Degraded test features and labels.
        input_dim: Feature dimensionality passed to the network (default 2048).
        num_classes: Number of output classes (default 251).

    Returns:
        ``(top1, top5, top10, loss, top1_degraded, top5_degraded,
        top10_degraded, loss_degraded)``.
    """
    return _train_and_evaluate(
        OneLayerNetwork(input_dim, num_classes),
        X, y, X_test, y_test, X_test_degraded, y_test_degraded,
        input_dim=input_dim,
    )


def train_and_evaluate_neural_network_classifer(X, y, X_test, y_test, X_test_degraded, y_test_degraded,
                                                input_dim=2048, num_classes=251):
    """Train a ``ClassifierNetwork`` and report metrics on both test sets.

    Same contract as ``train_and_evaluate_neural_network_one_layer``; only the
    network class differs.

    Args:
        X, y: Training features and labels (split 90/10 into train/validation).
        X_test, y_test: Clean test features and labels.
        X_test_degraded, y_test_degraded: Degraded test features and labels.
        input_dim: Feature dimensionality passed to the network (default 2048).
        num_classes: Number of output classes (default 251).

    Returns:
        ``(top1, top5, top10, loss, top1_degraded, top5_degraded,
        top10_degraded, loss_degraded)``.
    """
    return _train_and_evaluate(
        ClassifierNetwork(input_dim, num_classes),
        X, y, X_test, y_test, X_test_degraded, y_test_degraded,
        input_dim=input_dim,
    )



# One (small_cleaned, dim_cleaned) pair per row of `logs`. The rows themselves
# keep the `log_columns` layout; without these flags, runs on cleaned and
# uncleaned data would be indistinguishable in the saved CSV. Rows already
# present in `logs` (if any) are padded so both lists stay aligned.
log_variant_columns = ["Small Cleaned", "Dim Cleaned"]
log_variants = [(None, None)] * len(logs)


def _run_and_log(model_name, train_fn, features, labels, dim, small_cleaned, dim_cleaned):
    """Train one model, print its metrics and append two rows to `logs`.

    Args:
        model_name: Label used in the printed messages and in the "Model"
            column ("One Layer" or "Classifier").
        train_fn: Training helper called as
            ``train_fn(features, labels, features_test, labels_test,
            features_test_degraded, labels_test_degraded)`` and returning the
            eight metrics for the clean and degraded test sets.
        features, labels: Training data for this run.
        dim: Value written to the "Dimension" column (0 for the baseline).
        small_cleaned: Whether the filtered small set was used.
        dim_cleaned: Whether the cleaned retrieved set was used (None when no
            retrieved set is involved).
    """
    print(f"  Prova con modello {model_name}")
    (top1, top5, top10, loss,
     top1_degraded, top5_degraded, top10_degraded, loss_degraded) = train_fn(
        features, labels, features_test, labels_test, features_test_degraded, labels_test_degraded)
    print(f"    Test Set {model_name}: Accuracy={top1}, Top-5 Accuracy={top5}, Top-10 Accuracy={top10}, Loss={loss}")
    print(f"    Test Set Degraded {model_name}: Accuracy={top1_degraded}, Top-5 Accuracy={top5_degraded}, Top-10 Accuracy={top10_degraded}, Loss={loss_degraded}")
    # The "K" column is not applicable to these models and is logged as 0.
    logs.append([dim, f"Test Set {model_name}", 0, top1, top5, top10, loss])
    logs.append([dim, f"Test Set Degraded {model_name}", 0, top1_degraded, top5_degraded, top10_degraded, loss_degraded])
    log_variants.extend([(small_cleaned, dim_cleaned)] * 2)


# Baseline: train on the small set only (raw, then filtered)
for small_cleaned in [False, True]:
    print(f"Small Cleaned: {small_cleaned}")

    if small_cleaned:
        small_features = features_small_filtered
        small_labels = labels_small_filtered
    else:
        small_features = features_small
        small_labels = labels_small

    _run_and_log("One Layer", train_and_evaluate_neural_network_one_layer,
                 small_features, small_labels, 0, small_cleaned, None)
    _run_and_log("Classifier", train_and_evaluate_neural_network_classifer,
                 small_features, small_labels, 0, small_cleaned, None)


# For every retrieval size, train on retrieved + small features and evaluate on
# both test sets (the training helpers hold out 10% of the data for validation;
# no cross-validation is performed here).
for config in configurations:
    dim = config["dimension"]
    print(f"Iniziando configurazione per dimensione: {dim}")

    # Every combination of raw/cleaned small set and raw/cleaned retrieved set
    for small_cleaned in [False, True]:
        for dim_cleaned in [False, True]:

            if small_cleaned:
                small_features = features_small_filtered
                small_labels = labels_small_filtered
            else:
                small_features = features_small
                small_labels = labels_small

            # NOTE(review): `[0]` takes the first row of the stored label
            # array — presumably labels are saved with shape (1, N); confirm
            # against the code that writes these .npz files.
            if dim_cleaned:
                current_features = config["cleaned_features"]
                current_labels = config["cleaned_labels"][0]
            else:
                current_features = config["features"]
                current_labels = config["labels"][0]

            print(f"  Small Cleaned: {small_cleaned}, Dim Cleaned: {dim_cleaned}")

            # Merge retrieved and small features into one training set
            combined_features = np.concatenate((current_features, small_features), axis=0)
            combined_labels = np.concatenate((current_labels, small_labels), axis=0)

            _run_and_log("One Layer", train_and_evaluate_neural_network_one_layer,
                         combined_features, combined_labels, dim, small_cleaned, dim_cleaned)
            _run_and_log("Classifier", train_and_evaluate_neural_network_classifer,
                         combined_features, combined_labels, dim, small_cleaned, dim_cleaned)

# Save the logs to a CSV; the variant flags are appended as trailing columns so
# the original columns keep their names and positions.
log_df = pd.concat(
    [
        pd.DataFrame(logs, columns=log_columns),
        pd.DataFrame(log_variants, columns=log_variant_columns),
    ],
    axis=1,
)
log_df.to_csv(log_path, index=False)
print(f"Log salvato in {log_path}")

Small Cleaned: False
  Prova con modello One Layer
Epoch 1/100:
  Train Loss: 5.6685, Train Accuracy: 0.51%
  Val Loss: 5.5236, Val Accuracy: 0.20%
Epoch 2/100:
  Train Loss: 5.5848, Train Accuracy: 0.66%
  Val Loss: 5.5281, Val Accuracy: 0.20%
Epoch 3/100:
  Train Loss: 5.4419, Train Accuracy: 1.31%
  Val Loss: 5.5281, Val Accuracy: 0.20%
Epoch 4/100:
  Train Loss: 5.3110, Train Accuracy: 1.44%
  Val Loss: 5.5281, Val Accuracy: 0.20%
Epoch 5/100:
  Train Loss: 5.1721, Train Accuracy: 1.81%
  Val Loss: 5.5281, Val Accuracy: 0.20%
Epoch 6/100:
  Train Loss: 5.0603, Train Accuracy: 2.41%
  Val Loss: 5.5281, Val Accuracy: 0.20%
Epoch 7/100:
  Train Loss: 5.0432, Train Accuracy: 2.59%
  Val Loss: 5.5281, Val Accuracy: 0.20%
Epoch 8/100:
  Train Loss: 5.0114, Train Accuracy: 2.77%
  Val Loss: 5.5281, Val Accuracy: 0.20%
Epoch 9/100:
  Train Loss: 4.9695, Train Accuracy: 2.37%
  Val Loss: 5.5281, Val Accuracy: 0.20%
Epoch 10/100:
  Train Loss: 4.9731, Train Accuracy: 2.55%
  Val Loss: 5.5281