In [1]:
import os
import random
import shutil
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.optim as optim
from sklearn.metrics import confusion_matrix, accuracy_score, roc_curve, auc, classification_report
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import StepLR, OneCycleLR
from torch.utils.data import DataLoader, Dataset, Subset
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from training import *

In [2]:
import training

In [None]:
# Seed every random number generator this notebook can touch, not just
# torch, so that a Restart & Run All is repeatable end to end.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

In [4]:
# Project layout, resolved relative to the parent of the current working
# directory. ROOT_DIR stays a Path object; every derived location is
# exported as a plain string.
ROOT_DIR = Path.cwd().parent

_dataset_root = ROOT_DIR / 'dataset'
_models_root = ROOT_DIR / 'models'
_baselines_root = _models_root / 'baselines'

# Input data
DATA_DIR = str(_dataset_root)
PREPROCESSED_DIR = str(_dataset_root / 'preprocessed')
CSV_PATH = str(_dataset_root / 'csv_mappings' / 'train.csv')

# Model artefacts; results live underneath the baselines directory
MODEL_DIR = str(_models_root)
BASELINE_DIR = str(_baselines_root)
BASELINE_FINETUNED_DIR = str(_models_root / 'baselines_finetuned')
RESULT_DIR = str(_baselines_root / 'results')


In [5]:
# Integer class index -> display name. The position in the list is the
# class index, so the order of the entries must not change.
CLASS_NAMES = dict(enumerate([
    "amanita",
    "boletus",
    "chantelle",
    "deterrimus",
    "rufus",
    "torminosus",
    "aurantiacum",
    "procera",
    "involutus",
    "russula",
]))

In [6]:
# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Batch size used for the notebook-level data loaders.
BATCH_SIZE = 32
# Number of target classes handed to the model setup and the plotting helpers.
NUM_CLASSES = 10

# Training schedule: epoch budget and the `patience` value forwarded to training.
EPOCHS = 20
PATIENCE = 3
SCHEDULER = 'StepLR'


In [7]:
# Build the notebook-level train/validation/test loaders with the default
# BATCH_SIZE. Later cells use `val_loader` to score each grid-search run and
# `test_loader` for the final evaluation.
train_loader, val_loader, test_loader = training.get_data_loaders(PREPROCESSED_DIR, CSV_PATH, BATCH_SIZE)

In [8]:
# Full hyperparameter search space (3 values per axis).
# NOTE: a later cell rebinds `param_grid` to a smaller grid before the search
# loop runs, so this definition is never the one that is searched.
# NOTE: the "no scheduler" option is the string 'None', not the None object.
param_grid = dict(
    batch_size=[16, 32, 64],
    learning_rate=[0.0001, 0.001, 0.01],
    dropout_prob=[0.3, 0.5, 0.7],
    fc_layer_size=[1024, 2048, 4096],
    scheduler=['StepLR', 'OneCycleLR', 'None'],
)


In [9]:
def grid_search_training(batch_size, learning_rate, dropout_prob, fc_layer_size, scheduler_type):
    """Train one 'custom_resnet' configuration and return it with its loss.

    Args:
        batch_size: Batch size passed to `get_data_loaders`.
        learning_rate: Learning rate passed to `set_model_for_training`.
        dropout_prob: Accepted for the grid-search interface.
        fc_layer_size: Accepted for the grid-search interface.
        scheduler_type: Scheduler name passed to `set_model_for_training`.

    Returns:
        A `(model, criterion)` tuple: the object returned by
        `train_and_validate` and the criterion produced by
        `set_model_for_training`.

    NOTE(review): `dropout_prob` and `fc_layer_size` are not forwarded to any
    call in this function, so configurations that differ only in those two
    values go through identical calls here. Confirm whether
    `set_model_for_training` should receive them.
    """
    architecture = 'custom_resnet'
    checkpoint_name = f'{architecture}.pth'

    # Loaders are rebuilt per run because the batch size is being searched.
    # The third (test) loader is not used during the search.
    loaders = get_data_loaders(PREPROCESSED_DIR, CSV_PATH, batch_size)
    train_batches, val_batches, _unused_test_batches = loaders

    setup = set_model_for_training(
        architecture, train_batches, EPOCHS, learning_rate, NUM_CLASSES, DEVICE, scheduler_type,
        finetuned=True, write=True,
    )
    net, optimiser, loss_fn, lr_schedule, output_dir, run_writer = setup

    trained_net = train_and_validate(
        net, train_batches, val_batches, loss_fn, optimiser,
        epochs=EPOCHS,
        device=DEVICE,
        writer=run_writer,
        scheduler=lr_schedule,
        patience=PATIENCE,
        save_path=os.path.join(output_dir, checkpoint_name),
    )

    return trained_net, loss_fn


In [10]:
from sklearn.model_selection import ParameterGrid

# Reduced search space (2 values per axis, 32 combinations in total).
# This is the grid the search loop iterates over.
param_grid = dict(
    batch_size=[16, 32],
    learning_rate=[0.0001, 0.001],
    dropout_prob=[0.3, 0.5],
    fc_layer_size=[1024, 2048],
    scheduler=['StepLR', 'OneCycleLR'],
)

In [None]:
# Exhaustive grid search: train one model per combination in `param_grid`,
# score it on the notebook-level `val_loader`, and remember the winner.
#
# Start from -inf rather than 0 so that the first scored run is always
# recorded, even if its accuracy is 0; otherwise `best_params` could stay None.
best_accuracy = float('-inf')
best_params = None
# One record per configuration (its parameters plus validation metrics), kept
# so the whole search can be inspected afterwards, e.g. pd.DataFrame(grid_results).
grid_results = []

for params in ParameterGrid(param_grid):
    print(f"Training with params: {params}")

    model, criterion = grid_search_training(
        batch_size=params['batch_size'],
        learning_rate=params['learning_rate'],
        dropout_prob=params['dropout_prob'],
        fc_layer_size=params['fc_layer_size'],
        scheduler_type=params['scheduler']
    )

    # Only the first two values returned by evaluate_model are needed here.
    val_loss, val_accuracy, *_ = evaluate_model(model, val_loader, criterion, DEVICE)
    grid_results.append({**params, 'val_loss': val_loss, 'val_accuracy': val_accuracy})

    # Track best accuracy; ties keep the earlier configuration.
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy
        best_params = params

print(f"Best Accuracy: {best_accuracy}%")
print(f"Best Hyperparameters: {best_params}")


In [None]:
def evaluate_and_plot(model, test_loader, criterion, device):
    """Evaluate `model` on `test_loader`, print its accuracy and draw the diagnostics.

    The five values returned by `evaluate_model` are unpacked as loss,
    accuracy, labels, predictions and prediction probabilities. The accuracy
    is printed; the remaining arrays feed the confusion matrix, ROC curve,
    per-class accuracy and precision-recall helpers, in that order.

    Args:
        model: Model forwarded to `evaluate_model`.
        test_loader: Data loader forwarded to `evaluate_model`.
        criterion: Loss function forwarded to `evaluate_model`.
        device: Device forwarded to `evaluate_model`.

    Returns:
        None.
    """
    metrics = evaluate_model(model, test_loader, criterion, device)
    # The loss is returned by evaluate_model but not reported here.
    _loss, accuracy, y_true, y_pred, y_score = metrics

    print(f"Test Accuracy: {accuracy}%")

    # Hard predictions drive the confusion matrix and per-class accuracy;
    # the probabilities drive the two curve plots.
    plot_confusion_matrix(y_true, y_pred, NUM_CLASSES)
    plot_roc_curve(y_true, y_score, NUM_CLASSES)
    per_class_accuracy(y_true, y_pred, NUM_CLASSES, CLASS_NAMES)
    plot_precision_recall_curve(y_true, y_score, NUM_CLASSES)

# Load a saved model and run the full test-set evaluation on it.
# NOTE(review): this loads 'densenet' with finetuned=False, while the grid
# search in this notebook trains 'custom_resnet' with finetuned=True.
# Confirm that this is the model meant to be reported as "best".
best_model = load_model_for_explaining(
    'densenet', NUM_CLASSES, DEVICE, finetuned=False
)  

# NOTE(review): `criterion` is not defined in this cell. In this notebook it is
# only assigned inside the grid-search loop, so this call fails on a fresh
# kernel unless that loop has been run first.
evaluate_and_plot(best_model, test_loader, criterion, DEVICE)
