In [1]:
#  #?  change which dataset to take from to train
# import pandas as pd
# training_set = pd.read_csv('./DATA_OVERFIT/folds/train_fold_0.csv')
# cv_set = pd.read_csv('./DATA_OVERFIT/folds/test_fold_0.csv')
# holdout_set = pd.read_csv('./DATA_0/holdout_set/holdout_data_OHE.csv')
# print(training_set.shape)   
# print(cv_set.shape)
# print(holdout_set.shape)

# train_x = training_set.drop(columns = 'DR')
# train_y = training_set[['DR']]
# test_x = cv_set.drop(columns = 'DR')
# test_y = cv_set[['DR']]       
        
# kFolds = train_x, test_x, train_y, test_y
# for i in kFolds:
#     print(i.shape)
    
    
# #! need to change the directories below because the actual call is below

In [2]:
import torch
import torch.nn as nn
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, roc_auc_score
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using", device)
def train_and_evaluate(model, criterion, optimiser, scheduler, train_loader, val_loader, epochs=20, patience=5, device=device, threshold = 0.5):
    # if isinstance(model.last_layer(), nn.Sigmoid) and isinstance(criterion, nn.BCEWithLogitsLoss):
    #     raise ValueError("Model output is Sigmoid but criterion is BCEWithLogitsLoss. Please check your model and criterion compatibility.")
    best_val_loss = float('inf')
    best_model_state = None
    wait = 0
    n_count = 0
    criterion.to(device) #? Move criterion to device
    #* Epoch Training loop for this fold
    for epoch in range(1,epochs+1):
        #* Set model to training mode: essential for dropout and batch norm layers
        model.train()
        running_loss = 0.0 #? loss for this epoch
        #* Mini-batch training loop
        for batch, (inputs, labels) in enumerate(train_loader,start=1):
            optimiser.zero_grad() #? Zero the gradients
            n_count += inputs.size(0) #? Count number of samples trained
                                    
            # print(all(param.requires_grad for param in model.parameters()))
   
            torch.set_printoptions(threshold=float('inf'))
            
            assert not torch.isnan(inputs).any(), "Input has NaNs"
            assert not torch.isinf(inputs).any(), "Input has Infs"
            outputs = model(inputs) #? Forward pass through the model
            assert not torch.isnan(outputs).any(), "Model output has NaNs"
            assert not torch.isinf(outputs).any(), "Model output has Infs"
            loss = criterion(outputs, labels) #? Calculate loss
            assert not torch.isnan(loss).any(), "Model loss has NaNs"
            loss.backward() #? Backpropagation
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            running_loss += loss.item()
            optimiser.step() #? Update weights
            # scheduler.step()
                
        train_loss = running_loss / len(train_loader)
        # print(f"Epoch: {epoch}, training loss: {train_loss:.4f}")
    
        #* Now we evaluate the model on the validation set, to track training vs validation loss
        model.eval() #? Set model to evaluation mode
        with torch.no_grad(): #? No need to track gradients during evaluation
            val_loss = 0.0    
            for batch, (inputs, labels) in enumerate(val_loader,start=1):#! one pass because val_loader batch size is all, if you want to do it in mini-batches, you MUST change the metric calculations to accept mini-batches
                
                outputs = model(inputs)
                # labels = labels.cpu() 
                loss = criterion(outputs, labels)
                val_loss += loss.item() #? Calculate loss
            avg_val_loss = val_loss / len(val_loader)
            loss_ratio = val_loss / train_loss    
            pos_weight = loss_ratio  # or any other function of loss_ratio you choose
            # scheduler.step(val_loss)  # Use validation loss here, not training loss
            scheduler.step()

        # Update criterion with new pos_weight
        # criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight]).to(device))
        # Early stopping
        if avg_val_loss < best_val_loss and epoch > 100:
            best_val_loss = avg_val_loss
            best_model_state = model.state_dict()
            wait = 0
        elif avg_val_loss*0.96 <= best_val_loss:
                wait = 0
        else:
            wait += 1
        if wait >= patience:
            print(f"Early stopping triggered at epoch {epoch}, best val loss: {best_val_loss:.4f}")
            break
        print(f"Epoch: {epoch}".ljust(12), 
              f"training loss:{train_loss:.3f}".ljust(16), 
              f"best_val_loss:{best_val_loss:.3f}".ljust(12), 
              f"Val Loss: {avg_val_loss:.3f}",
              f"Scheduler lr: {scheduler.get_last_lr()}".ljust(50), 
              f"N samples trained: {n_count}",
              end="\r")
    #* Use best model to calculate metrics on the validation set
    #! must be outside epoch loop, it comes after the training and cv loop
    model.load_state_dict(best_model_state) #? Load the best model state
    with torch.no_grad():
        for batch, (inputs, labels) in enumerate(val_loader,start=1):#! one pass because val_loader batch size is all, if you want to do it in mini-batches, you MUST change the metric calculations to accept mini-batches
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                labels = labels.cpu() 
                # predictions = (torch.sigmoid(outputs) < 0.5).float().cpu().numpy()
                predictions = (torch.sigmoid(outputs) >= threshold).float().cpu().numpy()
                
                val_loss += loss.item() #? Calculate loss
                
    #! The following should have length equal to fold number           
    accuracy=accuracy_score(labels, predictions) 
    precision=precision_score(labels, predictions, pos_label=1, zero_division=0)
    recall=recall_score(labels, predictions, pos_label=1)
    f1=f1_score(labels, predictions, pos_label=1)
    auc=roc_auc_score(labels, predictions)
    
    return model, accuracy, precision, recall, f1, auc

Using cuda


In [3]:
# import torch.nn as nn

# class Ivan_NN(nn.Module):
#     def __init__(self, input_dim,):
#         super().__init__()
#         self.layers = nn.Sequential(
#             nn.Linear(input_dim,256),
#             nn.LeakyReLU(),
#             nn.BatchNorm1d(256),
#             nn.Linear(256,2048),
#             nn.BatchNorm1d(2048),
#             nn.LeakyReLU(),
#             # nn.Dropout(0.2),
#             nn.Linear(2048,712),
#             nn.BatchNorm1d(712),
#             nn.LeakyReLU(),
#             # # nn.Dropout(0.2),
#             nn.Linear(712,360),
#             nn.BatchNorm1d(360),
#             nn.LeakyReLU(0.2),
#             nn.Linear(360,512),
#             nn.BatchNorm1d(512),
#             nn.LeakyReLU(),
#             nn.Linear(512,1024),
#             nn.BatchNorm1d(1024),
#             nn.LeakyReLU(),
#             nn.Linear(1024,512),
#             nn.LeakyReLU(),
#             # nn.LeakyReLU(),
#             # nn.Dropout(0.2),
#             nn.Linear(512,324),
#             nn.BatchNorm1d(324),
#             nn.LeakyReLU(),
#             nn.Linear(324,64),
#             nn.LeakyReLU(),
#             nn.Linear(64,1),
#             # nn.LeakyReLU(0.2),
#             # nn.Linear(32,1),
#         )
    
#     def forward(self, x):
#         # print(x.shape)
#         return self.layers(x)

# print(Ivan_NN(28))


In [4]:
import torch.nn as nn
import torch

class Ivan_NN(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.input_fc = nn.Linear(input_dim, 2180)
        self.input_bn = nn.BatchNorm1d(2180)
    
        self.drop = nn.Dropout(0.3)

        self.block1 = nn.Sequential(
            nn.Linear(2180, 888),
            nn.BatchNorm1d(888),
            nn.LeakyReLU(),
            nn.Dropout(0.3)
        )

        self.block2 = nn.Sequential(
            nn.Linear(888, 1122),
            nn.BatchNorm1d(1122),
            nn.LeakyReLU(),
            nn.Dropout(0.3)
        )

        self.block3 = nn.Sequential(
            nn.Linear(1122, 624),
            nn.BatchNorm1d(624),
            nn.LeakyReLU(),
            nn.Dropout(0.3)
        )

        self.block4 = nn.Sequential(
            nn.Linear(624, 1080),
            nn.BatchNorm1d(1080),
            nn.LeakyReLU(),
            nn.Dropout(0.3)
        )

        self.block5 = nn.Sequential(
            nn.Linear(1080, 512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(),
            nn.Dropout(0.3)
        )

        self.block6 = nn.Sequential(
            nn.Linear(512, 128),
            nn.BatchNorm1d(128),  # ← Fixed this
            nn.LeakyReLU(),
            nn.Dropout(0.3)
        )

        self.block7 = nn.Sequential(
            nn.Linear(128, 36),
            nn.BatchNorm1d(36),
            nn.LeakyReLU(),
            nn.Dropout(0.2)
        )

        self.block8 = nn.Sequential(
            nn.Linear(36, 64),
            nn.BatchNorm1d(64),
            nn.Sigmoid(),  # or LeakyReLU, up to you
            nn.Dropout(0.1)
        )

        self.output = nn.Linear(64, 1)

        # Skip connection projectors
        self.skip1_proj = nn.Sequential(nn.Linear(2180, 1122))
        self.skip2_proj = nn.Sequential(nn.Linear(1122, 128))
        self.skip3_proj = nn.Sequential(nn.Linear(128, 64))

    def forward(self, x):
        x = self.input_fc(x)
        x = self.input_bn(x)
        # x = torch.nn.functional.leaky_relu(x)
        x = self.drop(x)

        x1 = self.block1(x)
        x2 = self.block2(x1)

        skip1 = self.skip1_proj(x)
        x2 = x2 + skip1

        x3 = self.block3(x2)
        x4 = self.block4(x3)
        x5 = self.block5(x4)

        x6 = self.block6(x5)

        skip2 = self.skip2_proj(x2)
        x6 = x6 + skip2

        x7 = self.block7(x6)
        x8 = self.block8(x7)

        skip3 = self.skip3_proj(x6)
        x8 = x8 + skip3

        out = self.output(x8)
        return out


In [8]:
import torch.nn as nn

class Ivan_NN(nn.Module):
    def __init__(self, input_dim,):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim,64),
            nn.LeakyReLU(),
            nn.BatchNorm1d(64),
            nn.Linear(64,128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            # nn.Dropout(0.2),
            nn.Linear(128,64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            # # nn.Dropout(0.2),
            nn.Linear(64,80),
            nn.BatchNorm1d(80),
            nn.LeakyReLU(0.2),
            # nn.Linear(360,512),
            # nn.BatchNorm1d(512),
            # nn.LeakyReLU(),
            # nn.Linear(512,1024),
            # nn.BatchNorm1d(1024),
            # nn.LeakyReLU(),
            # nn.Linear(1024,512),
            # nn.LeakyReLU(),
            # nn.LeakyReLU(),
            # nn.Dropout(0.2),
            # nn.Linear(512,324),
            # nn.BatchNorm1d(324),
            # nn.LeakyReLU(),
            # nn.Linear(324,64),
            # nn.LeakyReLU(),
            nn.Linear(80,1),
            # nn.LeakyReLU(0.2),
            # nn.Linear(32,1),
        )
    
    def forward(self, x):
        # print(x.shape)
        return self.layers(x)

print(Ivan_NN(28))


Ivan_NN(
  (layers): Sequential(
    (0): Linear(in_features=28, out_features=64, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): Linear(in_features=64, out_features=128, bias=True)
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.01)
    (6): Linear(in_features=128, out_features=64, bias=True)
    (7): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): LeakyReLU(negative_slope=0.01)
    (9): Linear(in_features=64, out_features=80, bias=True)
    (10): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): LeakyReLU(negative_slope=0.2)
    (12): Linear(in_features=80, out_features=1, bias=True)
  )
)


In [9]:
from Training_Helper_Functions import *


#! change imports
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import RobustScaler
import numpy as np
import optuna
from torch import optim
def maximise_combined_score(trial):
    if True:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("Using device:", device)
        epochs = 800
        random_state = 42
        # Scaler
        scaler = RobustScaler() # Or StandardScaler() - You can also make this a hyperparameter if you want
        threshold = trial.suggest_float("threshold", 0.28,0.6,)
        # dropout = None
        initial_lr = trial.suggest_float("initial_lr", 1e-3, 1e-3 ,log=True)
        max_lr = trial.suggest_float("max_lr", 1e-3, 1e-3, log=True)
        weight_decay = trial.suggest_float("weight_decay", 1e-5, 6e-1, log=True)
        # Loss function hyperparameters
        criterion_choice = "BCEWithLogitsLoss" #trial.suggest_categorical("criterion", ["BCEWithLogitsLoss", "FocalLoss"])
        # Hyperparameter exploration optimization
        if criterion_choice == "BCEWithLogitsLoss":
            pos_weight = trial.suggest_int("pos_weight",1, 1)
            alpha = None
            gamma = None
        elif criterion_choice == "FocalLoss":
            pos_weight = None
            alpha = trial.suggest_float("alpha", 0.25, 0.75)
            gamma = trial.suggest_float("gamma", 1.0, 5.0)
        else:
            pos_weight = None

        # Initialize lists for metrics across folds
        accuracy_list = []
        precision_list = []
        recall_list = []
        f1_list = []
        auc_list = []
    if True:
        training_set = pd.read_csv('./DATA/folds/train_fold_0.csv')
        cv_set = pd.read_csv('./DATA/folds/test_fold_0.csv')
        holdout_set = pd.read_csv('./DATA/holdout_set/holdout_data_OHE.csv')
        # print(training_set.shape)
        # print(cv_set.shape)
        # print(holdout_set.shape)

        train_x = training_set.drop(columns = 'DR')
        train_y = training_set[['DR']]
        test_x = cv_set.drop(columns = 'DR')
        test_y = cv_set[['DR']] 
    # Cross-validation loop
    # for fold, (train_x, test_x, train_y, test_y) in enumerate(kFolds, start=1):
    train_x, test_x, train_y, test_y
    fold = 1
    print(f"Fold {fold}:")
    # Create DataLoader for current fold
    train_loader, val_loader = fold_to_dataloader_tensor(train_x, test_x, train_y, test_y, batch_size=512,
                                                        device=device)
    # Instantiate and initialize the model
    model = Ivan_NN(input_dim=get_feature_count(train_loader))
    model.to(device)
    model.apply(init_weights)

    # Map the choice to the actual loss function
    criterion = criterion_mapping(criterion_choice, pos_weight, alpha, gamma)
    optimiser = optim.Adam(model.parameters(), lr=initial_lr, weight_decay=weight_decay) 
    scheduler = torch.optim.lr_scheduler.CyclicLR(
        optimiser,
        base_lr=1e-6,
        max_lr=max_lr,
        cycle_momentum=True)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimiser, mode='min', patience=3, factor=0.5)

    # scheduler = torch.optim.lr_scheduler.StepLR(optimiser, step_size=10, gamma=0.1)

    # Train and evaluate the model on the current fold
    model, accuracy, precision, recall, f1, auc = train_and_evaluate(
        model, criterion, optimiser, scheduler, train_loader, val_loader, epochs=epochs, patience=40,
        device=device, threshold=threshold
    )
    print(f"Accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}, f1: {f1:.4f}, auc: {auc:.4f}")
    del model
    del train_loader
    del val_loader

    # Append the metrics from the current fold
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    auc_list.append(auc)

    # Calculate the average metrics across all folds
    avg_accuracy = np.sum(accuracy_list) / len(accuracy_list)
    avg_precision = np.sum(precision_list) / len(precision_list)
    avg_recall = np.sum(recall_list) / len(recall_list)
    avg_f1 = np.sum(f1_list) / len(f1_list)
    avg_auc = np.sum(auc_list) / len(auc_list)

    # Combine metrics into a single "score"
    # combined_score = (avg_f1 + avg_precision + avg_recall + avg_accuracy + avg_auc) / 5
    combined_score = avg_f1

    return combined_score

In [10]:
import threading
import optuna
from optuna_dashboard import run_server

def start_dashboard():
    run_server(storage)

storage = optuna.storages.InMemoryStorage()
study = optuna.create_study(direction="maximize", storage=storage, study_name="Basic")

# Start dashboard in a separate thread
dashboard_thread = threading.Thread(target=start_dashboard, daemon=True)
dashboard_thread.start()

# Run optimization
study.optimize(maximise_combined_score, n_trials=30)

# After optimization, print results
print("Best trial:")
trial = study.best_trial
print(f"  Combined score: {trial.value}")
print("  Best hyperparameters:")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2025-04-23 04:50:15,772] A new study created in memory with name: Basic
Bottle v0.13.2 server starting up (using WSGIRefServer())...
Listening on http://localhost:8080/
Hit Ctrl-C to quit.



Using device: cuda
Fold 1:
Early stopping triggered at epoch 145, best val loss: 1.6228s: 2.482 Scheduler lr: [7.292800000000007e-05]              N samples trained: 2580480


[I 2025-04-23 04:50:59,167] Trial 0 finished with value: 0.31210191082802546 and parameters: {'threshold': 0.5907571683338007, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 2.0806914493899644e-05, 'pos_weight': 1}. Best is trial 0 with value: 0.31210191082802546.


Accuracy: 0.8120, precision: 0.2475, recall: 0.4224, f1: 0.3121, auc: 0.6391
Using device: cuda
Fold 1:
Epoch: 140   training loss:0.096 best_val_loss:1.453 Val Loss: 2.062 Scheduler lr: [7.092999999999985e-05]              N samples trained: 2508800

[I 2025-04-23 04:51:44,578] Trial 1 finished with value: 0.3227848101265823 and parameters: {'threshold': 0.597704452522527, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.006300158315291185, 'pos_weight': 1}. Best is trial 1 with value: 0.3227848101265823.


Early stopping triggered at epoch 141, best val loss: 1.4531
Accuracy: 0.8138, precision: 0.2550, recall: 0.4397, f1: 0.3228, auc: 0.6477
Using device: cuda
Fold 1:
Epoch: 143   training loss:0.089 best_val_loss:1.945 Val Loss: 2.759 Scheduler lr: [7.24284999999999e-05]               N samples trained: 2562560

[I 2025-04-23 04:52:23,016] Trial 2 finished with value: 0.2890855457227139 and parameters: {'threshold': 0.5786562412662332, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.0002503660696454788, 'pos_weight': 1}. Best is trial 1 with value: 0.3227848101265823.


Early stopping triggered at epoch 144, best val loss: 1.9448
Accuracy: 0.7903, precision: 0.2197, recall: 0.4224, f1: 0.2891, auc: 0.6270
Using device: cuda
Fold 1:
Epoch: 181   training loss:0.037 best_val_loss:1.246 Val Loss: 1.875 Scheduler lr: [9.140950000000003e-05]              N samples trained: 3243520

[I 2025-04-23 04:53:22,073] Trial 3 finished with value: 0.2988505747126437 and parameters: {'threshold': 0.40420744599967495, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.0036351239449815055, 'pos_weight': 1}. Best is trial 1 with value: 0.3227848101265823.


Early stopping triggered at epoch 182, best val loss: 1.2462
Accuracy: 0.7876, precision: 0.2241, recall: 0.4483, f1: 0.2989, auc: 0.6370
Using device: cuda
Fold 1:
Epoch: 286   training loss:0.389 best_val_loss:0.465 Val Loss: 0.552 Scheduler lr: [0.0001438569999999998]              N samples trained: 5125120

[I 2025-04-23 04:54:44,055] Trial 4 finished with value: 0.31932773109243695 and parameters: {'threshold': 0.46079583482241127, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.28768468779280204, 'pos_weight': 1}. Best is trial 1 with value: 0.3227848101265823.


Early stopping triggered at epoch 287, best val loss: 0.4652
Accuracy: 0.7885, precision: 0.2365, recall: 0.4914, f1: 0.3193, auc: 0.6566
Using device: cuda
Fold 1:
Epoch: 201   training loss:0.054 best_val_loss:1.163 Val Loss: 1.235 Scheduler lr: [0.00010139949999999981]             N samples trained: 3601920

[I 2025-04-23 04:55:42,367] Trial 5 finished with value: 0.32558139534883723 and parameters: {'threshold': 0.4684235082730942, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.03597298737009726, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Early stopping triggered at epoch 202, best val loss: 1.1628
Accuracy: 0.7981, precision: 0.2456, recall: 0.4828, f1: 0.3256, auc: 0.6581
Using device: cuda
Fold 1:
Early stopping triggered at epoch 148, best val loss: 1.4946s: 2.019 Scheduler lr: [7.442650000000013e-05]              N samples trained: 2634240


[I 2025-04-23 04:56:24,386] Trial 6 finished with value: 0.30952380952380953 and parameters: {'threshold': 0.48420096337145707, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 1.6766184049406895e-05, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Accuracy: 0.7981, precision: 0.2364, recall: 0.4483, f1: 0.3095, auc: 0.6428
Using device: cuda
Fold 1:
Epoch: 146   training loss:0.114 best_val_loss:0.778 Val Loss: 0.861 Scheduler lr: [7.392699999999996e-05]              N samples trained: 2616320

[I 2025-04-23 04:57:02,014] Trial 7 finished with value: 0.2857142857142857 and parameters: {'threshold': 0.5531150083527293, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.02935004023693843, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Early stopping triggered at epoch 147, best val loss: 0.7781
Accuracy: 0.7955, precision: 0.2207, recall: 0.4052, f1: 0.2857, auc: 0.6222
Using device: cuda
Fold 1:
Epoch: 151   training loss:0.092 best_val_loss:1.538 Val Loss: 1.857 Scheduler lr: [7.642449999999991e-05]              N samples trained: 2705920

[I 2025-04-23 04:57:46,170] Trial 8 finished with value: 0.30517711171662126 and parameters: {'threshold': 0.39400143178113434, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.010095326107519864, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Early stopping triggered at epoch 152, best val loss: 1.5380
Accuracy: 0.7781, precision: 0.2231, recall: 0.4828, f1: 0.3052, auc: 0.6470
Using device: cuda
Fold 1:
Epoch: 147   training loss:0.082 best_val_loss:1.082 Val Loss: 1.365 Scheduler lr: [7.442650000000013e-05]              N samples trained: 2634240

[I 2025-04-23 04:58:21,723] Trial 9 finished with value: 0.30177514792899407 and parameters: {'threshold': 0.455792286879118, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.00587242911192485, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Early stopping triggered at epoch 148, best val loss: 1.0820
Accuracy: 0.7946, precision: 0.2297, recall: 0.4397, f1: 0.3018, auc: 0.6371
Using device: cuda
Fold 1:
Epoch: 188   training loss:0.257 best_val_loss:0.573 Val Loss: 0.657 Scheduler lr: [9.490599999999987e-05]              N samples trained: 3368960

[I 2025-04-23 04:59:20,236] Trial 10 finished with value: 0.30707070707070705 and parameters: {'threshold': 0.32278347640819455, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.4940717069133068, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Early stopping triggered at epoch 189, best val loss: 0.5731
Accuracy: 0.7015, precision: 0.2005, recall: 0.6552, f1: 0.3071, auc: 0.6809
Using device: cuda
Fold 1:
Early stopping triggered at epoch 310, best val loss: 0.4650s: 0.790 Scheduler lr: [0.0001553455000000001]              N samples trained: 5537280


[I 2025-04-23 05:00:45,664] Trial 11 finished with value: 0.2857142857142857 and parameters: {'threshold': 0.5275996144370525, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.06611190513155595, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Accuracy: 0.8042, precision: 0.2261, recall: 0.3879, f1: 0.2857, auc: 0.6194
Using device: cuda
Fold 1:
Epoch: 178   training loss:0.048 best_val_loss:1.843 Val Loss: 2.154 Scheduler lr: [8.991099999999997e-05]              N samples trained: 3189760

[I 2025-04-23 05:01:33,829] Trial 12 finished with value: 0.31232876712328766 and parameters: {'threshold': 0.3015339927087317, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.0005392550763507689, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Early stopping triggered at epoch 179, best val loss: 1.8427
Accuracy: 0.7815, precision: 0.2289, recall: 0.4914, f1: 0.3123, auc: 0.6528
Using device: cuda
Fold 1:
Epoch: 144   training loss:0.092 best_val_loss:1.686 Val Loss: 2.317 Scheduler lr: [7.292800000000007e-05]              N samples trained: 2580480

[I 2025-04-23 05:06:42,964] Trial 13 finished with value: 0.32294617563739375 and parameters: {'threshold': 0.5156484828990211, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.0008411670512029099, 'pos_weight': 1}. Best is trial 5 with value: 0.32558139534883723.


Early stopping triggered at epoch 145, best val loss: 1.6856
Accuracy: 0.7920, precision: 0.2405, recall: 0.4914, f1: 0.3229, auc: 0.6586
Using device: cuda
Fold 1:
Epoch: 148   training loss:0.085 best_val_loss:1.672 Val Loss: 2.074 Scheduler lr: [7.492599999999984e-05]              N samples trained: 2652160

[I 2025-04-23 05:07:11,327] Trial 14 finished with value: 0.3352941176470588 and parameters: {'threshold': 0.5083474884116073, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.0004696687678473499, 'pos_weight': 1}. Best is trial 14 with value: 0.3352941176470588.


Early stopping triggered at epoch 149, best val loss: 1.6724
Accuracy: 0.8033, precision: 0.2545, recall: 0.4914, f1: 0.3353, auc: 0.6649
Using device: cuda
Fold 1:
Epoch: 117   training loss:0.121 best_val_loss:1.738 Val Loss: 1.926 Scheduler lr: [5.94415e-05]                        N samples trained: 2096640

[W 2025-04-23 05:07:34,980] Trial 15 failed with parameters: {'threshold': 0.4143713380165521, 'initial_lr': 0.001, 'max_lr': 0.001, 'weight_decay': 0.00011541745858379638, 'pos_weight': 1} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "d:\GitHub repos\ADL2\.venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\tanle\AppData\Local\Temp\ipykernel_5988\3184159513.py", line 81, in maximise_combined_score
    model, accuracy, precision, recall, f1, auc = train_and_evaluate(
                                                  ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\tanle\AppData\Local\Temp\ipykernel_5988\3861305712.py", line 30, in train_and_evaluate
    outputs = model(inputs) #? Forward pass through the model
              ^^^^^^^^^^^^^
  File "d:\GitHub repos\ADL2\.venv\Lib\site-packages\torch\nn\modules\module.py", line 1735, in _wrapped_c

Epoch: 118   training loss:0.118 best_val_loss:1.738 Val Loss: 1.964 Scheduler lr: [5.9941000000000164e-05]             N samples trained: 2114560

KeyboardInterrupt: 