In [2]:
 #?  change which dataset to take from to train
import pandas as pd
training_set = pd.read_csv('./DATA_0/folds/train_fold_0.csv')
cv_set = pd.read_csv('./DATA_0/folds/test_fold_0.csv')
holdout_set = pd.read_csv('./DATA_0/holdout_set/holdout_data_OHE.csv')
print(training_set.shape)
print(cv_set.shape)
print(holdout_set.shape)

train_x = training_set.drop(columns = 'DR')
train_y = training_set[['DR']]
test_x = cv_set.drop(columns = 'DR')
test_y = cv_set[['DR']]       
        
kFolds = train_x, test_x, train_y, test_y
for i in kFolds:
    print(i.shape)
    
    
#! need to change the directories below because the actual call is below

(4272, 29)
(1149, 29)
(638, 29)
(4272, 28)
(1149, 28)
(4272, 1)
(1149, 1)


In [10]:
import torch
import torch.nn as nn
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, roc_auc_score
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using", device)
def train_and_evaluate(model, criterion, optimiser, scheduler, train_loader, val_loader, epochs=20, patience=5, device=device, threshold = 0.5):
    # if isinstance(model.last_layer(), nn.Sigmoid) and isinstance(criterion, nn.BCEWithLogitsLoss):
    #     raise ValueError("Model output is Sigmoid but criterion is BCEWithLogitsLoss. Please check your model and criterion compatibility.")
    best_val_loss = float('inf')
    best_model_state = None
    wait = 0
    n_count = 0
    criterion.to(device) #? Move criterion to device
    #* Epoch Training loop for this fold
    for epoch in range(1,epochs+1):
        #* Set model to training mode: essential for dropout and batch norm layers
        model.train()
        running_loss = 0.0 #? loss for this epoch
        #* Mini-batch training loop
        for batch, (inputs, labels) in enumerate(train_loader,start=1):
            optimiser.zero_grad() #? Zero the gradients
            n_count += inputs.size(0) #? Count number of samples trained
                                    
            
            torch.set_printoptions(threshold=float('inf'))
            
            assert not torch.isnan(inputs).any(), "Input has NaNs"
            assert not torch.isinf(inputs).any(), "Input has Infs"
            outputs = model(inputs) #? Forward pass through the model
            assert not torch.isnan(outputs).any(), "Model output has NaNs"
            assert not torch.isinf(outputs).any(), "Model output has Infs"
            loss = criterion(outputs, labels) #? Calculate loss
            assert not torch.isnan(loss).any(), "Model loss has NaNs"
            loss.backward() #? Backpropagation
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            running_loss += loss.item()
            optimiser.step() #? Update weights
            scheduler.step()
                
        train_loss = running_loss / len(train_loader)
        # print(f"Epoch: {epoch}, training loss: {train_loss:.4f}")
    
        #* Now we evaluate the model on the validation set, to track training vs validation loss
        model.eval() #? Set model to evaluation mode
        with torch.no_grad(): #? No need to track gradients during evaluation
            val_loss = 0.0    
            for batch, (inputs, labels) in enumerate(val_loader,start=1):#! one pass because val_loader batch size is all, if you want to do it in mini-batches, you MUST change the metric calculations to accept mini-batches
                
                outputs = model(inputs)
                # labels = labels.cpu() 
                loss = criterion(outputs, labels)
                val_loss += loss.item() #? Calculate loss
            avg_val_loss = val_loss / len(val_loader)
        loss_ratio = val_loss / train_loss    
        pos_weight = loss_ratio  # or any other function of loss_ratio you choose
    
        # Update criterion with new pos_weight
        criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight]).to(device))
        # Early stopping
        if avg_val_loss < best_val_loss and epoch > 10000:
            best_val_loss = avg_val_loss
            best_model_state = model.state_dict()
            wait = 0
        elif avg_val_loss*0.95 <= best_val_loss:
                wait = 0
        else:
            wait += 1
        if wait >= patience:
            print(f"Early stopping triggered at epoch {epoch}, best val loss: {best_val_loss:.4f}")
            break
        print(f"Epoch: {epoch}".ljust(12), 
              f"training loss:{train_loss:.3f}".ljust(16), 
              f"best_val_loss:{best_val_loss:.3f}".ljust(12), 
              f"Val Loss: {avg_val_loss:.3f}",
              f"Scheduler lr: {scheduler.get_last_lr()}".ljust(50), 
              f"N samples trained: {n_count}",
              end="\r")
    #* Use best model to calculate metrics on the validation set
    #! must be outside epoch loop, it comes after the training and cv loop
    model.load_state_dict(best_model_state) #? Load the best model state
    with torch.no_grad():
        for batch, (inputs, labels) in enumerate(val_loader,start=1):#! one pass because val_loader batch size is all, if you want to do it in mini-batches, you MUST change the metric calculations to accept mini-batches
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                labels = labels.cpu() 
                # predictions = (torch.sigmoid(outputs) < 0.5).float().cpu().numpy()
                predictions = (torch.sigmoid(outputs) >= threshold).float().cpu().numpy()
                
                val_loss += loss.item() #? Calculate loss
                
    #! The following should have length equal to fold number           
    accuracy=accuracy_score(labels, predictions) 
    precision=precision_score(labels, predictions, pos_label=1, zero_division=0)
    recall=recall_score(labels, predictions, pos_label=1)
    f1=f1_score(labels, predictions, pos_label=1)
    auc=roc_auc_score(labels, predictions)
    
    return model, accuracy, precision, recall, f1, auc

Using cuda


In [11]:
import torch.nn as nn

class Ivan_NN(nn.Module):
    def __init__(self, input_dim,):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_dim,512),
            nn.LeakyReLU(),
            # nn.Dropout(0.2),
            nn.Linear(512,1024),
            nn.BatchNorm1d(1024),
            nn.LeakyReLU(0.2),
            # nn.Dropout(0.2),
            nn.Linear(1024,512),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2),
            nn.Linear(512,256),
            nn.LeakyReLU(),
            nn.Linear(256,128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Linear(128,1),
            # nn.LeakyReLU(),
            # nn.Dropout(0.2),
            # nn.Linear(256,128),
            # nn.LeakyReLU(),
            # nn.Linear(128,64),
            # nn.LeakyReLU(),
            # nn.Linear(64,32),
            # nn.LeakyReLU(0.2),
            # nn.Linear(32,1),
        )
    
    def forward(self, x):
        return self.layers(x)

print(Ivan_NN(28))


Ivan_NN(
  (layers): Sequential(
    (0): Linear(in_features=28, out_features=512, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
    (2): Linear(in_features=512, out_features=1024, bias=True)
    (3): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): LeakyReLU(negative_slope=0.2)
    (5): Linear(in_features=1024, out_features=512, bias=True)
    (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): LeakyReLU(negative_slope=0.2)
    (8): Linear(in_features=512, out_features=256, bias=True)
    (9): LeakyReLU(negative_slope=0.01)
    (10): Linear(in_features=256, out_features=128, bias=True)
    (11): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): LeakyReLU(negative_slope=0.01)
    (13): Linear(in_features=128, out_features=1, bias=True)
  )
)


In [12]:
from Training_Helper_Functions import *


#! change imports
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import RobustScaler
import numpy as np
import optuna
from torch import optim
def maximise_combined_score(trial):
    if True:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print("Using device:", device)
        epochs = 10000
        random_state = 42
        # Scaler
        scaler = RobustScaler() # Or StandardScaler() - You can also make this a hyperparameter if you want
        threshold = trial.suggest_float("threshold", 0.5,0.5)
        # dropout = None
        initial_lr = trial.suggest_float("initial_lr", 1e-6, 1e-6, log=True)
        max_lr = trial.suggest_float("max_lr", 5e-6, 5e-6, log=True)
        weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-6, log=True)
        # Loss function hyperparameters
        criterion_choice = "BCEWithLogitsLoss" #trial.suggest_categorical("criterion", ["BCEWithLogitsLoss", "FocalLoss"])
        # Hyperparameter exploration optimization
        if criterion_choice == "BCEWithLogitsLoss":
            pos_weight = trial.suggest_int("pos_weight", 2, 2)
            alpha = None
            gamma = None
        elif criterion_choice == "FocalLoss":
            pos_weight = None
            alpha = trial.suggest_float("alpha", 0.25, 0.75)
            gamma = trial.suggest_float("gamma", 1.0, 5.0)
        else:
            pos_weight = None

        # Initialize lists for metrics across folds
        accuracy_list = []
        precision_list = []
        recall_list = []
        f1_list = []
        auc_list = []
    if True:
        training_set = pd.read_csv('./DATA_0/folds/train_fold_0.csv')
        cv_set = pd.read_csv('./DATA_0/folds/test_fold_0.csv')
        holdout_set = pd.read_csv('./DATA_0/holdout_set/holdout_data_OHE.csv')
        # print(training_set.shape)
        # print(cv_set.shape)
        # print(holdout_set.shape)

        train_x = training_set.drop(columns = 'DR')
        train_y = training_set[['DR']]
        test_x = cv_set.drop(columns = 'DR')
        test_y = cv_set[['DR']] 
    # Cross-validation loop
    # for fold, (train_x, test_x, train_y, test_y) in enumerate(kFolds, start=1):
    train_x, test_x, train_y, test_y
    fold = 1
    print(f"Fold {fold}:")
    # Create DataLoader for current fold
    train_loader, val_loader = fold_to_dataloader_tensor(train_x, test_x, train_y, test_y, batch_size=256,
                                                        device=device)
    # Instantiate and initialize the model
    model = Ivan_NN(input_dim=get_feature_count(train_loader))
    model.to(device)
    model.apply(init_weights)

    # Map the choice to the actual loss function
    criterion = criterion_mapping(criterion_choice, pos_weight, alpha, gamma)
    optimiser = optim.Adam(model.parameters(), lr=initial_lr, weight_decay=weight_decay)
    scheduler = torch.optim.lr_scheduler.CyclicLR(
        optimiser,
        base_lr=1e-6,
        max_lr=max_lr,
        cycle_momentum=True)

    # Train and evaluate the model on the current fold
    model, accuracy, precision, recall, f1, auc = train_and_evaluate(
        model, criterion, optimiser, scheduler, train_loader, val_loader, epochs=epochs, patience=100,
        device=device, threshold=threshold
    )
    print(f"Accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}, f1: {f1:.4f}, auc: {auc:.4f}")
    del model
    del train_loader
    del val_loader

    # Append the metrics from the current fold
    accuracy_list.append(accuracy)
    precision_list.append(precision)
    recall_list.append(recall)
    f1_list.append(f1)
    auc_list.append(auc)

    # Calculate the average metrics across all folds
    avg_accuracy = np.sum(accuracy_list) / len(accuracy_list)
    avg_precision = np.sum(precision_list) / len(precision_list)
    avg_recall = np.sum(recall_list) / len(recall_list)
    avg_f1 = np.sum(f1_list) / len(f1_list)
    avg_auc = np.sum(auc_list) / len(auc_list)

    # Combine metrics into a single "score"
    # combined_score = (avg_f1 + avg_precision + avg_recall + avg_accuracy + avg_auc) / 5
    combined_score = avg_f1

    return combined_score

In [13]:
import threading
import optuna
from optuna_dashboard import run_server

def start_dashboard():
    run_server(storage)

storage = optuna.storages.InMemoryStorage()
study = optuna.create_study(direction="maximize", storage=storage, study_name="Basic")

# Start dashboard in a separate thread
dashboard_thread = threading.Thread(target=start_dashboard, daemon=True)
dashboard_thread.start()

# Run optimization
study.optimize(maximise_combined_score, n_trials=30)

# After optimization, print results
print("Best trial:")
trial = study.best_trial
print(f"  Combined score: {trial.value}")
print("  Best hyperparameters:")
for key, value in trial.params.items():
    print(f"    {key}: {value}")


[I 2025-04-21 21:45:56,635] A new study created in memory with name: Basic
Bottle v0.13.2 server starting up (using WSGIRefServer())...
Listening on http://localhost:8080/
Hit Ctrl-C to quit.



Using device: cuda
Fold 1:
Epoch: 2353  training loss:0.350 best_val_loss:inf Val Loss: 1303936.000 Scheduler lr: [4.296000000000007e-06]              N samples trained: 9637888

[W 2025-04-21 21:48:33,545] Trial 0 failed with parameters: {'threshold': 0.5, 'initial_lr': 1e-06, 'max_lr': 5e-06, 'weight_decay': 1e-06, 'pos_weight': 2} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "d:\GitHub repos\ADL2\.venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\tanle\AppData\Local\Temp\ipykernel_16268\944311138.py", line 78, in maximise_combined_score
    model, accuracy, precision, recall, f1, auc = train_and_evaluate(
                                                  ^^^^^^^^^^^^^^^^^^^
  File "C:\Users\tanle\AppData\Local\Temp\ipykernel_16268\2483633839.py", line 30, in train_and_evaluate
    assert not torch.isnan(outputs).any(), "Model output has NaNs"
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt
[W 2025-04-21 21:48:33,546] Trial 0 failed with value None.


Epoch: 2354  training loss:0.317 best_val_loss:inf Val Loss: 1360396.250 Scheduler lr: [4.328000000000003e-06]              N samples trained: 9641984

KeyboardInterrupt: 