In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
random_state = 42

# Encoded dataset holding both the features and the "DR" label (community is coded 0-9).
raw_dataset = pd.read_csv("./original_dataset/processed_data_encoded.csv")

# Features: every column except the label and the two excluded columns.
X = raw_dataset.drop(columns=["BMI", "TCTG", "DR"])
# Label kept as a one-column frame so it can be concatenated back onto the features below.
Y = raw_dataset["DR"].to_frame()

# Hold out a stratified 10% as the final test set; the remaining 90% is used for the folds.
(
    X_FOR_FOLDS,
    X_FINAL_TEST,
    Y_FOR_FOLDS,
    Y_FINAL_TEST,
) = train_test_split(X, Y, test_size=0.1, stratify=Y, random_state=random_state)

# Re-join features and label of the fold portion and give it a fresh 0..n-1 index.
df = pd.concat([X_FOR_FOLDS, Y_FOR_FOLDS], axis=1).reset_index(drop=True)

In [5]:
import torch
import torch.nn as nn
class FeedForwardBlock_ELU(nn.Module):
    """Linear layer followed by an activation and, optionally, dropout.

    Despite the class name, the default activation is ``nn.LeakyReLU``;
    pass ``activation=nn.ELU`` to get an actual ELU block.

    Args:
        in_features: Width of the incoming feature vector.
        out_features: Width produced by the linear layer.
        dropout: Dropout probability. ``None`` or a non-positive value
            leaves the dropout layer out entirely.
        activation: Zero-argument callable that builds the activation module.
    """

    def __init__(self, in_features, out_features, dropout=None, activation=nn.LeakyReLU):
        super().__init__()
        wants_dropout = dropout is not None and dropout > 0
        # BatchNorm1d is disabled for this block.
        stages = (nn.Linear(in_features, out_features), activation())
        if wants_dropout:
            stages = stages + (nn.Dropout(dropout),)
        self.block = nn.Sequential(*stages)

    def forward(self, x):
        """Apply linear -> activation (-> dropout) to ``x``."""
        return self.block(x)
    
class FeedForwardBlock_TanH(nn.Module):
    """Linear layer + activation (``nn.Tanh`` by default) with optional dropout.

    Args:
        in_features: Number of input features of the linear layer.
        out_features: Number of output features of the linear layer.
        dropout: Dropout probability; skipped when ``None`` or not positive.
        activation: Zero-argument callable that builds the activation module.
    """

    def __init__(self, in_features, out_features, dropout=None, activation=nn.Tanh):
        super().__init__()
        # BatchNorm1d is disabled for this block.
        pipeline = [nn.Linear(in_features, out_features)]
        pipeline.append(activation())
        if dropout is not None and dropout > 0:
            pipeline.append(nn.Dropout(dropout))
        self.block = nn.Sequential(*pipeline)

    def forward(self, x):
        """Return the block's output for the batch ``x``."""
        return self.block(x)
class FeedForwardBlock_ReLU(nn.Module):
    """Linear layer -> BatchNorm1d -> activation (``nn.ReLU`` by default), with optional dropout.

    Args:
        in_features: Number of input features of the linear layer.
        out_features: Number of output features (also the BatchNorm1d width).
        dropout: Dropout probability; skipped when ``None`` or not positive.
        activation: Zero-argument callable that builds the activation module.
    """

    def __init__(self, in_features, out_features, dropout=None, activation=nn.ReLU):
        super().__init__()
        linear = nn.Linear(in_features, out_features)
        norm = nn.BatchNorm1d(out_features)
        tail = [nn.Dropout(dropout)] if (dropout is not None and dropout > 0) else []
        self.block = nn.Sequential(linear, norm, activation(), *tail)

    def forward(self, x):
        """Return the block's output for the batch ``x``."""
        return self.block(x)
class MyModelWithSkip(nn.Module):
    """Feed-forward network with projected residual (skip) connections and one output unit.

    With ``hidden_dim3`` given, four blocks are stacked::

        x1  = block1(x)                       # input_dim   -> hidden_dim
        x2  = block2(x1)                      # hidden_dim  -> hidden_dim2
        x3  = block3(x2 + skip13(x1))         # hidden_dim2 -> hidden_dim3
        x4  = block4(x3 + skip24(x2))         # hidden_dim3 -> output_dim
        out = output_layer(x4 + skip35(x3))   # output_dim  -> 1

    With ``hidden_dim3`` omitted (``None``/0) the fourth block is dropped:
    ``block3`` maps ``hidden_dim2 -> output_dim`` and the output layer consumes
    ``x3 + skip24(x2)``. Previously this configuration crashed at construction.

    Every skip is ``nn.Identity`` when the two widths match and a learned
    ``nn.Linear`` projection otherwise.

    Args:
        input_dim: Number of input features.
        hidden_dim: Output width of ``block1``.
        hidden_dim2: Output width of ``block2``.
        output_dim: Width fed into the final ``nn.Linear(output_dim, 1)``.
        hidden_dim3: Output width of ``block3``; falsy disables ``block4``.
        dropout: Dropout probability for ``block1``; the later blocks use half
            of it. ``None`` or 0 disables dropout everywhere.
    """

    def __init__(self, input_dim, hidden_dim, hidden_dim2, output_dim, hidden_dim3 = None, dropout=None):
        super().__init__()

        # `dropout / 2` raised TypeError for the default dropout=None; the blocks
        # themselves already treat None/0 as "no dropout layer".
        inner_dropout = dropout / 2 if dropout else None

        has_block4 = bool(hidden_dim3)
        # Width of x3: hidden_dim3 in the four-block layout, output_dim otherwise.
        dim3 = hidden_dim3 if has_block4 else output_dim

        # Creation order is kept as before so seeded initialisation is unchanged.
        self.block1 = FeedForwardBlock_ELU(input_dim, hidden_dim, dropout=dropout)
        self.block2 = FeedForwardBlock_ReLU(hidden_dim, hidden_dim2, dropout=inner_dropout)
        self.block3 = FeedForwardBlock_ELU(hidden_dim2, dim3, dropout=inner_dropout)
        self.block4 = FeedForwardBlock_ReLU(dim3, output_dim, dropout=inner_dropout) if has_block4 else None
        self.output_layer = nn.Linear(output_dim, 1)

        self.skip13 = self._make_skip(hidden_dim, hidden_dim2)
        self.skip24 = self._make_skip(hidden_dim2, dim3)
        self.skip35 = self._make_skip(dim3, output_dim) if has_block4 else None

    @staticmethod
    def _make_skip(in_dim, out_dim):
        """Return an identity skip when widths match, otherwise a linear projection."""
        return nn.Identity() if in_dim == out_dim else nn.Linear(in_dim, out_dim)

    def forward(self, x):
        """Return one raw output value per row of ``x`` (no sigmoid is applied here)."""
        x1 = self.block1(x)
        x2 = self.block2(x1)
        x3 = self.block3(x2 + self.skip13(x1))
        residual = x3 + self.skip24(x2)
        if self.block4 is not None:
            x4 = self.block4(residual)
            residual = x4 + self.skip35(x3)
        return self.output_layer(residual)
    
# Smoke test: build the network with explicit layer widths and print its layout.
test_model = MyModelWithSkip(
    **dict(
        input_dim=28,
        hidden_dim=64,
        hidden_dim2=128,
        hidden_dim3=64,
        output_dim=32,
        dropout=0.0,
    )
)
print(test_model)


MyModelWithSkip(
  (block1): FeedForwardBlock_ELU(
    (block): Sequential(
      (0): Linear(in_features=28, out_features=64, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
  )
  (block2): FeedForwardBlock_ReLU(
    (block): Sequential(
      (0): Linear(in_features=64, out_features=128, bias=True)
      (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (block3): FeedForwardBlock_ELU(
    (block): Sequential(
      (0): Linear(in_features=128, out_features=64, bias=True)
      (1): LeakyReLU(negative_slope=0.01)
    )
  )
  (block4): FeedForwardBlock_ReLU(
    (block): Sequential(
      (0): Linear(in_features=64, out_features=32, bias=True)
      (1): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
    )
  )
  (output_layer): Linear(in_features=32, out_features=1, bias=True)
  (skip13): Linear(in_features=64, out_features=128, bias=True)
  (skip24): Linear(i

In [6]:

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, roc_auc_score
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using", device)
def train_and_evaluate(model, criterion, optimiser, scheduler, train_loader, val_loader, epochs=20, patience=5, device=device, threshold = 0.5):
    """Train ``model`` with early stopping, then score its best checkpoint on ``val_loader``.

    Per epoch: one pass over ``train_loader`` (gradient norm clipped to 1.0,
    ``scheduler.step()`` after every batch), then the mean validation loss is
    measured. After each epoch ``criterion`` is replaced by
    ``nn.BCEWithLogitsLoss`` whose ``pos_weight`` is the ratio of mean
    validation loss to mean training loss, so the criterion passed in is only
    used for the first epoch.

    Early stopping: a new best validation loss stores a copy of the weights and
    resets the counter; a loss within ~5% of the best (``loss * 0.95 <= best``)
    also resets the counter; anything worse increments it, and training stops
    once it reaches ``patience``.

    Batches are used exactly as the loaders yield them; they are presumably
    already on ``device`` -- verify against the loader construction.

    Args:
        model: Network producing one logit per sample.
        criterion: Loss module used for the first epoch.
        optimiser: Optimiser updating ``model``'s parameters.
        scheduler: LR scheduler stepped once per training batch.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        epochs: Maximum number of epochs.
        patience: Early-stopping patience (see above).
        device: Device the criterion (and its ``pos_weight``) is moved to.
        threshold: Probability cut-off at or above which a sample is predicted positive.

    Returns:
        ``(model, accuracy, precision, recall, f1, auc)`` where ``model`` holds the
        best weights seen and the metrics are computed over the whole validation
        set; ``auc`` is computed from the sigmoid probabilities.

    Raises:
        AssertionError: If inputs, outputs or the loss contain NaN/Inf during training.
    """
    import copy  # local import: keeps this cell self-contained

    best_val_loss = float('inf')
    best_model_state = None
    wait = 0
    criterion.to(device) #? Move criterion to device

    #* Epoch training loop for this fold
    for epoch in range(1, epochs + 1):
        #* Training mode: essential for dropout and batch norm layers
        model.train()
        running_loss = 0.0 #? accumulated loss for this epoch
        for inputs, labels in train_loader:
            optimiser.zero_grad()

            assert not torch.isnan(inputs).any(), "Input has NaNs"
            assert not torch.isinf(inputs).any(), "Input has Infs"
            outputs = model(inputs)
            assert not torch.isnan(outputs).any(), "Model output has NaNs"
            assert not torch.isinf(outputs).any(), "Model output has Infs"
            loss = criterion(outputs, labels)
            assert not torch.isnan(loss).any(), "Model loss has NaNs"

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            running_loss += loss.item()
            optimiser.step()
            scheduler.step()

        train_loss = running_loss / len(train_loader)

        #* Validation pass to track training vs validation loss
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, labels in val_loader:
                val_loss += criterion(model(inputs), labels).item()
        avg_val_loss = val_loss / len(val_loader)

        # Re-weight the positive class for the next epoch by the val/train loss
        # ratio. Both sides are per-batch means so the ratio does not scale with
        # the number of validation batches. Skipped when the training loss is 0.
        if train_loss > 0:
            loss_ratio = avg_val_loss / train_loss
            criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([loss_ratio]).to(device))

        # Early stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # state_dict() returns references to the live parameters; without a
            # copy the "best" snapshot would silently follow later updates.
            best_model_state = copy.deepcopy(model.state_dict())
            wait = 0
        elif avg_val_loss*0.95 <= best_val_loss:
            # Within ~5% of the best loss: tolerated, counter is reset.
            wait = 0
        else:
            wait += 1
        if wait >= patience:
            print(f"Early stopping triggered at epoch {epoch}, best val loss: {best_val_loss:.4f}")
            break
        print(f"Epoch: {epoch}".ljust(12), f"training loss:{train_loss:.3f}".ljust(16), f"best_val_loss:{best_val_loss:.3f}".ljust(12), f"Val Loss: {avg_val_loss:.3f}", f"Scheduler lr: {scheduler.get_last_lr()}".ljust(50),end="\r")

    #* Score the best checkpoint on the full validation set
    if best_model_state is not None:  # None only when no epoch ran
        model.load_state_dict(best_model_state)
    model.eval()

    label_chunks = []
    prob_chunks = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            prob_chunks.append(torch.sigmoid(model(inputs)).cpu())
            label_chunks.append(labels.cpu())

    # Flatten to 1-D: the binary metrics below expect one label / one score per sample.
    y_true = torch.cat(label_chunks).numpy().ravel()
    y_prob = torch.cat(prob_chunks).numpy().ravel()
    y_pred = (y_prob >= threshold).astype(float)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, pos_label=1, zero_division=0)
    recall = recall_score(y_true, y_pred, pos_label=1, zero_division=0)
    f1 = f1_score(y_true, y_pred, pos_label=1, zero_division=0)
    # ROC AUC is a ranking metric: feed it the probabilities, not the 0/1 predictions.
    auc = roc_auc_score(y_true, y_prob)

    return model, accuracy, precision, recall, f1, auc

Using cuda


In [7]:
from Training_Helper_Functions import *
from Preprocessing_Functions import * 
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import RobustScaler
import numpy as np
import optuna
from torch import optim
def maximise_combined_score(trial):
    """Optuna objective: build folds, train ``MyModelWithSkip`` and return the mean F1.

    The trial samples the preprocessing settings passed to ``FOLDS_GENERATOR``
    (outlier contamination, synthesizer, its epochs, amount of synthetic data),
    the layer widths, dropout, decision threshold, learning rates, weight decay
    and the loss parameters. Several ranges are pinned to a single value
    (e.g. ``hidden_dim`` 128..128), which fixes them while still recording them
    in the study.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The F1 score averaged over the evaluated folds (the value Optuna maximises).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)
    epochs = 10000  # upper bound on training epochs; early stopping normally ends sooner
    random_state = 42

    # Scaler
    scaler = RobustScaler() # Or StandardScaler() - You can also make this a hyperparameter if you want

    # NOTE: the "epochs" suggested here is forwarded to FOLDS_GENERATOR and is
    # distinct from the local training `epochs` above.
    kFolds = FOLDS_GENERATOR(df, n_splits=5, random_state=42,
                            OD_majority = IsolationForest(contamination=trial.suggest_float("contamination_majority", 0.01, 0.4), random_state=random_state),
                            OD_minority = IsolationForest(contamination=trial.suggest_float("contamination_minority", 0.01, 0.2), random_state=random_state),
                            oversampler_first = trial.suggest_categorical("oversampler_first", [True]),
                            synthesizer =  trial.suggest_categorical("synthesizer", ["TVAE", "CTGAN"]),
                            epochs = trial.suggest_int("epochs", 100, 1000, step=100),
                            n_synthetic_data = trial.suggest_int("n_synthetic_data", 1000, 10000, step=1000),
                            scaler=scaler,
                               ) # Pass outlier models and scaler

    # Model hyperparameters (first-level optimization)
    hidden_dim = trial.suggest_int("hidden_dim", 128, 128, step = 2)
    hidden_dim2 = trial.suggest_int("hidden_dim2", 256, 256, step = 2)
    # Own parameter name: re-using "hidden_dim2" made Optuna hand back the
    # already-sampled hidden_dim2 value instead of 96.
    hidden_dim3 = trial.suggest_int("hidden_dim3", 96, 96, step = 2)
    output_dim = trial.suggest_int("output_dim", 32, 32, step = 2)

    dropout = trial.suggest_float("dropout", 0.0, 0.2)
    threshold = trial.suggest_float("threshold", 0.3, 0.8)
    initial_lr = trial.suggest_float("initial_lr", 1e-4, 1e-4, log=True)
    max_lr = trial.suggest_float("max_lr", 1e-4, 1e-4, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)

    # Loss function hyperparameters
    criterion_choice = "BCEWithLogitsLoss" #trial.suggest_categorical("criterion", ["BCEWithLogitsLoss", "FocalLoss"])
    # Start from None so every branch leaves all three names bound.
    pos_weight = None
    alpha = None
    gamma = None
    if criterion_choice == "BCEWithLogitsLoss":
        pos_weight = trial.suggest_int("pos_weight", 5, 5)
    elif criterion_choice == "FocalLoss":
        alpha = trial.suggest_float("alpha", 0.25, 0.75)
        gamma = trial.suggest_float("gamma", 1.0, 5.0)

    # Per-fold metrics
    accuracy_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    auc_list = []

    # Cross-validation loop
    for fold, (train_x, test_x, train_y, test_y) in enumerate(kFolds, start=1):
        print(f"Fold {fold}:")
        # Create DataLoader for current fold
        train_loader, val_loader = fold_to_dataloader_tensor(train_x, test_x, train_y, test_y, batch_size=64,
                                                            device=device)
        # Instantiate and initialize the model (hidden_dim3 was previously not passed on)
        model = MyModelWithSkip(input_dim=get_feature_count(train_loader), hidden_dim=hidden_dim, hidden_dim2=hidden_dim2,
                        hidden_dim3=hidden_dim3, output_dim=output_dim, dropout=dropout)
        model.to(device)
        model.apply(init_weights)

        # Map the choice to the actual loss function
        criterion = criterion_mapping(criterion_choice, pos_weight, alpha, gamma)
        optimiser = optim.Adam(model.parameters(), lr=initial_lr, weight_decay=weight_decay)
        # NOTE(review): CyclicLR drives the learning rate between base_lr and
        # max_lr, so `initial_lr` given to Adam looks unused in practice -- confirm.
        scheduler = torch.optim.lr_scheduler.CyclicLR(
            optimiser,
            base_lr=1e-5,
            max_lr=max_lr,
            cycle_momentum=False)

        # Train and evaluate the model on the current fold
        model, accuracy, precision, recall, f1, auc = train_and_evaluate(
            model, criterion, optimiser, scheduler, train_loader, val_loader, epochs=epochs, patience=100,
            device=device, threshold=threshold
        )
        print(f"Accuracy: {accuracy:.4f}, precision: {precision:.4f}, recall: {recall:.4f}, f1: {f1:.4f}, auc: {auc:.4f}")
        # Drop the references so the fold's model and loaders can be freed.
        del model
        del train_loader
        del val_loader

        # Append the metrics from the current fold
        accuracy_list.append(accuracy)
        precision_list.append(precision)
        recall_list.append(recall)
        f1_list.append(f1)
        auc_list.append(auc)
        # NOTE(review): only the first fold is evaluated because of this break;
        # remove it to score all folds.
        break

    # Average the metrics over the folds that were evaluated
    avg_accuracy = np.mean(accuracy_list)
    avg_precision = np.mean(precision_list)
    avg_recall = np.mean(recall_list)
    avg_f1 = np.mean(f1_list)
    avg_auc = np.mean(auc_list)

    # Combine metrics into a single "score"
    # combined_score = (avg_f1 + avg_precision + avg_recall + avg_accuracy + avg_auc) / 5
    combined_score = avg_f1

    return combined_score

Using cuda


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# import threading
# import optuna
# from optuna_dashboard import run_server

# def start_dashboard():
#     run_server(storage)

# storage = optuna.storages.InMemoryStorage()
# study = optuna.create_study(direction="maximize", storage=storage, study_name="Basic")

# # Start dashboard in a separate thread
# dashboard_thread = threading.Thread(target=start_dashboard, daemon=True)
# dashboard_thread.start()

# # Run optimization
# study.optimize(maximise_combined_score, n_trials=30)

# # After optimization, print results
# print("Best trial:")
# trial = study.best_trial
# print(f"  Combined score: {trial.value}")
# print("  Best hyperparameters:")
# for key, value in trial.params.items():
#     print(f"    {key}: {value}")


In [None]:
import threading
import optuna
from optuna_dashboard import run_server
# !fuser -k 8080/tcp

# Persistent storage so the study survives kernel restarts
storage = "sqlite:///opt6-skip-connections.db"
study_name = "optuna6-skip-connections"

# load_if_exists=True resumes the study when it is already in the database and
# creates it otherwise, replacing the load_study / except KeyError dance.
study = optuna.create_study(
    study_name=study_name,
    direction="maximize",
    storage=storage,
    load_if_exists=True,
)

# Start Optuna Dashboard in a separate (daemon) thread
dashboard_thread = threading.Thread(target=lambda: run_server(storage), daemon=True)
dashboard_thread.start()

# Run optimization
# Ensure the 'DR' column exists in the DataFrame
if 'DR' not in df.columns:
    raise KeyError("'DR' column is missing in the DataFrame. Please ensure it is present before running the optimization.")

study.optimize(maximise_combined_score, n_trials=1000)

# Print results. study.best_trial raises ValueError when no trial has
# completed (e.g. every trial failed), so check first.
completed_trials = [t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE]
if not completed_trials:
    print("No trial completed successfully; nothing to report.")
else:
    print("Best trial:")
    trial = study.best_trial
    print(f"  Combined score: {trial.value}")
    print("  Best hyperparameters:")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")


[I 2025-04-20 22:11:33,795] A new study created in RDB with name: optuna6-skip-connections
Bottle v0.13.2 server starting up (using WSGIRefServer())...
Listening on http://localhost:8080/
Hit Ctrl-C to quit.



Using device: cuda
Original class distribution: DR
0.0    4129
1.0     464
Name: count, dtype: int64
After OD, majority: 3591
After OD, minority: 452
Before oversampling & synthetic data: DR 
0.0    3591
1.0     452
Name: count, dtype: int64

Applying SMOTENC oversampling...
               Age       Gender    Community         UAlb           Ucr  \
count  7182.000000  7182.000000  7182.000000  7182.000000   7182.000000   
mean     63.515566     0.530354     4.092593    41.220628   3868.891193   
std       7.000632     0.499113     3.030997    91.746780   5508.039556   
min      36.000000     0.000000     0.000000     0.100000      1.000000   
25%      59.000000     0.000000     1.000000     5.284210      6.000000   
50%      64.000000     1.000000     4.000000    11.900000     11.533487   
75%      68.000000     1.000000     7.000000    33.300000   7263.500000   
max      92.000000     1.000000     9.000000  1049.900000  21612.000000   

              UACR           TC           TG    

Loss: 34.748:   4%|▍         | 31/800 [00:10<04:22,  2.92it/s]
[W 2025-04-20 22:12:06,467] Trial 0 failed with parameters: {'contamination_majority': 0.13028024849691833, 'contamination_minority': 0.02435868841723967, 'oversampler_first': True, 'synthesizer': 'TVAE', 'epochs': 800, 'n_synthetic_data': 9000} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\TAN LE ZHAN\Documents\GitHub\ADL2\.venv\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\TAN LE ZHAN\AppData\Local\Temp\ipykernel_32168\696415111.py", line 17, in maximise_combined_score
    kFolds = FOLDS_GENERATOR(df, n_splits=5, random_state=42,
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\TAN LE ZHAN\Documents\GitHub\ADL2\Preprocessing_Functions.py", line 241, in FOLDS_GENERATOR
    X_train_processed = Synthetic_Data_Generator(X_train_processed, s

KeyboardInterrupt: 

127.0.0.1 - - [20/Apr/2025 22:14:21] "GET / HTTP/1.1" 302 0
127.0.0.1 - - [20/Apr/2025 22:14:21] "GET /dashboard HTTP/1.1" 200 4145
127.0.0.1 - - [20/Apr/2025 22:14:21] "GET /static/bundle.js HTTP/1.1" 200 4140872
127.0.0.1 - - [20/Apr/2025 22:14:23] "GET /api/studies HTTP/1.1" 200 152
127.0.0.1 - - [20/Apr/2025 22:14:23] "GET /favicon.ico HTTP/1.1" 200 7670
127.0.0.1 - - [20/Apr/2025 22:14:24] "GET /api/studies/1/param_importances?evaluator=ped_anova HTTP/1.1" 200 27
127.0.0.1 - - [20/Apr/2025 22:14:24] "GET /api/studies/1?after=0 HTTP/1.1" 200 2229
127.0.0.1 - - [20/Apr/2025 22:14:24] "GET /api/meta HTTP/1.1" 200 63
127.0.0.1 - - [20/Apr/2025 22:14:33] "GET /api/studies/1?after=1 HTTP/1.1" 200 391
127.0.0.1 - - [20/Apr/2025 22:14:38] "GET /api/studies/1?after=1 HTTP/1.1" 200 391
127.0.0.1 - - [20/Apr/2025 22:14:49] "GET /api/studies/1?after=1 HTTP/1.1" 200 391
127.0.0.1 - - [20/Apr/2025 22:14:58] "GET /api/studies/1/param_importances?evaluator=mean_decrease_impurity HTTP/1.1" 200 27
