# Initial Setups

## Setup Environment and Project Path Configs

In [1]:
# General CPU Usage Optimization
import os
os.environ['OMP_NUM_THREADS'] = '16'
os.environ['MKL_NUM_THREADS'] = '16'
os.environ['OPENBLAS_NUM_THREADS'] = '16'
os.environ['NUMEXPR_NUM_THREADS'] = '16'

# PyTorch-specific CPU Usage Optimization
import torch
try:
    torch.set_num_threads(16)
except RuntimeError as e:
    print(f"Warning: Could not set torch.set_num_threads.\n{e}")

try:
    torch.set_num_interop_threads(16)
except RuntimeError as e:
    print(f"Warning: Could not set torch.set_num_interop_threads.\n{e}")

print(f"PyTorch threads: {torch.get_num_threads()}")
print(f"PyTorch interop threads: {torch.get_num_interop_threads()}")

# Configure Project Path for Module Imports
import sys
from pathlib import Path

# Get the current working directory
current_dir = os.getcwd()

# Navigate up to the project root directory
project_root = Path(current_dir).parent.resolve()

# Add the project root to sys.path if it's not already there
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

print(f"Project root added to sys.path: {project_root}")

# General Utility for Timestamps
from datetime import datetime

PyTorch threads: 16
PyTorch interop threads: 16
Project root added to sys.path: C:\Users\Acer\Desktop\Projects for Data Science\Drug Gi50 Value Prediction


## Import Core Libraries

In [2]:
# Data Manipulation
import pandas as pd
import numpy as np

# PyTorch Core for Neural Networks
import torch.nn as nn  # Neural network modules like Linear, ReLU, MSELoss
import torch.nn.functional as F  # Functional interface for activations, e.g. F.ReLU
import torch.optim as optim  # Optimization functions like Adam, AdamW, etc.
from torch.optim import lr_scheduler  # Learning rate scheduling
from torch.utils.data import TensorDataset, DataLoader  # Feed data to the model in batches

# MLP Model Class
from src.models.mlp_models import MLP

# Data Preprocessing
from sklearn.preprocessing import StandardScaler

# Mixed Precision Training (for GPU-accelerated training)
# Speeds up training by using float16 where possible
from torch.cuda.amp import autocast, GradScaler

# Hyperparameter Optimization
import optuna

# Model Evaluation
from sklearn.metrics import mean_squared_error, r2_score

# Git commit ID for final model filename (for reproducibility)
import subprocess

## Import Utility Libraries

In [3]:
# Progress bars
tqdm_notebook_available = False
try:
    from tqdm.notebook import tqdm
    tqdm.pandas() # Enable tqdm for pandas apply
    tqdm_notebook_available = True
except ImportError:
    print("tqdm.notebook not found. Install with 'pip install tqdm'.")

## Define Device (GPU/CPU)

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


## Set Final Model Save Location

In [5]:
mlp_models_base_dir = Path("../models/mlp")
mlp_models_base_dir.mkdir(parents=True, exist_ok=True)
print(f"The best final MLP model will be saved in: {mlp_models_base_dir}")

The best final MLP model will be saved in: ..\models\mlp


# Load Data Splits

In [6]:
splits_dir = Path("../data/splits")
print(f"\nLoading data splits from {splits_dir}...")

try:
    X_train = pd.read_parquet(splits_dir / "X_train.parquet")
    X_val = pd.read_parquet(splits_dir / "X_val.parquet")
    X_test = pd.read_parquet(splits_dir / "X_test.parquet")
    
    y_train = pd.read_parquet(splits_dir / "y_train.parquet")
    y_val = pd.read_parquet(splits_dir / "y_val.parquet")
    y_test = pd.read_parquet(splits_dir / "y_test.parquet")
    print("Data splits loaded successfully.")
except FileNotFoundError:
    print(f"Error: One or more split files not found in '{splits_dir}'.")
    print("Please ensure you have run '02_Split_Features.ipynb' to generate and save the splits.")

print(f"X_train shape: {X_train.shape}")
print(f"X_val shape: {X_val.shape}")
print(f"X_test shape: {X_test.shape}")

print(f"y_train shape: {y_train.shape}")
print(f"y_val shape: {y_val.shape}")
print(f"y_test shape: {y_test.shape}")

# Display first few rows to verify data
print("\nFirst 5 rows of X_train:")
display(X_train.head())

print("\nFirst 5 rows of y_train:")
display(y_train.head())


Loading data splits from ..\data\splits...
Data splits loaded successfully.
X_train shape: (13119, 2268)
X_val shape: (2812, 2268)
X_test shape: (2812, 2268)
y_train shape: (13119, 1)
y_val shape: (2812, 1)
y_test shape: (2812, 1)

First 5 rows of X_train:


Unnamed: 0,molregno,canonical_smiles,num_activities,MaxAbsEStateIndex,MaxEStateIndex,MinAbsEStateIndex,MinEStateIndex,qed,SPS,MolWt,...,morgan_fp_2038,morgan_fp_2039,morgan_fp_2040,morgan_fp_2041,morgan_fp_2042,morgan_fp_2043,morgan_fp_2044,morgan_fp_2045,morgan_fp_2046,morgan_fp_2047
0,2307646,COc1cccc2c1OCc1c-2nc2cnc3ccccc3c2c1C,6,6.033142,6.033142,0.494176,0.494176,0.476742,12.56,328.371,...,0,0,0,0,0,0,0,0,0,0
1,2081122,COc1cc(/C(C#N)=C/c2ccc3c(c2)OCCO3)cc(OC)c1OC,9,9.645791,9.645791,0.459195,0.459195,0.604738,12.923077,353.374,...,0,0,0,0,0,0,0,0,0,0
2,2199496,COC(=O)[C@@H]1CCCN1Cc1ccc(-c2ncc(-c3ccc(OCC=C(...,6,11.953178,11.953178,0.169552,-0.173158,0.359463,15.909091,447.535,...,0,0,0,0,0,0,0,0,0,0
3,2221960,O=C(/C=C/c1cccn(C/C=C/c2ccccc2Br)c1=O)NO,4,12.253458,12.253458,0.216419,-0.686457,0.479732,11.217391,375.222,...,0,0,0,0,0,0,0,0,0,0
4,2879093,Cc1cc(C2c3c(-c4cccc5[nH]c(=O)oc45)n[nH]c3C(=O)...,2,14.128489,14.128489,0.124437,-3.116139,0.437556,16.121212,472.879,...,0,0,0,0,0,0,0,0,0,0



First 5 rows of y_train:


Unnamed: 0,pGI50
14387,5.734742
12543,7.164746
12810,4.928428
13172,6.882724
18712,6.094208


# Prepare Data for MLP

## Convert Data To Numpy Arrays

In [7]:
print("\nPreparing X for MLP training (dropping identifiers)...")
X_train_mlp = X_train.drop(columns=['molregno', 'canonical_smiles'], errors='ignore')
X_val_mlp = X_val.drop(columns=['molregno', 'canonical_smiles'], errors='ignore')
X_test_mlp = X_test.drop(columns=['molregno', 'canonical_smiles'], errors='ignore')

print(f"X_train_mlp shape (numerical features only): {X_train_mlp.shape}")
print(f"X_val_mlp shape (numerical features only): {X_val_mlp.shape}")
print(f"X_test_mlp shape (numerical features only): {X_test_mlp.shape}")

display(X_train_mlp.head())
display(y_train.head())

X_train_np = X_train_mlp.values
y_train_np = y_train.values.reshape(-1, 1)

X_val_np = X_val_mlp.values
y_val_np = y_val.values.reshape(-1, 1)

X_test_np = X_test_mlp.values
y_test_np = y_test.values.reshape(-1, 1)

print("Data converted to NumPy arrays.")
print(f"X_train_np shape: {X_train_np.shape}, y_train_np shape: {y_train_np.shape}")
print(f"X_val_np shape: {X_val_np.shape}, y_val_np shape: {y_val_np.shape}")
print(f"X_test_np shape: {X_test_np.shape}, y_test_np shape: {y_test_np.shape}")


Preparing X for MLP training (dropping identifiers)...
X_train_mlp shape (numerical features only): (13119, 2266)
X_val_mlp shape (numerical features only): (2812, 2266)
X_test_mlp shape (numerical features only): (2812, 2266)


Unnamed: 0,num_activities,MaxAbsEStateIndex,MaxEStateIndex,MinAbsEStateIndex,MinEStateIndex,qed,SPS,MolWt,HeavyAtomMolWt,ExactMolWt,...,morgan_fp_2038,morgan_fp_2039,morgan_fp_2040,morgan_fp_2041,morgan_fp_2042,morgan_fp_2043,morgan_fp_2044,morgan_fp_2045,morgan_fp_2046,morgan_fp_2047
0,6,6.033142,6.033142,0.494176,0.494176,0.476742,12.56,328.371,312.243,328.121178,...,0,0,0,0,0,0,0,0,0,0
1,9,9.645791,9.645791,0.459195,0.459195,0.604738,12.923077,353.374,334.222,353.126323,...,0,0,0,0,0,0,0,0,0,0
2,6,11.953178,11.953178,0.169552,-0.173158,0.359463,15.909091,447.535,418.303,447.215806,...,0,0,0,0,0,0,0,0,0,0
3,4,12.253458,12.253458,0.216419,-0.686457,0.479732,11.217391,375.222,360.102,374.026604,...,0,0,0,0,0,0,0,0,0,0
4,2,14.128489,14.128489,0.124437,-3.116139,0.437556,16.121212,472.879,453.727,472.111375,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,pGI50
14387,5.734742
12543,7.164746
12810,4.928428
13172,6.882724
18712,6.094208


Data converted to NumPy arrays.
X_train_np shape: (13119, 2266), y_train_np shape: (13119, 1)
X_val_np shape: (2812, 2266), y_val_np shape: (2812, 1)
X_test_np shape: (2812, 2266), y_test_np shape: (2812, 1)


## Standardize Data

In [8]:
print("\nScaling features using StandardScaler for MLP...")
scaler = StandardScaler()

# ONLY fit_transform on TRAINING data
X_train_scaled_np = scaler.fit_transform(X_train_np)
X_val_scaled_np = scaler.transform(X_val_np)
X_test_scaled_np = scaler.transform(X_test_np)

print("Features scaled successfully.")
print(f"X_train_scaled_np shape: {X_train_scaled_np.shape}")
print(f"X_val_scaled_np shape: {X_val_scaled_np.shape}")
print(f"X_test_scaled_np shape: {X_test_scaled_np.shape}")


Scaling features using StandardScaler for MLP...
Features scaled successfully.
X_train_scaled_np shape: (13119, 2266)
X_val_scaled_np shape: (2812, 2266)
X_test_scaled_np shape: (2812, 2266)


## Convert Data to PyTorch Tensors

In [9]:
if 'device' not in locals(): # Check if device variable is already set
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device not previously set, now using: {device}")

print("\n--- Converting Scaled NumPy Arrays to PyTorch Tensors ---")
X_train_tensor = torch.from_numpy(X_train_scaled_np).float().to(device)
y_train_tensor = torch.from_numpy(y_train_np).float().to(device)
X_val_tensor = torch.from_numpy(X_val_scaled_np).float().to(device)
y_val_tensor = torch.from_numpy(y_val_np).float().to(device)
X_test_tensor = torch.from_numpy(X_test_scaled_np).float().to(device)
y_test_tensor = torch.from_numpy(y_test_np).float().to(device)

print("Data converted to PyTorch Tensors and moved to device.")
print(f"X_train_tensor device: {X_train_tensor.device}, shape: {X_train_tensor.shape}")
print(f"y_train_tensor device: {y_train_tensor.device}, shape: {y_train_tensor.shape}")
print(f"X_val_tensor device: {X_val_tensor.device}, shape: {X_val_tensor.shape}")
print(f"y_val_tensor device: {y_val_tensor.device}, shape: {y_val_tensor.shape}")
print(f"X_test_tensor device: {X_test_tensor.device}, shape: {X_test_tensor.shape}")
print(f"y_test_tensor device: {y_test_tensor.device}, shape: {y_test_tensor.shape}")


--- Converting Scaled NumPy Arrays to PyTorch Tensors ---
Data converted to PyTorch Tensors and moved to device.
X_train_tensor device: cuda:0, shape: torch.Size([13119, 2266])
y_train_tensor device: cuda:0, shape: torch.Size([13119, 1])
X_val_tensor device: cuda:0, shape: torch.Size([2812, 2266])
y_val_tensor device: cuda:0, shape: torch.Size([2812, 1])
X_test_tensor device: cuda:0, shape: torch.Size([2812, 2266])
y_test_tensor device: cuda:0, shape: torch.Size([2812, 1])


# Train Model

## Tune Hyperparameters

### Create TensorDatasets and DataLoaders

In [10]:
# Temporary batch size, used as an Optuna hyperparam later
temp_batch_size = 128

# Create TensorDataset objects from PyTorch Tensors
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Create DataLoader objects using the TensorDataset objects
train_loader = DataLoader(train_dataset, batch_size=temp_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=temp_batch_size, shuffle=False)

print(f"TensorDatasets and DataLoaders created with temporary batch size: {temp_batch_size}")
print(f"Number of training batches: {len(train_loader)}")
print(f"Number of validation batches: {len(val_loader)}")

TensorDatasets and DataLoaders created with temporary batch size: 128
Number of training batches: 103
Number of validation batches: 22


### Define Optuna Objective Function

In [11]:
def objective(trial):
    # Hyperparameters to tune
    hidden_size = trial.suggest_int("hidden_size", 128, 1024, log=True) # Number of neurons in hidden layer
    learning_rate = trial.suggest_float("lr", 1e-4, 5e-3, log=True) # Learning rate for optimizer
    batch_size = trial.suggest_categorical("batch_size", [64, 128, 256]) # Batch size for DataLoaders
    n_epochs = trial.suggest_int("n_epochs", 150, 600) # Number of training epochs

    # Initialize model
    # input_size is number of features in X_train_tensor
    input_size = X_train_tensor.shape[1]
    output_size = 1  # For regression (pGI50)

    model = MLP(input_size, hidden_size, output_size).to(device)

    # Loss function and Optimizer
    criterion = nn.MSELoss() # Mean Squared Error Loss for regression
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # DataLoaders for batching within the trial
    # Re-create DataLoaders here because batch_size is a hyperparameter
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Early Stopping Logic
    best_val_rmse = float('inf')
    patience_counter = 0
    patience = 50 # Number of epochs to wait for improvement before stopping

     # Training Loop
    for epoch in range(n_epochs):
        model.train()  # Set model to training mode
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()  # Zero the gradients before backpropagation
            outputs = model(batch_x)  # Forward pass
            
            loss = criterion(outputs, batch_y)  # Calculate loss
            loss.backward()  # Backward pass: compute gradients
            optimizer.step() # Apply gradients

        # Validation Step
        model.eval()  # Set model to evaluation mode
        val_predictions = []
        val_targets = []
        with torch.no_grad():  # Disable gradient calculations for validation
            for batch_x_val, batch_y_val in val_loader:
                val_outputs = model(batch_x_val)
                val_predictions.extend(val_outputs.cpu().numpy().flatten())
                val_targets.extend(batch_y_val.cpu().numpy().flatten())

        val_rmse = np.sqrt(mean_squared_error(val_targets, val_predictions))

        # Optuna Pruning: Report current validation RMSE to Optuna
        trial.report(val_rmse, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        # Manual Early Stopping Check
        if val_rmse < best_val_rmse:
            best_val_rmse = val_rmse
            patience_counter = 0  # Reset patience if improvement is found

        else:
            patience_counter += 1
            if patience_counter >= patience:
                # print(f"Early stopping at epoch {epoch+1} for trial {trial.number}")
                break

    # Final evaluation on validation set after training (or early stopping)
    model.eval()
    final_val_predictions = []
    final_val_targets = []
    with torch.no_grad():
        for batch_x_val, batch_y_val in val_loader:
            val_outputs = model(batch_x_val)
            final_val_predictions.extend(val_outputs.cpu().numpy().flatten())
            final_val_targets.extend(batch_y_val.cpu().numpy().flatten())

    final_rmse = np.sqrt(mean_squared_error(final_val_targets, final_val_predictions))
    final_r2 = r2_score(final_val_targets, final_val_predictions)

    # Store R2 score as well in the study
    trial.set_user_attr("final_r2_score", float(final_r2))

    return final_rmse # Optuna minimizes this value

### Run Optuna Study

In [12]:
study_dir = Path("../studies/mlp_study")
study_dir.mkdir(parents=True, exist_ok=True)

study_db_path = f"sqlite:///{study_dir / 'mlp_optuna_study.db'}"
study_name = "mlp_regression_pGI50"
print(f"Optuna study for MLP will be stored at: {study_db_path}")

pruner = optuna.pruners.MedianPruner(
    n_startup_trials=10,  # Run at least these many trials completely before starting to prune
    n_warmup_steps=20,    # Don't prune trials until they've completed these many epochs
    interval_steps=10     # Check for pruning every these many epochs
)

# Check if a study with the same name already exists in the database
# If it does, load it to resume the optimization.
try:
    study = optuna.load_study(study_name=study_name, storage=study_db_path)
    print(f"Loaded existing study '{study_name}' from {study_db_path}. Resuming optimization.")
except KeyError:
    # If the study does not exist, create a new one
    print(f"Creating new study '{study_name}' at {study_db_path}.")
    study = optuna.create_study(
        study_name=study_name,
        direction="minimize",
        storage=study_db_path,
        pruner=pruner
    )

print("\nStarting Optuna optimization for MLP...")
study.optimize(objective,
                   n_trials=50,
                   timeout=7200,
                   show_progress_bar=True)
print("\nOptuna optimization finished for MLP.")

# Print best trial results
print("\n--- Best Trial Results for MLP ---")
print(f"Best trial number: {study.best_trial.number}")
print(f"Best RMSE (Validation): {study.best_value:.4f}")
print("Best hyperparameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

if "final_r2_score" in study.best_trial.user_attrs:
    print(f"Best R2 Score (Validation): {study.best_trial.user_attrs['final_r2_score']:.4f}")

Optuna study for MLP will be stored at: sqlite:///..\studies\mlp_study\mlp_optuna_study.db
Loaded existing study 'mlp_regression_pGI50' from sqlite:///..\studies\mlp_study\mlp_optuna_study.db. Resuming optimization.

Starting Optuna optimization for MLP...


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-07-15 20:44:37,349] Trial 102 finished with value: 0.6443317158507817 and parameters: {'hidden_size': 877, 'lr': 0.0002068072782275973, 'batch_size': 128, 'n_epochs': 240}. Best is trial 102 with value: 0.6443317158507817.
[I 2025-07-15 20:48:08,528] Trial 103 finished with value: 0.6487836594518843 and parameters: {'hidden_size': 788, 'lr': 0.00017600995241280688, 'batch_size': 128, 'n_epochs': 189}. Best is trial 102 with value: 0.6443317158507817.
[I 2025-07-15 20:48:09,936] Trial 104 pruned. 
[I 2025-07-15 20:51:26,380] Trial 105 finished with value: 0.6432357152089493 and parameters: {'hidden_size': 794, 'lr': 0.00018179788831323602, 'batch_size': 128, 'n_epochs': 173}. Best is trial 105 with value: 0.6432357152089493.
[I 2025-07-15 20:53:37,075] Trial 106 finished with value: 0.6432342027977853 and parameters: {'hidden_size': 728, 'lr': 0.0001686614362357667, 'batch_size': 128, 'n_epochs': 189}. Best is trial 106 with value: 0.6432342027977853.
[I 2025-07-15 20:53:38,452]

## Train Final Model

### Reinitialize Everything with Best Hyperparams

In [18]:
best_params = study.best_trial.params
best_hidden_size = best_params["hidden_size"]
best_learning_rate = best_params["lr"]
best_batch_size = best_params["batch_size"]
best_n_epochs = best_params["n_epochs"]

print(f"Best hyperparameters from Optuna: {best_params}")

# Re-initialize the model with best hyperparameters
input_size = X_train_tensor.shape[1]
output_size = 1
final_mlp_model = MLP(input_size, best_hidden_size, output_size).to(device)

# Re-initialize criterion and optimizer
final_criterion = nn.MSELoss()
final_optimizer = optim.Adam(final_mlp_model.parameters(), lr=best_learning_rate)

# Re-create DataLoaders with the best batch size (Training + Validation data COMBINED)
X_train_val_tensor = torch.cat((X_train_tensor, X_val_tensor), dim=0)
y_train_val_tensor = torch.cat((y_train_tensor, y_val_tensor), dim=0)

final_train_val_dataset = TensorDataset(X_train_val_tensor, y_train_val_tensor)
final_train_val_loader = DataLoader(final_train_val_dataset, batch_size=best_batch_size, shuffle=True)

# Create the FINAL TEST DataLoader
final_test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
final_test_loader = DataLoader(final_test_dataset, batch_size=best_batch_size, shuffle=False)

print(f"Final model, criterion, optimizer, and DataLoaders initialized with best parameters.")

Best hyperparameters from Optuna: {'hidden_size': 801, 'lr': 0.00016038661160511867, 'batch_size': 128, 'n_epochs': 195}
Final model, criterion, optimizer, and DataLoaders initialized with best parameters.


### Get Current Git Commit ID

In [19]:
def get_git_commit_hash():
    try:
        # Get the short commit hash
        commit_hash = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).strip().decode('ascii')
        return commit_hash
    except (subprocess.CalledProcessError, FileNotFoundError):
        return "unknown_commit"

In [20]:
# Optionally, see the current commit ID
current_commit = get_git_commit_hash()
print(f"Current Git Commit ID: {current_commit}")

Current Git Commit ID: 60d86ce


### Train and Save Model

In [21]:
best_final_val_rmse = float('inf')
patience_counter_final = 0
final_patience = 50

current_commit_hash = get_git_commit_hash()
model_filename = f"final_best_mlp_model_{current_commit_hash}.pt" # Pre-define filename

print(f"Retraining final MLP model for {best_n_epochs} epochs with best parameters...")
print(f"Associated Git Commit ID for saved model: {current_commit_hash}")

for epoch in range(best_n_epochs):
    # Training
    final_mlp_model.train()
    total_train_loss = 0
    num_train_batches = 0
    for batch_x, batch_y in final_train_val_loader:
        # Move data to device
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        
        final_optimizer.zero_grad()
        outputs = final_mlp_model(batch_x)
        loss = final_criterion(outputs, batch_y)
        loss.backward()
        final_optimizer.step()

        total_train_loss += loss.item()
        num_train_batches += 1

    avg_train_loss = total_train_loss / num_train_batches

    # Evaluation
    final_mlp_model.eval()
    val_predictions = []
    val_targets = []
    with torch.no_grad():
        for batch_x_eval, batch_y_eval in final_train_val_loader:
            batch_x_eval = batch_x_eval.to(device)
            batch_y_eval = batch_y_eval.to(device)
            
            val_outputs = final_mlp_model(batch_x_eval)
            val_predictions.extend(val_outputs.cpu().numpy().flatten())
            val_targets.extend(batch_y_eval.cpu().numpy().flatten())

    current_val_rmse = np.sqrt(mean_squared_error(val_targets, val_predictions))
    print(f"Epoch {epoch+1}/{best_n_epochs}, Train Loss: {avg_train_loss:.4f}, Eval RMSE on combined data: {current_val_rmse:.4f}")

    # Dynamic Best Model Saving & Early Stopping
    if current_val_rmse < best_final_val_rmse:
        best_final_val_rmse = current_val_rmse
        torch.save(final_mlp_model.state_dict(), mlp_models_base_dir / model_filename) # Save the model state
        patience_counter_final = 0 # Reset patience counter if performance improved
        print(f"--- New best final model saved at epoch {epoch+1} with RMSE: {current_val_rmse:.4f} ---")
    else:
        patience_counter_final += 1 # Increment patience counter if no improvement
        print(f"No improvement for {patience_counter_final} epochs. Best RMSE so far: {best_final_val_rmse:.4f}")

    if patience_counter_final >= final_patience:
        print(f"Early stopping triggered at epoch {epoch+1}.")
        break

print("Final model training complete.")

Retraining final MLP model for 195 epochs with best parameters...
Associated Git Commit ID for saved model: 60d86ce
Epoch 1/195, Train Loss: 10.9005, Eval RMSE on combined data: 0.9733
--- New best final model saved at epoch 1 with RMSE: 0.9733 ---
Epoch 2/195, Train Loss: 0.7031, Eval RMSE on combined data: 0.6753
--- New best final model saved at epoch 2 with RMSE: 0.6753 ---
Epoch 3/195, Train Loss: 0.4381, Eval RMSE on combined data: 0.5675
--- New best final model saved at epoch 3 with RMSE: 0.5675 ---
Epoch 4/195, Train Loss: 0.3292, Eval RMSE on combined data: 0.5020
--- New best final model saved at epoch 4 with RMSE: 0.5020 ---
Epoch 5/195, Train Loss: 0.2661, Eval RMSE on combined data: 0.4540
--- New best final model saved at epoch 5 with RMSE: 0.4540 ---
Epoch 6/195, Train Loss: 0.2241, Eval RMSE on combined data: 0.4215
--- New best final model saved at epoch 6 with RMSE: 0.4215 ---
Epoch 7/195, Train Loss: 0.1907, Eval RMSE on combined data: 0.3879
--- New best final mode

### Evaluate Model

In [22]:
# Load the best state dict model
print(f"Loading best saved model from '{model_filename}' for final test evaluation...")
path_to_saved_model = mlp_models_base_dir / model_filename
loaded_model_state_dict = torch.load(path_to_saved_model)
final_mlp_model.load_state_dict(loaded_model_state_dict)
final_mlp_model.eval() # Set to evaluation mode for final test

print("\nStarting final evaluation on test set...")
test_predictions = []
test_targets = []
with torch.no_grad():
    for batch_x_test, batch_y_test in final_test_loader:
        batch_x_test = batch_x_test.to(device)
        batch_y_test = batch_y_test.to(device)

        test_outputs = final_mlp_model(batch_x_test)
        test_predictions.extend(test_outputs.cpu().numpy().flatten())
        test_targets.extend(batch_y_test.cpu().numpy().flatten())

final_test_rmse = np.sqrt(mean_squared_error(test_targets, test_predictions))
final_test_r2 = r2_score(test_targets, test_predictions)

print(f"Final Model Test RMSE: {final_test_rmse:.4f}")
print(f"Final Model Test R2: {final_test_r2:.4f}")

Loading best saved model from 'final_best_mlp_model_60d86ce.pt' for final test evaluation...

Starting final evaluation on test set...
Final Model Test RMSE: 0.6418
Final Model Test R2: 0.5703
