<a href="https://colab.research.google.com/github/Shirui-peng/Deep-Learning-in-Quantitative-Trading/blob/main/Chapter4/01_wf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

We now present a complete workflow for hyperparameter tuning with cross-validation using Optuna. In particular, we use rolling-forward hold-out cross-validation: each fold trains on all samples up to a cutoff and validates on the chronologically next slice, and once a slice has been used for validation it is added to the training set of the following fold.

In [1]:
!pip install optuna
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import KFold
import optuna
import optuna.visualization as vis

Collecting optuna
  Downloading optuna-4.5.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.5.0-py3-none-any.whl (400 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.9/400.9 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.9.0 optuna-4.5.0


In [2]:
# Seed both libraries up front: numpy drives the synthetic data below,
# torch drives weight initialisation and DataLoader shuffling later on.
np.random.seed(42)
torch.manual_seed(42)

N, D = 1000, 10  # number of samples, number of features

# Linear ground truth plus Gaussian noise. The draw order (features, then
# weights, then noise) determines the generated values, so keep it as is.
X = np.random.randn(N, D).astype(np.float32)
true_w = np.random.randn(D).astype(np.float32)
y = np.matmul(X, true_w) + np.random.randn(N).astype(np.float32) * 0.5

# Tensor views over the numpy arrays (from_numpy shares the underlying memory).
X_torch, y_torch = torch.from_numpy(X), torch.from_numpy(y)

In [3]:
class MLPRegressor(nn.Module):
    """Feed-forward regressor: a Linear+ReLU pair per hidden width, then a linear head.

    Args:
        input_dim: size of each input feature vector.
        hidden_dims: iterable of hidden-layer widths; may be empty, in which
            case the network is a single linear layer.
        output_dim: size of the output vector (default 1).
    """

    def __init__(self, input_dim, hidden_dims, output_dim=1):
        super().__init__()
        # widths[i] -> widths[i + 1] describes the i-th hidden Linear layer.
        widths = [input_dim, *hidden_dims]
        modules = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Output head has no activation.
        modules.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the raw output."""
        return self.net(x)

In [4]:
train_end_fractions = [0.7, 0.8, 0.9]
val_fraction = 0.1  # each validation is 10% of data

def rolling_forward_cv_mse(model_fn, batch_size, lr, n_epochs=20, X=None, y=None):
    """Average validation MSE over rolling-forward (expanding-window) folds.

    One fold is built per entry of ``train_end_fractions``: the model is
    trained on every sample before the cutoff and scored on the next
    ``val_fraction`` of the data. A fresh model and Adam optimizer are created
    for each fold.

    Args:
        model_fn: zero-argument callable returning a new ``nn.Module``.
        batch_size: mini-batch size for the training DataLoader.
        lr: learning rate passed to Adam.
        n_epochs: number of passes over each fold's training split.
        X: optional feature tensor with samples along dim 0. Defaults to the
            module-level ``X_torch``.
        y: optional target tensor aligned with ``X``. Defaults to the
            module-level ``y_torch``.

    Returns:
        The mean of the per-fold validation MSEs as a float, or ``np.inf``
        when no fold fits inside the data.

    Raises:
        ValueError: if only one of ``X`` / ``y`` is supplied.
    """
    if (X is None) != (y is None):
        raise ValueError("X and y must be provided together")
    if X is None:
        X, y = X_torch, y_torch

    # Derive the sample count from the data itself rather than a global.
    n_samples = X.shape[0]
    # Round instead of truncating: (0.7 + 0.1) * 1000 evaluates to
    # 799.9999999999999 in floating point, and int() would silently drop one
    # validation sample from that fold.
    val_size = round(val_fraction * n_samples)

    mse_scores = []
    criterion = nn.MSELoss()

    for frac in train_end_fractions:
        train_end = round(frac * n_samples)
        val_end = train_end + val_size

        # No more folds are possible once the validation window runs off the end.
        if val_end > n_samples:
            break

        # Chronological split: train on the past, validate on the next slice.
        X_train, y_train = X[:train_end], y[:train_end]
        X_val, y_val = X[train_end:val_end], y[train_end:val_end]

        # Fresh model and optimizer per fold so no weights leak between folds.
        model = model_fn()
        optimizer = optim.Adam(model.parameters(), lr=lr)

        train_data = torch.utils.data.TensorDataset(X_train, y_train)
        train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=batch_size, shuffle=True
        )

        # Train
        for epoch in range(n_epochs):
            model.train()
            for xb, yb in train_loader:
                optimizer.zero_grad()
                # squeeze(-1) drops only the trailing output dimension, so a
                # one-sample batch stays shape [1] and still matches yb.
                pred = model(xb).squeeze(-1)
                loss = criterion(pred, yb)
                loss.backward()
                optimizer.step()

        # Validation
        model.eval()
        with torch.no_grad():
            val_pred = model(X_val).squeeze(-1)
            val_loss = criterion(val_pred, y_val).item()
        mse_scores.append(val_loss)

    return float(np.mean(mse_scores)) if len(mse_scores) > 0 else np.inf

In [5]:
def objective(trial):
    """Optuna objective: sample an MLP configuration and score it with rolling-forward CV.

    Args:
        trial: the Optuna trial used to draw hyperparameters.

    Returns:
        The average validation MSE reported by ``rolling_forward_cv_mse``.
    """
    # Depth is drawn first; one width parameter is then drawn per hidden layer,
    # so the set of ``units_layer_*`` parameters depends on the sampled depth.
    depth = trial.suggest_int("n_layers", 1, 3)
    hidden_dims = [
        trial.suggest_int(f"units_layer_{layer_idx}", 16, 64, step=16)
        for layer_idx in range(depth)
    ]

    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [8, 16, 32])

    # The CV routine calls the factory once per fold to get an untrained model.
    return rolling_forward_cv_mse(
        model_fn=lambda: MLPRegressor(input_dim=D, hidden_dims=hidden_dims).float(),
        batch_size=batch_size,
        lr=lr,
        n_epochs=20,  # keep it small for demo
    )

In [6]:
# Seed the sampler explicitly: Optuna's sampler keeps its own random state, so
# the numpy/torch seeds set earlier do not make the suggested hyperparameters
# repeatable. TPESampler is also what create_study uses when no sampler is given.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=5, show_progress_bar=True)

[I 2025-09-13 21:36:57,905] A new study created in memory with name: no-name-fed664ef-a466-435f-8718-8032e0f032f5


  0%|          | 0/5 [00:00<?, ?it/s]

[I 2025-09-13 21:37:12,454] Trial 0 finished with value: 0.3085498462120692 and parameters: {'n_layers': 3, 'units_layer_0': 64, 'units_layer_1': 32, 'units_layer_2': 64, 'lr': 0.0008759800616471889, 'batch_size': 8}. Best is trial 0 with value: 0.3085498462120692.
[I 2025-09-13 21:37:16,169] Trial 1 finished with value: 0.32129426797231037 and parameters: {'n_layers': 3, 'units_layer_0': 16, 'units_layer_1': 48, 'units_layer_2': 48, 'lr': 0.009539290703683779, 'batch_size': 16}. Best is trial 0 with value: 0.3085498462120692.
[I 2025-09-13 21:37:19,458] Trial 2 finished with value: 0.312910000483195 and parameters: {'n_layers': 2, 'units_layer_0': 64, 'units_layer_1': 16, 'lr': 0.0032941686627155293, 'batch_size': 16}. Best is trial 0 with value: 0.3085498462120692.
[I 2025-09-13 21:37:27,763] Trial 3 finished with value: 0.32289118071397144 and parameters: {'n_layers': 3, 'units_layer_0': 48, 'units_layer_1': 16, 'units_layer_2': 64, 'lr': 0.001057770558992759, 'batch_size': 8}. Best

In [7]:
# Report the winning trial.
best_trial = study.best_trial
print("\nBest Trial:")
print(f"  Value (MSE): {best_trial.value:.6f}")
print("  Params:")
for param_name, param_value in best_trial.params.items():
    print(f"    {param_name}: {param_value}")

# Retrain a final model on the entire dataset with the best hyperparameters.
best_params = best_trial.params
final_n_layers = best_params["n_layers"]
final_hidden_dims = [
    best_params[f"units_layer_{layer_idx}"] for layer_idx in range(final_n_layers)
]
final_lr, final_batch_size = best_params["lr"], best_params["batch_size"]

final_model = MLPRegressor(input_dim=D, hidden_dims=final_hidden_dims).float()
final_optimizer = optim.Adam(final_model.parameters(), lr=final_lr)
criterion = nn.MSELoss()

# Every sample is used for fitting here; nothing is held out.
dataset_full = torch.utils.data.TensorDataset(X_torch, y_torch)
loader_full = torch.utils.data.DataLoader(
    dataset_full, shuffle=True, batch_size=final_batch_size
)

n_epochs_final = 20
for _ in range(n_epochs_final):
    final_model.train()
    for batch_x, batch_y in loader_full:
        final_optimizer.zero_grad()
        batch_loss = criterion(final_model(batch_x).squeeze(), batch_y)
        batch_loss.backward()
        final_optimizer.step()

print("\nRetrained final model with best hyperparameters!")


Best Trial:
  Value (MSE): 0.308025
  Params:
    n_layers: 1
    units_layer_0: 64
    lr: 0.004127387633559554
    batch_size: 8

Retrained final model with best hyperparameters!


In [8]:
# Plot the study's optimization history and render it in the notebook.
fig = vis.plot_optimization_history(study=study)
fig.show()