# Optuna Example

In [70]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader , TensorDataset
from torch.optim import Adam
import torch.nn.init as init

import numpy as np

import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import MultipleLocator
import matplotlib.cm as cm

import copy
import seaborn as sns

from scipy.stats import norm
from sklearn.neighbors import KernelDensity, LocalOutlierFactor

import tqdm

import optuna
from optuna.trial import TrialState



## Load Data

In [71]:
# Runtime configuration: run on the GPU when one is available.
cuda = torch.cuda.is_available()
DEVICE = torch.device("cuda" if cuda else "cpu")

num_seeds = 300        # number of rollout files to load (rand-0 .. rand-{num_seeds - 1})
seed = 0               # seed id embedded in the file names below

all_state_dim = 64     # column count of the flattened "state" rows
state_dim = 64
action_dim = 19        # column count of the flattened "action_lowpass" rows
training_seed = 300    # the first `training_seed` rollouts form the training set

# Per-rollout containers. Only `data_fullstate` is filled in this cell; the
# `data_no_*` arrays are allocated but left empty here.
data_fullstate = np.empty(num_seeds, dtype=object)
data_no_joint_pos = np.empty(num_seeds, dtype=object)
data_no_joint_vel = np.empty(num_seeds, dtype=object)
data_no_action = np.empty(num_seeds, dtype=object)
data_no_imu = np.empty(num_seeds, dtype=object)
data_no_fc = np.empty(num_seeds, dtype=object)
for i in range(num_seeds): # HEBB-FULL_STATE-seed_0-fullstate-rand-0
    data_fullstate[i] = np.load(f"data/HEBB-full/HEBB-FULL_STATE-seed_{seed}-fullstate-rand-{i}.npz")


def _rollout_rows(rollout, key):
    """Return `rollout[key]` flattened to 2-D (rows, features) as a float32 tensor on DEVICE.

    The archive entry is read exactly once: indexing an ``.npz`` archive loads
    the array from the file on every access, so the value is kept in a local.
    """
    values = rollout[key]
    return torch.tensor(values.reshape(values.shape[0], -1), dtype=torch.float32, device=DEVICE)


train_x = torch.empty((0, all_state_dim), dtype=torch.float32, device=DEVICE)
train_y = torch.empty((0, action_dim), dtype=torch.float32, device=DEVICE)
test_x = torch.empty((0, all_state_dim), dtype=torch.float32, device=DEVICE)
test_y = torch.empty((0, action_dim), dtype=torch.float32, device=DEVICE)

# Collect the per-rollout tensors first and concatenate once. Calling torch.cat
# inside the loop re-copies everything accumulated so far on every iteration.
# The empty tensor is kept as the first element so the result is still a valid
# (0, dim) tensor when training_seed == 0.
train_x = torch.cat(
    [train_x] + [_rollout_rows(data_fullstate[i], "state") for i in range(training_seed)],
    dim=0,
)
train_y = torch.cat(
    [train_y] + [_rollout_rows(data_fullstate[i], "action_lowpass") for i in range(training_seed)],
    dim=0,
)

# Held-out split (disabled: test_x / test_y stay empty).
# test_x = torch.cat([test_x] + [_rollout_rows(data_fullstate[j], "state") for j in range(training_seed, num_seeds)], dim=0)
# test_y = torch.cat([test_y] + [_rollout_rows(data_fullstate[j], "action_lowpass") for j in range(training_seed, num_seeds)], dim=0)

## Network

In [72]:
"""
    A simple implementation of Gaussian MLP Encoder
"""
class Predictor(nn.Module):
    """
    MLP with two tanh hidden layers and a Gaussian output head.

    ``forward`` returns a sample ``z`` together with the two head outputs
    ``mean`` and ``log_var``. The sample is drawn with the reparameterization
    trick so gradients flow through both heads.

    NOTE(review): ``exp(log_var)`` is used directly as the multiplier of the
    noise, i.e. as a scale (standard deviation), so the ``FC_var`` output
    behaves like a log-scale rather than a log-variance. The numerics are left
    as they were; confirm the naming against how ``log_var`` is used downstream.

    Args:
        input_dim: number of input features.
        hidden_dim: width of both hidden layers.
        output_dim: number of output features (size of ``mean`` / ``log_var``).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Predictor, self).__init__()

        self.FC_input = nn.Linear(input_dim, hidden_dim)
        self.FC_input2 = nn.Linear(hidden_dim, hidden_dim)

        self.FC_mean  = nn.Linear(hidden_dim, output_dim)
        self.FC_var   = nn.Linear(hidden_dim, output_dim)

        # Only `tanh` is used by forward(); LeakyReLU and softplus are kept as
        # attributes so existing references to them keep working.
        self.LeakyReLU = nn.LeakyReLU(0.2)
        self.tanh = nn.Tanh()
        self.softplus = nn.Softplus()

        # nn.Module.__init__ already puts the module in training mode, so no
        # explicit `self.training = True` is needed here.

    def reparameterization(self, mean, var):
        """Return ``mean + var * epsilon`` with ``epsilon ~ N(0, I)``.

        ``epsilon`` is created with ``randn_like(var)``, so it already has the
        device and dtype of ``var``; no dependence on a global device.
        """
        epsilon = torch.randn_like(var)                 # sampling epsilon
        z = mean + var * epsilon                        # reparameterization trick
        return z

    def forward(self, x):
        """Map ``x`` to ``(z, mean, log_var)``.

        Noise is sampled on every call, regardless of train/eval mode.
        """
        h_       = self.tanh(self.FC_input(x))
        h_       = self.tanh(self.FC_input2(h_))
        mean     = self.FC_mean(h_)         # mean of the output distribution
        log_var  = self.FC_var(h_)          # unconstrained log-scale output (see class note)

        # z = mean + exp(log_var) * epsilon
        z = self.reparameterization(mean, torch.exp(log_var))
        return z, mean, log_var

## Optuna setting

In [73]:
batch_size = 1000

# Column ranges inside the 64-wide state rows, one range per signal group
# (group names follow the variable names: pos, vel, action, IMU, fc).
pos_index, vel_index, action_index = (
    torch.arange(start, start + 19) for start in (0, 19, 38)
)   # pos: 0-18, vel: 19-37, action: 38-56
IMU_index = torch.arange(57, 60)
fc_index = torch.arange(60, 64)

# One dataset per signal group: the selected state columns as input, the full
# action target as output.
pos_dataset, vel_dataset, action_dataset, IMU_dataset, fc_dataset = (
    TensorDataset(train_x[:, columns], train_y)
    for columns in (pos_index, vel_index, action_index, IMU_index, fc_index)
)


In [74]:
# Fixed settings shared by every trial.
train_dataset = fc_dataset       # dataset built from the fc_index columns
input_dim, output_dim = 4, 19    # network input / output widths
epochs = 150                     # training epochs per trial
hidden_dim = 256                 # module-level value; objective() samples its own hidden_dim

def objective(trial):
    """Optuna objective: train a Predictor on `train_dataset` and return its final training loss.

    Sampled hyperparameters:
        lr          log-uniform in [1e-6, 1e-3]
        hidden_dim  one of 128, 256, 512, 1024
        batch_size  one of 16, 32, ..., 2048

    The per-epoch average training MSE is reported to the trial, and the trial
    is stopped early when the study's pruner asks for it.

    Args:
        trial: the `optuna.trial.Trial` supplied by `study.optimize`.

    Returns:
        Average training MSE (per sample) of the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner decides to stop the trial.
    """
    lr = trial.suggest_float("lr", 1e-6, 1e-3, log=True)
    hidden_dim = trial.suggest_categorical("hidden_dim", [128, 256 ,512, 1024])
    batch_size = trial.suggest_categorical("batch_size", [16 , 32 ,64, 128, 256, 512, 1024, 2048])

    model = Predictor(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim).to(DEVICE)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    criterion = nn.MSELoss()

    optimizer = Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for x_batch, y_batch in train_loader:
            x_batch = x_batch.to(DEVICE)    # dim = [batch_size, input_dim]
            y_batch = y_batch.to(DEVICE)

            optimizer.zero_grad()
            # The loss is computed on the sampled prediction; mean / log_var are unused here.
            pred, mean, log_var = model(x_batch)
            loss = criterion(pred, y_batch)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch average is per sample even
            # when the last batch is smaller.
            total_loss += loss.item() * x_batch.size(0)

        avg_loss = total_loss / len(train_dataset)
        trial.report(avg_loss, epoch)

        # Reporting alone never stops a trial; the pruner has to be consulted explicitly.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return avg_loss

In [75]:
# Run the hyperparameter search: at most 100 trials or 900 seconds, whichever
# limit is reached first.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100, timeout=900)

# Split the finished trials by final state for the summary below.
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
for label, trials in (
    ("  Number of finished trials: ", study.trials),
    ("  Number of pruned trials: ", pruned_trials),
    ("  Number of complete trials: ", complete_trials),
):
    print(label, len(trials))

# Best trial found by the study and the hyperparameters it used.
print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2025-08-16 14:48:03,319] A new study created in memory with name: no-name-ca66b019-0fac-4af7-a571-69cae407aeae
[I 2025-08-16 14:54:24,875] Trial 0 finished with value: 0.09381456708351771 and parameters: {'lr': 5.583649229448393e-06, 'hidden_dim': 512, 'batch_size': 64}. Best is trial 0 with value: 0.09381456708351771.
[I 2025-08-16 14:56:24,303] Trial 1 finished with value: 0.09388205257018407 and parameters: {'lr': 4.078409024963005e-06, 'hidden_dim': 1024, 'batch_size': 1024}. Best is trial 0 with value: 0.09381456708351771.
[I 2025-08-16 14:58:18,520] Trial 2 finished with value: 0.09383780233939489 and parameters: {'lr': 0.00013406276551057106, 'hidden_dim': 128, 'batch_size': 512}. Best is trial 0 with value: 0.09381456708351771.
[I 2025-08-16 15:02:11,428] Trial 3 finished with value: 0.09391831790447235 and parameters: {'lr': 0.0005518345317866675, 'hidden_dim': 1024, 'batch_size': 128}. Best is trial 0 with value: 0.09381456708351771.
[I 2025-08-16 15:23:17,074] Trial 4 fin

Study statistics: 
  Number of finished trials:  5
  Number of pruned trials:  0
  Number of complete trials:  5
Best trial:
  Value:  0.09380147417028745
  Params: 
    lr: 1.1236751914030148e-05
    hidden_dim: 128
    batch_size: 16


# Use Model