In [1]:
import numpy as np
from agent_DQN import DQN_agent
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# Hyperparameter search space. Every entry is a (low, high) pair; `objective`
# reads index 0 as the lower bound and index 1 as the upper bound of the range
# it samples from. The keys match the names `train_dqn` looks up and forwards
# to the `DQN_agent` constructor.
param_space = dict(
    lr=(1e-5, 1e-1),                # presumably the optimizer learning rate
    gamma=(0.9, 0.999),             # presumably the discount factor
    batch_size=(32, 256),
    T=(1, 96),                      # NOTE(review): meaning is defined by DQN_agent
    number_of_layers=(64, 512),     # NOTE(review): range looks like a layer width; confirm
    episodes=(10, 1000),            # upper bound of the training loop in `train_dqn`
    soft_update_interval=(5, 100),
)

In [7]:
def train_dqn(params):
    """Train one DQN agent with the given hyperparameters and return its score.

    Args:
        params: Mapping with the keys 'lr', 'gamma', 'batch_size', 'T',
            'number_of_layers', 'episodes' and 'soft_update_interval'. Each
            value is forwarded to the ``DQN_agent`` constructor argument of
            the same name.

    Returns:
        The negated value returned by the last ``agent.optimize(episode)``
        call that ran (the study in this notebook minimizes the objective).

    Raises:
        KeyError: If a required key is missing from ``params``.
        ValueError: If the agent is configured with fewer than one episode,
            in which case there is no result to return.
    """
    lr = params['lr']
    gamma = params['gamma']
    batch_size = params['batch_size']
    T = params['T']
    number_of_layers = params['number_of_layers']
    episodes = params['episodes']
    soft_update_interval = params['soft_update_interval']

    agent = DQN_agent(episodes=episodes,
                      lr=lr,
                      gamma=gamma,
                      batch_size=batch_size,
                      T=T,
                      number_of_layers=number_of_layers,
                      soft_update_interval=soft_update_interval)

    if agent.episodes < 1:
        raise ValueError(
            f"agent.episodes must be at least 1 to produce a result, got {agent.episodes}"
        )

    # Number of consecutive episodes without a validation-loss improvement.
    # It has to be initialised outside the loop: resetting it on every
    # iteration caps it at 1 and makes any patience above 1 unreachable.
    patience_counter = 0

    for episode in range(1, agent.episodes + 1):

        agent.gather_samples()
        total_reward = agent.optimize(episode)

        if episode % 10 == 0:
            print(f"Episode: {episode}, Training_loss: {agent.training_losses[-1]}, Validation_loss: {agent.validation_losses[-1]}")

        # Early stopping: track the best validation loss seen so far and stop
        # once it has failed to improve for `agent.patience` episodes in a row.
        if agent.last_validation_loss < agent.best_val_loss:
            agent.best_val_loss = agent.last_validation_loss
            patience_counter = 0
        else:
            patience_counter += 1
        if patience_counter >= agent.patience:
            # `episode` is already 1-based, so it is reported as-is.
            print(f'Early stopping at epoch {episode}')
            break
    return -total_reward

In [8]:
def objective(trial):
    """Optuna objective: sample one hyperparameter set and score it.

    Every key that ``train_dqn`` reads from its ``params`` mapping is sampled
    here from the matching ``(low, high)`` entry of ``param_space``; leaving
    any of them out makes ``train_dqn`` fail with ``KeyError``.

    Args:
        trial: Object exposing Optuna's ``suggest_float`` / ``suggest_int``
            interface.

    Returns:
        Whatever ``train_dqn`` returns for the sampled hyperparameters.
    """
    params = {
        # The learning-rate range spans several orders of magnitude, so it is
        # sampled on a log scale; uniform sampling would put almost every
        # trial in the top decade of the range.
        'lr': trial.suggest_float('lr', *param_space['lr'], log=True),
        'gamma': trial.suggest_float('gamma', *param_space['gamma']),
    }

    # The remaining hyperparameters have integer bounds in `param_space`.
    integer_names = (
        'batch_size',
        'T',
        'number_of_layers',
        'episodes',
        'soft_update_interval',
    )
    for name in integer_names:
        low, high = param_space[name]
        params[name] = trial.suggest_int(name, low, high)

    return train_dqn(params)

In [None]:
# Create a study that looks for the smallest objective value. `train_dqn`
# returns the negated result of `agent.optimize`, so minimizing here amounts
# to maximizing that result.
study = optuna.create_study(direction="minimize")

# Run 10 trials; each trial calls `objective` once with a fresh `trial`.
study.optimize(func=objective, n_trials=10)

[I 2023-12-11 21:37:27,079] A new study created in memory with name: no-name-26b97cd7-8bc1-4632-a8ed-4c18cdda8b57
  next_state = torch.FloatTensor(batch.next_state).to(device)


Early stopping at epoch 45


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Early stopping at epoch 64
Early stopping at epoch 106
Early stopping at epoch 44
Early stopping at epoch 34
Early stopping at epoch 83
Early stopping at epoch 41
Early stopping at epoch 75
Early stopping at epoch 46
Early stopping at epoch 32
Early stopping at epoch 75
Early stopping at epoch 58


In [None]:
# Report the hyperparameters of the best trial found by the study.
print('Best hyperparameters:', study.best_params)
# `train_dqn` returns a negated value, so flip the sign back for display.
print('Best value:', -study.best_value)