In [1]:
import os
import optuna

import numpy as np

from algorithms import DoubleDQN
from environment import CreditPayerEnv
from pipeline import MetricsStudy

# Make sure the output directory for the exported HTML figures exists;
# exist_ok=True keeps this cell safe to re-run when the directory is already there.
os.makedirs("figures", exist_ok=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def eval_trajectory(env: CreditPayerEnv, agent: DoubleDQN, trajectory_max_len: int):
    """Run a single episode and return the sum of the rewards collected.

    The environment is reset, then the agent is queried for an action at every
    step until the environment reports ``done`` or ``trajectory_max_len`` steps
    have been taken, whichever happens first.

    Args:
        env: Environment exposing ``reset()`` and a ``step(action)`` that
            returns a 4-tuple ``(state, reward, done, info)``.
        agent: Agent exposing ``get_action(state)``.
        trajectory_max_len: Upper bound on the number of steps in the episode.

    Returns:
        The accumulated reward of the episode (``0`` when no step is taken).
    """
    episode_return = 0
    observation = env.reset()

    for _ in range(trajectory_max_len):
        # The fourth element of the step result is not needed here.
        observation, step_reward, finished, _info = env.step(
            agent.get_action(observation)
        )
        episode_return += step_reward

        if finished:
            return episode_return

    return episode_return

In [3]:
def eval_model(
    env: CreditPayerEnv, agent: DoubleDQN, trajectory_max_len: int, repeat_count: int
):
    """Average the episode reward of ``agent`` over several evaluation runs.

    Args:
        env: Environment the agent is evaluated on.
        agent: Agent whose actions are evaluated.
        trajectory_max_len: Step limit passed to every ``eval_trajectory`` call.
        repeat_count: Number of episodes to run.

    Returns:
        ``np.mean`` of the per-episode total rewards.
    """
    episode_returns = [
        eval_trajectory(env, agent, trajectory_max_len) for _ in range(repeat_count)
    ]
    return np.mean(episode_returns)

In [4]:
def double_objective(trial: optuna.Trial):
    """Optuna objective: train a ``DoubleDQN`` agent and score the result.

    ``gamma``, ``tau`` and ``batch_size`` are sampled from ``trial``; a new
    agent is trained on a new ``CreditPayerEnv`` via
    ``MetricsStudy.study_agent`` and then evaluated with ``eval_model``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean total reward over the evaluation trajectories, as returned by
        ``eval_model``.
    """
    env = CreditPayerEnv()
    state_dim = env.state_dim
    action_n = env.action_n

    episode_n = 50
    trajectory_max_len = 100
    eval_repeat_count = 10

    # Derived from the training budget (episode_n * trajectory_max_len steps).
    # NOTE(review): presumably the agent subtracts this from epsilon once per
    # step, so epsilon decays by 1.0 over the whole training run — confirm
    # against DoubleDQN.
    epsilon_decrease = 1 / episode_n / trajectory_max_len

    gamma = trial.suggest_float("gamma", 0.9, 1)
    tau = trial.suggest_float("tau", 0.001, 0.15)
    batch_size = trial.suggest_int("batch_size", 60, 70)

    agent = DoubleDQN(
        state_dim,
        action_n,
        gamma=gamma,
        lr=0.001,
        tau=tau,
        batch_size=batch_size,
        epsilon_decrease=epsilon_decrease,
        epsilon_min=0,
    )
    study = MetricsStudy(env, agent, trajectory_max_len)

    # Train for exactly the episode count the epsilon schedule was derived
    # from, instead of repeating the literal, so the two cannot drift apart.
    study.study_agent(episode_n=episode_n)

    # Zero out epsilon before scoring.
    # NOTE(review): presumably this disables exploration so the evaluation
    # uses the learned policy only — confirm against DoubleDQN.get_action.
    agent.epsilon_min = 0
    agent.epsilon = 0

    return eval_model(env, agent, trajectory_max_len, repeat_count=eval_repeat_count)

In [5]:
# Run the hyperparameter search: 50 trials, keeping the highest objective value.
double_study = optuna.create_study(direction="maximize")
double_study.optimize(double_objective, n_trials=50)

# Report the best trial found by the study.
trial = double_study.best_trial

print(f"Reward: {trial.value}")
print(f"Best hyperparameters: {trial.params}")

[I 2024-01-02 23:34:22,387] A new study created in memory with name: no-name-bc295e8d-ab62-4126-9031-1ce69854c563
[I 2024-01-02 23:34:48,325] Trial 0 finished with value: -76.3 and parameters: {'gamma': 0.9059209041721685, 'tau': 0.10400314147384127, 'batch_size': 68}. Best is trial 0 with value: -76.3.
[I 2024-01-02 23:35:14,579] Trial 1 finished with value: -83.4 and parameters: {'gamma': 0.9150319407006361, 'tau': 0.0775886000875049, 'batch_size': 68}. Best is trial 0 with value: -76.3.
[I 2024-01-02 23:35:41,583] Trial 2 finished with value: -45.8 and parameters: {'gamma': 0.9819669937924143, 'tau': 0.03913198792567727, 'batch_size': 68}. Best is trial 2 with value: -45.8.
[I 2024-01-02 23:36:09,312] Trial 3 finished with value: -47.9 and parameters: {'gamma': 0.961996719267122, 'tau': 0.04782562623614529, 'batch_size': 69}. Best is trial 2 with value: -45.8.
[I 2024-01-02 23:36:36,436] Trial 4 finished with value: -56.4 and parameters: {'gamma': 0.9098598367591694, 'tau': 0.066177

In [None]:
# Plot the optimization history of the study and save it as a standalone
# HTML file; only the HTML export is active (static image export via
# fig.write_image is not performed).
fig = optuna.visualization.plot_optimization_history(double_study)
fig.write_html("figures/optimization_history.html")
# Leave the figure as the last expression so the notebook renders it.
fig

In [None]:
# Plot the per-parameter slice view of the study and save it as a standalone
# HTML file.
fig = optuna.visualization.plot_slice(double_study)
fig.write_html("figures/slice.html")
# Leave the figure as the last expression so the notebook renders it.
fig