# Cart-pole Hyperparameter Optimization

<br>
<br>

<span style="font-family: monospace; font-size:18px;">
In this Jupyter notebook, we explore and optimize several reinforcement learning algorithms for the classic cart-pole balancing task. We use the Optuna library to tune the hyperparameters of four models: Semi-gradient SARSA, Deep Q-Network (DQN), and the discrete and continuous variants of the Actor-Critic method.
</span>


## Import libraries

In [None]:
import torch
import torch.optim as optim
import optuna
from optuna_utils import ObjectiveFunction
from config import ACTIONS, THETA_DOT_BOUNDS, X_DOT_BOUNDS
from cart_pole_class import CartPole
from tile_coder import TileCoder
from replay_memory_class import ReplayMemory
from semi_gradient_sarsa_agent import SemiGradientSarsaAgent
from ann_model_classes import FCN, SoftmaxFCN, GaussianFCN
from model_handler_classes import ModelHandler, ActorModelHandler
from dqn_agent_class import DQNAgent
from actor_critic_agent_class import ActorCriticAgent
from model_utils import train, EarlyStopCallback
from epsilon_decay_class import EpsilonDecay
import warnings
warnings.filterwarnings("ignore")

## Optimize Semi-gradient SARSA Model Parameters

#### Create Model

In [None]:
def create_sarsa_model(trial):
    """Build a semi-gradient SARSA agent from hyperparameters sampled by ``trial``.

    Sampled parameters (names as recorded in the study): ``alpha``,
    ``num_tilings``, ``tiles_per_dimension``, ``epsilon_end`` (log scale)
    and ``last_episode``.

    Args:
        trial: Optuna trial object used to draw the hyperparameters.

    Returns:
        The ``SemiGradientSarsaAgent`` built on a fresh ``CartPole``
        environment with ``gamma=0.99``.
    """
    env = CartPole()

    # The suggest calls are kept in a fixed order so sampling is unchanged.
    step_size = trial.suggest_float('alpha', 0.01, 0.5)
    tilings = trial.suggest_int('num_tilings', 1, 20)

    # One candidate per even tile count 4, 6, ..., 32, repeated for all four
    # dimensions.
    # NOTE(review): Optuna documents categorical choices as
    # None/bool/int/float/str; confirm tuple choices behave as intended with
    # the SQLite storage used by the study.
    tile_shapes = [(size, size, size, size) for size in range(4, 33, 2)]
    tiles = trial.suggest_categorical('tiles_per_dimension', tile_shapes)

    final_epsilon = trial.suggest_float('epsilon_end', 0.001, 0.1, log=True)

    # Candidate episodes 10, 15, ..., 95 passed to EpsilonDecay as last_episode.
    decay_horizons = list(range(10, 100, 5))
    decay_horizon = trial.suggest_categorical('last_episode', decay_horizons)

    coder = TileCoder(num_tilings=tilings, tiles_per_dimension=tiles)
    exploration = EpsilonDecay(
        epsilon_start=1.0,
        epsilon_end=final_epsilon,
        last_episode=decay_horizon,
        decay_method='linear',
    )

    return SemiGradientSarsaAgent(
        env, ACTIONS, coder, exploration, alpha=step_size, gamma=0.99
    )


#### Objective Function

In [None]:
# Callback handed to the objective; presumably ends a run early once the
# score reaches the threshold -- confirm against model_utils.EarlyStopCallback.
early_stop_callback = EarlyStopCallback(threshold=200)

# Objective evaluated by Optuna for every SARSA trial.
objective = ObjectiveFunction(
    create_sarsa_model,
    callbacks=early_stop_callback,
    iterations=10,
    train_episodes=1000,
    evaluation_episodes=10,
    evaluation_interval=1,
)

#### Optimize

In [None]:
# Create the "sarsa" study in the shared SQLite database, or reuse it if it
# already exists (load_if_exists=True).
study = optuna.create_study(
    study_name="sarsa",
    storage="sqlite:///optuna_database.db",
    direction='minimize',
    load_if_exists=True,
)

# Run 100 more trials of the current objective.
study.optimize(objective, n_trials=100)

print('Best hyperparameters: ', study.best_params, " Best value: ", study.best_value)

## Optimize DQN Model Parameters

#### Create Model

In [None]:
def create_dqn_model(trial):
    """Build a DQN agent from hyperparameters sampled by ``trial``.

    Sampled parameters (names as recorded in the study): ``batch_size``,
    ``learning_rate`` (log scale), ``layers``, ``n_hidden_units``,
    ``leaky_relu_alpha`` (log scale), ``epsilon_end`` (log scale) and
    ``last_episode``.

    Args:
        trial: Optuna trial object used to draw the hyperparameters.

    Returns:
        The ``DQNAgent`` built on a fresh ``CartPole`` environment, with an
        Adam-optimized ``FCN`` (``output_dim=2``), a replay memory of size
        100000 and ``gamma=0.99``.
    """
    env = CartPole()

    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256, 512, 1024])
    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # it samples from the same log-uniform range and matches the style already
    # used for epsilon_end below.
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
    layers = trial.suggest_categorical('layers', [2, 3])
    n_hidden_units = trial.suggest_categorical('n_hidden_units', [2**n for n in range(2, 11)])
    leaky_relu_alpha = trial.suggest_float('leaky_relu_alpha', 1e-3, 0.5, log=True)
    epsilon_end = trial.suggest_float('epsilon_end', 0.001, 0.1, log=True)
    last_episode = trial.suggest_categorical('last_episode', [5*n for n in range(2, 50)])

    # Every hidden layer has the same width: `layers` copies of n_hidden_units.
    model = FCN(
        output_dim=2,
        hidden_units=layers * [n_hidden_units],
        activation=torch.nn.LeakyReLU(negative_slope=leaky_relu_alpha),
    )
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    model_handler = ModelHandler(model, optimizer)
    replay_memory = ReplayMemory(memory_size=100000)
    epsilon = EpsilonDecay(
        epsilon_start=1.0,
        epsilon_end=epsilon_end,
        last_episode=last_episode,
        decay_method='linear',
    )

    dqn_agent = DQNAgent(
        env, ACTIONS, model_handler, replay_memory, epsilon,
        batch_size=batch_size, gamma=0.99,
    )

    return dqn_agent


#### Objective Function

In [None]:
# Callback handed to the objective; presumably ends a run early once the
# score reaches the threshold -- confirm against model_utils.EarlyStopCallback.
early_stop_callback = EarlyStopCallback(threshold=200)

# Objective evaluated by Optuna for every DQN trial.
objective = ObjectiveFunction(
    create_dqn_model,
    callbacks=early_stop_callback,
    iterations=10,
    train_episodes=1000,
    evaluation_episodes=10,
    evaluation_interval=1,
)

#### Optimize

In [None]:
# Create the "dqn" study in the shared SQLite database, or reuse it if it
# already exists (load_if_exists=True).
study = optuna.create_study(
    study_name="dqn",
    storage="sqlite:///optuna_database.db",
    direction='minimize',
    load_if_exists=True,
)

# Run 100 more trials of the current objective.
study.optimize(objective, n_trials=100)

print('Best hyperparameters: ', study.best_params, " Best value: ", study.best_value)

## Optimize Discrete Actor Critic Model Parameters

#### Create Model

In [None]:
def create_discrete_actor_critic_model(trial):
    """Build a discrete-action actor-critic agent from hyperparameters sampled by ``trial``.

    Sampled parameters (names as recorded in the study): ``batch_size``,
    ``critic_learning_rate`` / ``actor_learning_rate`` (log scale),
    ``critic_layers`` / ``actor_layers``, ``n_hidden_units_critic`` /
    ``n_hidden_units_actor`` and ``critic_leaky_relu_alpha`` /
    ``actor_leaky_relu_alpha`` (log scale).

    Args:
        trial: Optuna trial object used to draw the hyperparameters.

    Returns:
        The ``ActorCriticAgent`` built on a fresh ``CartPole`` environment
        with an ``FCN`` critic (``output_dim=1``), a ``SoftmaxFCN`` actor
        (``output_dim=2``), ``actions=ACTIONS`` and ``gamma=0.99``.
    """
    env = CartPole()

    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256, 512, 1024])

    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # it samples from the same log-uniform range.
    critic_learning_rate = trial.suggest_float('critic_learning_rate', 1e-5, 1e-1, log=True)
    actor_learning_rate = trial.suggest_float('actor_learning_rate', 1e-7, 1e-1, log=True)

    critic_layers = trial.suggest_categorical('critic_layers', [2, 3])
    actor_layers = trial.suggest_categorical('actor_layers', [2, 3])

    n_hidden_units_critic = trial.suggest_categorical('n_hidden_units_critic', [2**n for n in range(4, 11)])
    n_hidden_units_actor = trial.suggest_categorical('n_hidden_units_actor', [2**n for n in range(5, 11)])

    critic_leaky_relu_alpha = trial.suggest_float('critic_leaky_relu_alpha', 1e-3, 0.5, log=True)
    actor_leaky_relu_alpha = trial.suggest_float('actor_leaky_relu_alpha', 1e-3, 0.5, log=True)

    replay_memory = ReplayMemory(memory_size=100000)

    # Critic network: every hidden layer has the same sampled width.
    critic_ann_model = FCN(
        output_dim=1,
        hidden_units=critic_layers * [n_hidden_units_critic],
        activation=torch.nn.LeakyReLU(negative_slope=critic_leaky_relu_alpha),
    )
    critic_optimizer = optim.Adam(critic_ann_model.parameters(), lr=critic_learning_rate)
    critic_model_handler = ModelHandler(critic_ann_model, critic_optimizer)

    # Actor network with its own optimizer and learning rate.
    actor_ann_model = SoftmaxFCN(
        output_dim=2,
        hidden_units=actor_layers * [n_hidden_units_actor],
        activation=torch.nn.LeakyReLU(negative_slope=actor_leaky_relu_alpha),
    )
    actor_optimizer = optim.Adam(actor_ann_model.parameters(), lr=actor_learning_rate)
    actor_model_handler = ActorModelHandler(actor_ann_model, actor_optimizer)

    actor_critic_agent = ActorCriticAgent(
        env, critic_model_handler, actor_model_handler, replay_memory,
        actions=ACTIONS, batch_size=batch_size, gamma=0.99,
    )

    return actor_critic_agent

#### Objective Function

In [None]:
# Callback handed to the objective; presumably ends a run early once the
# score reaches the threshold -- confirm against model_utils.EarlyStopCallback.
early_stop_callback = EarlyStopCallback(threshold=200)

# Objective evaluated by Optuna for every discrete actor-critic trial.
objective = ObjectiveFunction(
    create_discrete_actor_critic_model,
    callbacks=early_stop_callback,
    iterations=10,
    train_episodes=1000,
    evaluation_episodes=10,
    evaluation_interval=1,
)

#### Optimize

In [None]:
# Create the "actor_critic" study in the shared SQLite database, or reuse it
# if it already exists (load_if_exists=True).
study = optuna.create_study(
    study_name="actor_critic",
    storage="sqlite:///optuna_database.db",
    direction='minimize',
    load_if_exists=True,
)

# Run 100 more trials of the current objective.
study.optimize(objective, n_trials=100)

print('Best hyperparameters: ', study.best_params, " Best value: ", study.best_value)

## Optimize Continuous Actor Critic Model Parameters

#### Create Model

In [None]:
def create_continuous_actor_critic_model(trial):
    """Build a continuous-action actor-critic agent from hyperparameters sampled by ``trial``.

    Sampled parameters (names as recorded in the study): ``batch_size``,
    ``critic_learning_rate`` / ``actor_learning_rate`` (log scale),
    ``critic_layers`` / ``actor_layers``, ``n_hidden_units_critic`` /
    ``n_hidden_units_actor`` and ``critic_leaky_relu_alpha`` /
    ``actor_leaky_relu_alpha`` (log scale).

    Args:
        trial: Optuna trial object used to draw the hyperparameters.

    Returns:
        The ``ActorCriticAgent`` built on a fresh ``CartPole`` environment
        with an ``FCN`` critic (``output_dim=1``), a ``GaussianFCN`` actor
        (``output_dim=1``) and ``gamma=0.99``. No ``actions`` argument is
        passed to the agent here.
    """
    env = CartPole()

    batch_size = trial.suggest_categorical('batch_size', [32, 64, 128, 256, 512, 1024])

    # suggest_float(..., log=True) replaces the deprecated suggest_loguniform;
    # it samples from the same log-uniform range.
    critic_learning_rate = trial.suggest_float('critic_learning_rate', 1e-5, 1e-1, log=True)
    actor_learning_rate = trial.suggest_float('actor_learning_rate', 1e-7, 1e-1, log=True)

    critic_layers = trial.suggest_categorical('critic_layers', [2, 3])
    actor_layers = trial.suggest_categorical('actor_layers', [2, 3])

    n_hidden_units_critic = trial.suggest_categorical('n_hidden_units_critic', [2**n for n in range(4, 11)])
    n_hidden_units_actor = trial.suggest_categorical('n_hidden_units_actor', [2**n for n in range(4, 11)])

    critic_leaky_relu_alpha = trial.suggest_float('critic_leaky_relu_alpha', 1e-3, 0.5, log=True)
    actor_leaky_relu_alpha = trial.suggest_float('actor_leaky_relu_alpha', 1e-3, 0.5, log=True)

    replay_memory = ReplayMemory(memory_size=100000)

    # Critic network: every hidden layer has the same sampled width.
    critic_ann_model = FCN(
        output_dim=1,
        hidden_units=critic_layers * [n_hidden_units_critic],
        activation=torch.nn.LeakyReLU(negative_slope=critic_leaky_relu_alpha),
    )
    critic_optimizer = optim.Adam(critic_ann_model.parameters(), lr=critic_learning_rate)
    critic_model_handler = ModelHandler(critic_ann_model, critic_optimizer)

    # Actor network with its own optimizer and learning rate.
    actor_ann_model = GaussianFCN(
        output_dim=1,
        hidden_units=actor_layers * [n_hidden_units_actor],
        activation=torch.nn.LeakyReLU(negative_slope=actor_leaky_relu_alpha),
    )
    actor_optimizer = optim.Adam(actor_ann_model.parameters(), lr=actor_learning_rate)
    actor_model_handler = ActorModelHandler(actor_ann_model, actor_optimizer)

    actor_critic_agent = ActorCriticAgent(
        env, critic_model_handler, actor_model_handler, replay_memory,
        batch_size=batch_size, gamma=0.99,
    )

    return actor_critic_agent

#### Objective Function

In [None]:
# Callback handed to the objective; presumably ends a run early once the
# score reaches the threshold -- confirm against model_utils.EarlyStopCallback.
early_stop_callback = EarlyStopCallback(threshold=200)

# Objective evaluated by Optuna for every continuous actor-critic trial.
objective = ObjectiveFunction(
    create_continuous_actor_critic_model,
    callbacks=early_stop_callback,
    iterations=10,
    train_episodes=1000,
    evaluation_episodes=10,
    evaluation_interval=1,
)

#### Optimize

In [None]:
# Create the "actor_critic_continuous" study in the shared SQLite database,
# or reuse it if it already exists (load_if_exists=True).
study = optuna.create_study(
    study_name="actor_critic_continuous",
    storage="sqlite:///optuna_database.db",
    direction='minimize',
    load_if_exists=True,
)

# Run 100 more trials of the current objective.
study.optimize(objective, n_trials=100)

print('Best hyperparameters: ', study.best_params, " Best value: ", study.best_value)