In [None]:
import ray
from ray import tune
import gymnasium as gym
import itertools
from agent import DQNAgent

In [None]:
# --- Optimisation settings -------------------------------------------------
BATCH_SIZE = 64          # forwarded to DQNAgent(batch_size=...) by train_agent
LEARNING_RATE = 5e-4     # baseline learning rate

# --- Baseline agent / training hyperparameters -----------------------------
gamma = 0.999            # baseline value for the agent's `gamma` argument
max_num_episodes = 1_000 # baseline number of training episodes
hidden_size = [128, 64]  # baseline hidden-layer sizes

# --- Exploration schedule (forwarded to DQNAgent by train_agent) -----------
min_epsilon = 0.01
max_eps_episode = 150

# --- Logging ---------------------------------------------------------------
print_every = 100

In [None]:
def train_agent(config):
    """Train one DQN agent on LunarLander-v3 and report its final score to Ray Tune.

    Meant to be used as the trainable passed to ``tune.run``: each call builds
    a fresh environment and agent, runs ``agent.learn`` with exponential
    epsilon decay, and reports a single metric.

    Args:
        config (dict): Per-trial hyperparameters. Every key is optional; a
            missing key falls back to the notebook-level constant shown:

            - ``'hidden_size'``   -> ``hidden_size``
            - ``'learning_rate'`` -> ``LEARNING_RATE``
            - ``'gamma'``         -> ``gamma``
            - ``'num_episodes'``  -> ``max_num_episodes``

    Returns:
        None. The result is delivered through ``tune.report`` as
        ``{"final_avg_score": avg_scores[-1]}``, where ``avg_scores`` is the
        second value returned by ``agent.learn``.

    Raises:
        IndexError: If ``agent.learn`` returns an empty ``avg_scores``.
    """
    env = gym.make('LunarLander-v3', continuous=False, enable_wind=False)

    # Guarantee the environment is released even when agent construction or
    # training fails, so a crashing trial does not leak the simulator.
    try:
        agent = DQNAgent(
            env,
            hidden_size=config.get('hidden_size', hidden_size),
            batch_size=BATCH_SIZE,
            replay_buffer_size=10000,
            learning_rate=config.get('learning_rate', LEARNING_RATE),
            gamma=config.get('gamma', gamma),
            min_epsilon=min_epsilon,
            max_eps_episode=max_eps_episode,
            num_episodes=config.get('num_episodes', max_num_episodes),
            print_every=500,
        )

        # Only the averaged-score series is needed for the reported metric.
        _, avg_scores = agent.learn(epsilon_decay_fn=agent.exponential_epsilon_decay)

        final_avg_score = avg_scores[-1]
        tune.report({"final_avg_score": final_avg_score})
    finally:
        env.close()

In [None]:
ray.init(ignore_reinit_error=True) # Ignore error if already initialized

# Search space for the network shape: every ordered pair of two distinct
# widths drawn from 32/64/128, i.e. six two-layer configurations.
hidden_layer = [list(x) for x in itertools.permutations([32, 64, 128], 2)]

# Only `hidden_size` is searched; the remaining hyperparameters are held at
# the notebook-level baseline constants so there is a single place to edit them.
analysis = tune.run(
    train_agent,
    config={
        'learning_rate': LEARNING_RATE,
        'hidden_size': tune.grid_search(hidden_layer),
        'gamma': gamma,
        'num_episodes': max_num_episodes,
    },
    metric='final_avg_score',
    mode='max',
    num_samples=1,
    verbose=1,
)
print('Best hyperparameters found were: ', analysis.get_best_config(metric='final_avg_score', mode='max'))

DQN

Current best trial: 56185_00005 with final_avg_score=198.6452985509797 and params={'learning_rate': 0.0005, 'hidden_size': [128, 64], 'gamma': 0.999, 'num_episodes': 1000}