In [3]:
import ray
from ray import tune
import gymnasium as gym
from agent import Agent
import itertools

In [None]:
def train_agent(config):
    """Train one Agent on LunarLander-v3 and report its final average score to Ray Tune.

    Intended to be used as a Ray Tune trainable: each trial calls this
    function with one concrete hyperparameter combination.

    Args:
        config: Mapping of hyperparameters. Must contain 'hidden_size',
            'learning_rate' and 'gamma'. 'num_episodes' is optional and
            defaults to 1000.

    Reports:
        ``{"final_avg_score": avg_scores[-1]}`` through ``tune.report``, where
        ``avg_scores`` is the second value returned by ``agent.learn``
        (presumably a running average of episode scores; confirm against
        ``agent.Agent``).

    Raises:
        KeyError: If a required hyperparameter is missing from ``config``.
        IndexError: If ``agent.learn`` returns an empty ``avg_scores`` list.
    """
    env = gym.make('LunarLander-v3', continuous=False, enable_wind=False)

    # Always release the environment, even when construction, training or
    # reporting raises; otherwise a failed trial leaks the env in its worker.
    try:
        agent = Agent(
            env,
            hidden_size=config['hidden_size'],
            learning_rate=config['learning_rate'],
            gamma=config['gamma'],
            num_episodes=config.get('num_episodes', 1000),
            print_every=100
        )

        # Per-episode scores are not needed here; only the averaged series is.
        _, avg_scores = agent.learn(ignore_threshold=True)

        final_avg_score = avg_scores[-1]
        tune.report({"final_avg_score":final_avg_score})
    finally:
        env.close()

In [None]:
# Start the Ray runtime; ignore_reinit_error lets this cell be re-run in a
# kernel where Ray is already initialized.
ray.init(ignore_reinit_error=True)

# Candidate hidden-layer shapes: every ordered pair of two *distinct* widths
# drawn from (32, 64, 128), e.g. [32, 64] and [64, 32] -- six pairs in total.
hidden_layer = list(map(list, itertools.permutations((32, 64, 128), 2)))

# Exhaustive grid: 2 learning rates x 6 layer shapes x 2 discount factors,
# each combination sampled once (num_samples=1), trained for 1000 episodes.
analysis = tune.run(
    train_agent,
    num_samples=1,
    verbose=1,
    mode='max',
    metric='final_avg_score',
    config=dict(
        learning_rate=tune.grid_search([0.001, 0.003]),
        hidden_size=tune.grid_search(hidden_layer),
        gamma=tune.grid_search([0.999, 0.99]),
        num_episodes=1000,
    ),
)

# Show the configuration whose trial achieved the highest final_avg_score.
print(
    'Best hyperparameters found were: ',
    analysis.get_best_config(metric='final_avg_score', mode='max'),
)