In [None]:
import sys

IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
    !git clone https://github.com/DarthReca/RL-exercises.git
    !pip install gymnasium pytorch-lightning comet_ml

# Main

In [1]:
import gymnasium as gym
from models import DQNAgent, ReplayBuffer
from random import random
from lightning_lite.utilities.seed import seed_everything
import comet_ml as cml

In [2]:
# Fix the global seed; the call reports the seed it set and returns it.
seed_everything(seed=45)

Global seed set to 45


45

In [3]:
# Build the environment used for training.
train_env = gym.make("LunarLander-v2")
# Gymnasium environments and spaces own their random generators, which the
# global seeding call does not touch. Seed both here so episode resets and
# `action_space.sample()` (used for exploration) repeat across runs; later
# `reset()` calls without a seed keep drawing from the seeded generator.
train_env.reset(seed=45)
train_env.action_space.seed(45);  # trailing `;` hides the returned seed in the cell output

In [None]:
# Hyper-parameters forwarded verbatim to DQNAgent as keyword arguments.
args = {
    "state_dim": train_env.observation_space.shape[0],
    # LunarLander has a Discrete action space: its `shape` is `()`, so indexing
    # it raises IndexError. The number of available actions is exposed as `.n`
    # (cast to a plain int so it logs and serialises cleanly).
    "action_dim": int(train_env.action_space.n),
    "net_width": 200,
    "batch_size": 512,
    "gamma": 0.5,
    "exp_noise":  0.2,
    "env_with_dw": True,
    "DDQN": False
}
agent = DQNAgent(**args)
# Pass the capacity as an int: a float such as 1e6 is not a valid array size or
# index. NOTE(review): assumes ReplayBuffer uses max_size to size its storage — confirm.
buffer = ReplayBuffer(args["state_dim"], max_size=int(1e6))

# Training

In [None]:
def epsilon_greedy(env: gym.Env, state, epsilon: float):
    """Choose an action for `state`, exploring with probability `epsilon`.

    Args:
        env: Environment whose action space is sampled when exploring.
        state: Current observation, handed to the agent when not exploring.
        epsilon: Probability of returning a uniformly sampled action.

    Returns:
        Either `env.action_space.sample()` or the action chosen by the
        module-level `agent`.
    """
    explore = random() < epsilon
    if not explore:
        # NOTE(review): the `True` flag presumably requests the agent's
        # deterministic (greedy) action — confirm against DQNAgent.
        return agent.select_action(state, True)
    return env.action_space.sample()

In [None]:
# Environment steps to collect before any learning update is run.
STEPS_BEFORE_TRAINING = 10_000
# After warm-up, a burst of this many updates runs every time this many steps elapse.
LEARNING_FREQUENCY = 500

In [None]:
# Open the Comet experiment that receives this run's parameters and metrics.
# No API key is passed on purpose: an explicit empty string would override
# Comet's own lookup, whereas omitting it lets the SDK read the key from the
# COMET_API_KEY environment variable or its config file — keeping the secret
# out of the notebook.
experiment = cml.Experiment(
    workspace="darthreca",
    project_name="LunarLander",
)

In [None]:
# Training-schedule settings, logged alongside the agent hyper-parameters.
schedule_params = {
    "warming_steps": STEPS_BEFORE_TRAINING,
    "learning_frequency": LEARNING_FREQUENCY,
}
# Agent hyper-parameters first, then the schedule constants.
experiment.log_parameters(args)
experiment.log_parameters(schedule_params)

In [None]:
# Main training loop: play episodes, store transitions in the replay buffer,
# periodically run a burst of learning updates, and log per-episode metrics.
steps = 0  # environment steps taken across all episodes
for episode in range(10000):
    obs, _ = train_env.reset()
    term, trunc = False, False
    episode_reward, episode_length = 0, 0
    while not (term or trunc):
        # Act purely at random while the buffer warms up; once learning has
        # started, follow the agent except for an `exp_noise` fraction of random
        # actions. (A fixed epsilon of 1 never lets the learned policy act, so
        # the logged rewards would only ever describe a random policy.)
        epsilon = 1.0 if steps <= STEPS_BEFORE_TRAINING else args["exp_noise"]
        action = epsilon_greedy(train_env, obs, epsilon)
        next_obs, reward, term, trunc, _ = train_env.step(action)
        # Store only true termination as the done flag. A time-limit truncation
        # is not a terminal state, and the agent is configured with
        # `env_with_dw=True`, so marking truncated steps as done would wrongly
        # cut off bootstrapping from `next_obs`.
        # NOTE(review): assumes the last argument of `buffer.add` is that flag — confirm.
        buffer.add(obs, action, reward, next_obs, term)
        # Learn: after warm-up, every LEARNING_FREQUENCY steps run as many updates
        # as steps were collected since the previous burst.
        if steps > STEPS_BEFORE_TRAINING and steps % LEARNING_FREQUENCY == 0:
            for i in range(LEARNING_FREQUENCY):
                agent.train(buffer, i)
        # Update accumulated metrics
        episode_length += 1
        episode_reward += reward
        # Advance to the next transition
        obs = next_obs
        steps += 1
    metrics = {"episode_reward": episode_reward, "episode_length": episode_length}
    # Comet's keyword is `step` (singular); `steps=` is not an accepted argument.
    experiment.log_metrics(metrics, step=steps, epoch=episode)
experiment.end()
train_env.close()