In [1]:
import sys

IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
    !git clone https://github.com/DarthReca/RL-exercises.git
    !apt install swig
    !pip install gymnasium[box2d] pytorch-lightning comet_ml

# Main

In [2]:
import gymnasium as gym
from models import DQNAgent, ReplayBuffer
from random import random
from lightning_lite.utilities.seed import seed_everything
import comet_ml as cml

In [3]:
seed_everything(45)

Global seed set to 45


45

In [4]:
train_env = gym.make("LunarLander-v2")

In [5]:
experiment = cml.Experiment(api_key="", workspace="darthreca", project_name="LunarLander")

COMET INFO: Experiment is live on comet.com https://www.comet.com/darthreca/lunarlander/7bdc3b1568c44b52979d20ba42100b44



In [14]:
gym.spaces.flatdim(train_env.observation_space)

8

In [20]:
args = {
    "state_dim": gym.spaces.flatdim(train_env.observation_space),
    "action_dim": gym.spaces.flatdim(train_env.action_space),
    "net_width": 200,
    "batch_size": 512,
    "gamma": 0.5,
    "exp_noise":  0.2,
    "env_with_dw": True,
    "DDQN": False,
    "lr": 1e-3
}
agent = DQNAgent(experiment=experiment, **args)
buffer = ReplayBuffer(args["state_dim"], max_size=int(1e6))

# Training

In [21]:
def epsilon_greedy(env: gym.Env, state, epsilon: float):
    if random() < epsilon:
        return env.action_space.sample()
    return agent.select_action(state, True)

In [22]:
STEPS_BEFORE_TRAINING = 10000
LEARNING_FREQUENCY = 500

In [23]:
experiment.log_parameters(args)
experiment.log_parameters({
    "warming_steps": STEPS_BEFORE_TRAINING,
    "learning_frequency": LEARNING_FREQUENCY
})

In [25]:
steps = 0
for episode in range(100000):
    obs, _ = train_env.reset()
    term, trunc = False, False
    episode_reward, episode_length = 0, 0
    while not (term or trunc):
        # Take action and add to buffer
        action = epsilon_greedy(train_env, obs, 1)
        next_obs, reward, term, trunc, _ = train_env.step(action)
        buffer.add(obs, action, reward, next_obs, term or trunc)
        # Learn
        if steps > STEPS_BEFORE_TRAINING and steps % LEARNING_FREQUENCY == 0:
            for i in range(LEARNING_FREQUENCY):
                agent.train(buffer, i)
        # Updated accumulated metrics
        episode_length += 1
        episode_reward += reward
        # Update
        obs = next_obs
        steps += 1
    metrics = {"episode_reward": episode_reward, "episode_length": episode_length}
    experiment.log_metrics(metrics, step=steps, epoch=episode)
experiment.end()
train_env.close()

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.com/darthreca/lunarlander/7bdc3b1568c44b52979d20ba42100b44
COMET INFO:   Metrics:
COMET INFO:     episode_length : 116
COMET INFO:     episode_reward : -54.69039032789946
COMET INFO:   Parameters:
COMET INFO:     DDQN               : False
COMET INFO:     action_dim         : 4
COMET INFO:     batch_size         : 512
COMET INFO:     env_with_dw        : True
COMET INFO:     exp_noise          : 0.2
COMET INFO:     gamma              : 0.5
COMET INFO:     learning_frequency : 500
COMET INFO:     lr                 : 0.001
COMET INFO:     net_width          : 200
COMET INFO:     state_dim          : 8
COMET INFO:     warming_steps      : 10000
COMET INFO:   Uploads:
COMET INFO:     environment details      : 1
COMET INFO:     filename                 