In [1]:
import gymnasium as gym
from stable_baselines3 import A2C, DQN, PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.logger import configure
import os
from classes import CustomFlappyBirdEnv_rew100

In [2]:
# Register the custom Flappy Bird environment and build the vectorised env.
#
# The entry point names the module that actually provides the class
# (`classes`, the module the import cell pulls it from) instead of `__main__`.
# Resolving it through `__main__` only works while the class happens to be
# imported into this notebook's namespace and fails in any process whose
# `__main__` is something else (e.g. subprocess-based vectorised envs).
gym.register(
    id='CustomFlappyBird-v0',
    entry_point='classes:CustomFlappyBirdEnv_rew100',
    max_episode_steps=10000000,
)

# Four copies of the environment; `env_kwargs` is handed to the environment
# constructor for each copy.
env = make_vec_env(
    "CustomFlappyBird-v0",
    n_envs=4,
    env_kwargs={'render_mode': 'rgb_array', 'use_lidar': False},
)

In [3]:
# Logging setup: every training run below writes into a sub-folder of this directory.
log_dir = "../logs/"

# Create the directory up front; an already existing directory is left untouched.
os.makedirs(name=log_dir, exist_ok=True)

In [4]:
#A2C
# Folder that receives the training logs of this run
A2C_MLP_rew100_2Mio = os.path.join(log_dir, "A2C_MLP_rew100_2Mio")

# Configure the logger to print to stdout and to write a CSV into that folder
new_logger = configure(A2C_MLP_rew100_2Mio, ["stdout", "csv"])

# Define A2C model.
# The instance is bound to `a2c_model` rather than `A2C`: rebinding `A2C`
# replaces the imported class with the model object, so running this cell a
# second time fails with "'A2C' object is not callable".
a2c_model = A2C(
    "MlpPolicy",
    env,
    learning_rate=7e-4,
    n_steps=5,
    gamma=0.99,
    gae_lambda=1.0,
    ent_coef=0.01,
    vf_coef=0.5,
    max_grad_norm=0.5,
    use_rms_prop=True,
    verbose=1,
    # "auto" uses the GPU when one is available and falls back to the CPU
    # otherwise, so the cell also runs on machines without CUDA.
    device='auto'
)

# Attach the new logger to the model
a2c_model.set_logger(new_logger)

# Train model
a2c_model.learn(total_timesteps=2000000)

# Save model
a2c_model.save("../models/A2C_MLP_rew100_2Mio")

Logging to ../logs/A2C_MLP_rew100_2Mio


Using cuda device
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 50       |
|    ep_rew_mean        | -91.3    |
| time/                 |          |
|    fps                | 966      |
|    iterations         | 100      |
|    time_elapsed       | 2        |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -0.308   |
|    explained_variance | -0.125   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -4.9     |
|    value_loss         | 295      |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 50       |
|    ep_rew_mean        | -92.2    |
| time/                 |          |
|    fps                | 1056     |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 4000     |
| train/            



In [5]:
#DQN
# Folder that receives the training logs of this run
DQN_MLP_rew100_2Mio = os.path.join(log_dir, "DQN_MLP_rew100_2Mio")

# Configure the logger to print to stdout and to write a CSV into that folder
new_logger = configure(DQN_MLP_rew100_2Mio, ["stdout", "csv"])

# Define DQN model.
# The instance is bound to `dqn_model` rather than `DQN`: rebinding `DQN`
# replaces the imported class with the model object, so running this cell a
# second time fails with "'DQN' object is not callable".
dqn_model = DQN(
    "MlpPolicy",  # Policy type
    env,
    learning_rate=1e-4,
    buffer_size=50000,
    learning_starts=1000,
    batch_size=32,
    tau=1.0,
    gamma=0.99,
    train_freq=4,
    gradient_steps=1,
    target_update_interval=1000,
    verbose=1,
    # "auto" uses the GPU when one is available and falls back to the CPU
    # otherwise, so the cell also runs on machines without CUDA.
    device='auto'
)

# Attach the new logger to the model
dqn_model.set_logger(new_logger)

# Train model
dqn_model.learn(total_timesteps=2000000)

# Save model
dqn_model.save("../models/DQN_MLP_rew100_2Mio")

Logging to ../logs/DQN_MLP_rew100_2Mio
Using cuda device
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 50       |
|    ep_rew_mean      | -73.5    |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 16630    |
|    time_elapsed     | 0        |
|    total_timesteps  | 200      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 50       |
|    ep_rew_mean      | -66      |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 17388    |
|    time_elapsed     | 0        |
|    total_timesteps  | 400      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 50       |
|    ep_rew_mean      | -68.5    |
|    exploration_rate | 0.997    

In [6]:
#PPO
# Folder that receives the training logs of this run
PPO_MLP_rew100_2Mio = os.path.join(log_dir, "PPO_MLP_rew100_2Mio")

# Configure the logger to print to stdout and to write a CSV into that folder
new_logger = configure(PPO_MLP_rew100_2Mio, ["stdout", "csv"])

# Define PPO model.
# The instance is bound to `ppo_model` rather than `PPO`: rebinding `PPO`
# replaces the imported class with the model object, so running this cell a
# second time fails with "'PPO' object is not callable".
ppo_model = PPO(
    "MlpPolicy",
    env,
    learning_rate=3e-4,
    n_steps=256,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    ent_coef=0.01,
    verbose=1,
    # "auto" uses the GPU when one is available and falls back to the CPU
    # otherwise, so the cell also runs on machines without CUDA.
    device='auto'
)

# Attach the new logger to the model
ppo_model.set_logger(new_logger)

# Train model
ppo_model.learn(total_timesteps=2000000)

# Save model
ppo_model.save("../models/PPO_MLP_rew100_2Mio")

Logging to ../logs/PPO_MLP_rew100_2Mio
Using cuda device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -75.9    |
| time/              |          |
|    fps             | 1848     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 1024     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 50          |
|    ep_rew_mean          | -72.9       |
| time/                   |             |
|    fps                  | 827         |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.018126532 |
|    clip_fraction        | 0.117       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.678      |
|    explained_