In [1]:
from flappy_bird_gymnasium.envs.flappy_bird_env import FlappyBirdEnv
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.logger import configure
import os
from classes import CustomFlappyBirdEnv_gap200, CustomFlappyBirdEnv_gap150, CustomFlappyBirdEnv_gap125, CustomFlappyBirdEnv_std

In [2]:
# Directory that collects the training logs; create it if it is missing.
log_dir = "../logs/"
os.makedirs(log_dir, exist_ok=True)

# Target location handed to the SB3 logger (shared by all training stages below).
custom_log_file = os.path.join(log_dir, "Curriculum_Learning")

# Logger that prints metrics to stdout and additionally records them as CSV.
new_logger = configure(
    folder=custom_log_file,
    format_strings=["stdout", "csv"],
)

Logging to ../logs/Curriculum_Learning


In [3]:
# Stage 1: register the CustomFlappyBirdEnv_gap200 variant under its own id.
# The "__main__:" entry point resolves because the class was imported into
# the notebook namespace in the import cell.
gym.register(
    id="CustomFlappyBird-v0",
    entry_point="__main__:CustomFlappyBirdEnv_gap200",
    max_episode_steps=10_000_000,
)

# Build a vectorized environment consisting of 4 copies of the registered env.
# make_vec_env also applies SB3's Monitor wrapper to each copy, which is what
# provides the rollout/ep_len_mean and rollout/ep_rew_mean statistics.
env = make_vec_env(
    "CustomFlappyBird-v0",
    n_envs=4,
    env_kwargs=dict(render_mode="rgb_array", use_lidar=False),
)


In [4]:
# Create a PPO agent with an MLP policy on the vectorized environment.
# NOTE: device="cuda" means this cell needs a CUDA-capable GPU to run.
model = PPO(
    policy="MlpPolicy",
    env=env,
    learning_rate=3e-4,
    n_steps=256,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    ent_coef=0.01,
    verbose=1,
    device="cuda",
)

# Route the training metrics through the custom stdout/CSV logger.
model.set_logger(new_logger)

# Train for 500k timesteps (no callback is passed; logging goes via the logger).
model.learn(total_timesteps=500_000)

# Persist the stage-1 checkpoint so the next curriculum stage can resume from it.
model.save("../models/PPO_MLP_gap200_500k")

Using cuda device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -7.62    |
| time/              |          |
|    fps             | 1258     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 1024     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 50          |
|    ep_rew_mean          | -7.12       |
| time/                   |             |
|    fps                  | 700         |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.007172595 |
|    clip_fraction        | 0.0119      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.687      |
|    explained_variance   | 0.0274      |
|    learnin

In [5]:
# Stage 2: register the CustomFlappyBirdEnv_gap150 variant under its own id.
# The "__main__:" entry point resolves because the class was imported into
# the notebook namespace in the import cell.
gym.register(
    id="CustomFlappyBird-v1",
    entry_point="__main__:CustomFlappyBirdEnv_gap150",
    max_episode_steps=10_000_000,
)

# Build a vectorized environment consisting of 4 copies of the registered env.
# make_vec_env also applies SB3's Monitor wrapper to each copy.
env = make_vec_env(
    "CustomFlappyBird-v1",
    n_envs=4,
    env_kwargs=dict(render_mode="rgb_array", use_lidar=False),
)

In [6]:
# Resume from the checkpoint produced by the gap200 stage.
model = PPO.load("../models/PPO_MLP_gap200_500k")

# Swap in the environment of the current curriculum stage.
model.set_env(env)

# Keep writing to the same log target, and keep the timestep counter running
# (reset_num_timesteps=False) so this stage continues from ~500k timesteps.
model.set_logger(new_logger)
model.learn(
    total_timesteps=500_000,
    reset_num_timesteps=False,
)

# Persist the checkpoint for this stage.
model.save("../models/PPO_MLP_gap150_500k")

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.34e+03    |
|    ep_rew_mean          | 287         |
| time/                   |             |
|    fps                  | 1356        |
|    iterations           | 1           |
|    time_elapsed         | 0           |
|    total_timesteps      | 501760      |
| train/                  |             |
|    approx_kl            | 0.002674543 |
|    clip_fraction        | 0.0336      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.141      |
|    explained_variance   | 0.926       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00532    |
|    n_updates            | 4890        |
|    policy_gradient_loss | -0.000846   |
|    value_loss           | 0.00509     |
-----------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 2.

In [7]:
# Stage 3: register the CustomFlappyBirdEnv_gap125 variant under its own id.
# The "__main__:" entry point resolves because the class was imported into
# the notebook namespace in the import cell.
gym.register(
    id="CustomFlappyBird-v2",
    entry_point="__main__:CustomFlappyBirdEnv_gap125",
    max_episode_steps=10_000_000,
)

# Build a vectorized environment consisting of 4 copies of the registered env.
# make_vec_env also applies SB3's Monitor wrapper to each copy.
env = make_vec_env(
    "CustomFlappyBird-v2",
    n_envs=4,
    env_kwargs=dict(render_mode="rgb_array", use_lidar=False),
)

In [8]:
# Resume from the checkpoint produced by the gap150 stage.
model = PPO.load("../models/PPO_MLP_gap150_500k")

# Swap in the environment of the current curriculum stage.
model.set_env(env)

# Keep writing to the same log target, and keep the timestep counter running
# (reset_num_timesteps=False) so this stage continues where the previous one
# stopped, i.e. at roughly 1.0M timesteps.
model.set_logger(new_logger)
model.learn(
    total_timesteps=500_000,
    reset_num_timesteps=False,
)

# Persist the checkpoint for this stage.
model.save("../models/PPO_MLP_gap125_500k")

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.05e+03     |
|    ep_rew_mean          | 128          |
| time/                   |              |
|    fps                  | 1362         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 1002496      |
| train/                  |              |
|    approx_kl            | 0.0010707278 |
|    clip_fraction        | 0.0236       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.117       |
|    explained_variance   | -0.144       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.0255       |
|    n_updates            | 9780         |
|    policy_gradient_loss | 2.4e-05      |
|    value_loss           | 0.108        |
------------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_l

In [9]:
# Stage 4: register the CustomFlappyBirdEnv_std variant under its own id.
# The "__main__:" entry point resolves because the class was imported into
# the notebook namespace in the import cell.
gym.register(
    id="CustomFlappyBird-v3",
    entry_point="__main__:CustomFlappyBirdEnv_std",
    max_episode_steps=10_000_000,
)

# Build a vectorized environment consisting of 4 copies of the registered env.
# make_vec_env also applies SB3's Monitor wrapper to each copy.
env = make_vec_env(
    "CustomFlappyBird-v3",
    n_envs=4,
    env_kwargs=dict(render_mode="rgb_array", use_lidar=False),
)

In [10]:
# Resume from the checkpoint produced by the gap125 stage.
model = PPO.load("../models/PPO_MLP_gap125_500k")

# Swap in the environment of the final curriculum stage (CustomFlappyBird-v3).
model.set_env(env)

# Keep writing to the same log target, and keep the timestep counter running
# (reset_num_timesteps=False) so this stage continues where the previous one
# stopped, i.e. at roughly 1.5M timesteps.
model.set_logger(new_logger)
model.learn(total_timesteps=500_000, reset_num_timesteps=False)

# Save under a stage-specific name. Saving to "PPO_MLP_gap125_500k" here would
# overwrite the gap125 checkpoint that was just loaded as the starting point.
model.save("../models/PPO_MLP_std_500k")

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 515          |
|    ep_rew_mean          | 61.4         |
| time/                   |              |
|    fps                  | 1809         |
|    iterations           | 1            |
|    time_elapsed         | 0            |
|    total_timesteps      | 1503232      |
| train/                  |              |
|    approx_kl            | 0.0037148376 |
|    clip_fraction        | 0.0276       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.101       |
|    explained_variance   | 0.0389       |
|    learning_rate        | 0.0003       |
|    loss                 | 0.45         |
|    n_updates            | 14670        |
|    policy_gradient_loss | -0.000381    |
|    value_loss           | 0.83         |
------------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean 