# Environment Setup

In [1]:
#!pip install gym gym-retro, opencv-python

In [2]:
# PPO algorythm
from stable_baselines3 import PPO
# Monitor for logging 
from stable_baselines3.common.monitor import Monitor
# Vec wrappers to vectorize and frame stack
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
# To deal with filepaths
import os

In [3]:
# Running Environment_Setup.ipynb to download ROM and setup custom Sonic environment.
%run Environment_Setup.ipynb

Importing SonicTheHedgehog2-Genesis
Imported 1 games


In [4]:
LOG_DIR = './logs/'

In [5]:
# Create environment 
env = Sonic()
env.level(1)
env = ActionDiscretizer(env)
env = Monitor(env, LOG_DIR)
env = DummyVecEnv([lambda: env])
env = VecFrameStack(env, 4, channels_order='last')

# Setup Callback To Monitor Training Process

In [6]:
#!pip install stable-baselines3[extra]

In [7]:
# Import base callback 
from stable_baselines3.common.callbacks import BaseCallback

In [8]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [9]:
CHECKPOINT_DIR = './train/'

In [10]:
callback = TrainAndLoggingCallback(check_freq=100_000, save_path=CHECKPOINT_DIR)

# Continuing Training Best Model

In [11]:
# Best model
model = PPO.load("train/old_gen/PPO_Sonic_lvl1", env, verbose=1, tensorboard_log=LOG_DIR)

# Displaying current learning rate 
print(model.learning_rate)
print(model.n_steps)

Wrapping the env in a VecTransposeImage.
1.4336960770105995e-06
8192


In [30]:
# Lowering learning rate if needed
model.learning_rate /= 1.2
print(model.learning_rate)

4.580372217810106e-05


In [None]:
# Training
model.learn(total_timesteps=4_000_000, callback=callback)

Logging to ./logs/PPO_3
-----------------------------
| time/              |      |
|    fps             | 236  |
|    iterations      | 1    |
|    time_elapsed    | 34   |
|    total_timesteps | 8192 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 187         |
|    iterations           | 2           |
|    time_elapsed         | 87          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.003872884 |
|    clip_fraction        | 0.0105      |
|    clip_range           | 0.236       |
|    entropy_loss         | -0.651      |
|    explained_variance   | 0.355       |
|    learning_rate        | 1.43e-06    |
|    loss                 | 12.6        |
|    n_updates            | 17330       |
|    policy_gradient_loss | -0.000556   |
|    value_loss           | 110         |
-----------------------------------------
----------

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.22e+04    |
|    ep_rew_mean          | 491         |
| time/                   |             |
|    fps                  | 156         |
|    iterations           | 12          |
|    time_elapsed         | 626         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.004080835 |
|    clip_fraction        | 0.0127      |
|    clip_range           | 0.236       |
|    entropy_loss         | -0.667      |
|    explained_variance   | 0.415       |
|    learning_rate        | 1.43e-06    |
|    loss                 | 5           |
|    n_updates            | 17430       |
|    policy_gradient_loss | -7.15e-05   |
|    value_loss           | 94.4        |
-----------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 4.

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5.45e+04     |
|    ep_rew_mean          | 620          |
| time/                   |              |
|    fps                  | 156          |
|    iterations           | 22           |
|    time_elapsed         | 1150         |
|    total_timesteps      | 180224       |
| train/                  |              |
|    approx_kl            | 0.0035401788 |
|    clip_fraction        | 0.0112       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.344       |
|    explained_variance   | 0.506        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 66.9         |
|    n_updates            | 17530        |
|    policy_gradient_loss | -0.000361    |
|    value_loss           | 151          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5.45e+04     |
|    ep_rew_mean          | 620          |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 32           |
|    time_elapsed         | 1653         |
|    total_timesteps      | 262144       |
| train/                  |              |
|    approx_kl            | 0.0029098936 |
|    clip_fraction        | 0.0106       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.694       |
|    explained_variance   | -6.83        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.0125       |
|    n_updates            | 17630        |
|    policy_gradient_loss | -0.000723    |
|    value_loss           | 0.109        |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 6.58e+04     |
|    ep_rew_mean          | 662          |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 42           |
|    time_elapsed         | 2163         |
|    total_timesteps      | 344064       |
| train/                  |              |
|    approx_kl            | 0.0022859408 |
|    clip_fraction        | 0.0154       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.297       |
|    explained_variance   | 0.0914       |
|    learning_rate        | 1.43e-06     |
|    loss                 | 15.1         |
|    n_updates            | 17730        |
|    policy_gradient_loss | -0.000534    |
|    value_loss           | 68.9         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7e+04        |
|    ep_rew_mean          | 721          |
| time/                   |              |
|    fps                  | 159          |
|    iterations           | 52           |
|    time_elapsed         | 2666         |
|    total_timesteps      | 425984       |
| train/                  |              |
|    approx_kl            | 0.0023784314 |
|    clip_fraction        | 0.0139       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.415       |
|    explained_variance   | 0.148        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 15.6         |
|    n_updates            | 17830        |
|    policy_gradient_loss | -0.000273    |
|    value_loss           | 121          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 6.92e+04     |
|    ep_rew_mean          | 626          |
| time/                   |              |
|    fps                  | 158          |
|    iterations           | 62           |
|    time_elapsed         | 3198         |
|    total_timesteps      | 507904       |
| train/                  |              |
|    approx_kl            | 0.0026768816 |
|    clip_fraction        | 0.0165       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.469       |
|    explained_variance   | 0.182        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 9.71         |
|    n_updates            | 17930        |
|    policy_gradient_loss | -0.000765    |
|    value_loss           | 82.6         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 6.99e+04     |
|    ep_rew_mean          | 573          |
| time/                   |              |
|    fps                  | 152          |
|    iterations           | 72           |
|    time_elapsed         | 3864         |
|    total_timesteps      | 589824       |
| train/                  |              |
|    approx_kl            | 0.0013909326 |
|    clip_fraction        | 0.00721      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.297       |
|    explained_variance   | -8.54        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.0104       |
|    n_updates            | 18030        |
|    policy_gradient_loss | -0.000553    |
|    value_loss           | 0.0275       |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.3e+04      |
|    ep_rew_mean          | 486          |
| time/                   |              |
|    fps                  | 148          |
|    iterations           | 82           |
|    time_elapsed         | 4531         |
|    total_timesteps      | 671744       |
| train/                  |              |
|    approx_kl            | 0.0029982028 |
|    clip_fraction        | 0.0302       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.352       |
|    explained_variance   | 0.178        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 461          |
|    n_updates            | 18130        |
|    policy_gradient_loss | -0.000275    |
|    value_loss           | 135          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.3e+04      |
|    ep_rew_mean          | 486          |
| time/                   |              |
|    fps                  | 144          |
|    iterations           | 92           |
|    time_elapsed         | 5206         |
|    total_timesteps      | 753664       |
| train/                  |              |
|    approx_kl            | 0.0013793582 |
|    clip_fraction        | 0.0129       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.227       |
|    explained_variance   | -3.93        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.0039       |
|    n_updates            | 18230        |
|    policy_gradient_loss | -0.0008      |
|    value_loss           | 0.0605       |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.53e+04      |
|    ep_rew_mean          | 361           |
| time/                   |               |
|    fps                  | 142           |
|    iterations           | 102           |
|    time_elapsed         | 5865          |
|    total_timesteps      | 835584        |
| train/                  |               |
|    approx_kl            | 0.00066814944 |
|    clip_fraction        | 0.00546       |
|    clip_range           | 0.236         |
|    entropy_loss         | -0.225        |
|    explained_variance   | 0.222         |
|    learning_rate        | 1.43e-06      |
|    loss                 | 9.33          |
|    n_updates            | 18330         |
|    policy_gradient_loss | -0.00163      |
|    value_loss           | 5.66          |
-------------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.53e+04     |
|    ep_rew_mean          | 361          |
| time/                   |              |
|    fps                  | 140          |
|    iterations           | 112          |
|    time_elapsed         | 6534         |
|    total_timesteps      | 917504       |
| train/                  |              |
|    approx_kl            | 0.0014476478 |
|    clip_fraction        | 0.00775      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.2         |
|    explained_variance   | 0.214        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.566        |
|    n_updates            | 18430        |
|    policy_gradient_loss | -0.000134    |
|    value_loss           | 11.5         |
------------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_l

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.81e+04    |
|    ep_rew_mean          | 297         |
| time/                   |             |
|    fps                  | 139         |
|    iterations           | 122         |
|    time_elapsed         | 7158        |
|    total_timesteps      | 999424      |
| train/                  |             |
|    approx_kl            | 0.000715949 |
|    clip_fraction        | 0.0082      |
|    clip_range           | 0.236       |
|    entropy_loss         | -0.125      |
|    explained_variance   | -5.03       |
|    learning_rate        | 1.43e-06    |
|    loss                 | 0.00832     |
|    n_updates            | 18530       |
|    policy_gradient_loss | -0.000575   |
|    value_loss           | 0.0279      |
-----------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 7.

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 8.05e+04      |
|    ep_rew_mean          | 237           |
| time/                   |               |
|    fps                  | 137           |
|    iterations           | 132           |
|    time_elapsed         | 7852          |
|    total_timesteps      | 1081344       |
| train/                  |               |
|    approx_kl            | 0.00093758164 |
|    clip_fraction        | 0.0107        |
|    clip_range           | 0.236         |
|    entropy_loss         | -0.121        |
|    explained_variance   | -0.0575       |
|    learning_rate        | 1.43e-06      |
|    loss                 | -0.00385      |
|    n_updates            | 18630         |
|    policy_gradient_loss | -0.00104      |
|    value_loss           | 0.249         |
-------------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.25e+04     |
|    ep_rew_mean          | 202          |
| time/                   |              |
|    fps                  | 136          |
|    iterations           | 142          |
|    time_elapsed         | 8528         |
|    total_timesteps      | 1163264      |
| train/                  |              |
|    approx_kl            | 0.0007641602 |
|    clip_fraction        | 0.00524      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.181       |
|    explained_variance   | -3.56        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.0102       |
|    n_updates            | 18730        |
|    policy_gradient_loss | -0.000264    |
|    value_loss           | 0.0319       |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.25e+04     |
|    ep_rew_mean          | 202          |
| time/                   |              |
|    fps                  | 137          |
|    iterations           | 152          |
|    time_elapsed         | 9073         |
|    total_timesteps      | 1245184      |
| train/                  |              |
|    approx_kl            | 0.0043667136 |
|    clip_fraction        | 0.00839      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.156       |
|    explained_variance   | -1.39        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.00574      |
|    n_updates            | 18830        |
|    policy_gradient_loss | 2.56e-05     |
|    value_loss           | 0.0464       |
------------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_l

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.24e+04     |
|    ep_rew_mean          | 168          |
| time/                   |              |
|    fps                  | 139          |
|    iterations           | 162          |
|    time_elapsed         | 9541         |
|    total_timesteps      | 1327104      |
| train/                  |              |
|    approx_kl            | 0.0022480555 |
|    clip_fraction        | 0.0185       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.333       |
|    explained_variance   | -0.0516      |
|    learning_rate        | 1.43e-06     |
|    loss                 | 38.7         |
|    n_updates            | 18930        |
|    policy_gradient_loss | -0.000736    |
|    value_loss           | 128          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.24e+04     |
|    ep_rew_mean          | 168          |
| time/                   |              |
|    fps                  | 140          |
|    iterations           | 172          |
|    time_elapsed         | 10021        |
|    total_timesteps      | 1409024      |
| train/                  |              |
|    approx_kl            | 0.0008060643 |
|    clip_fraction        | 0.00929      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.155       |
|    explained_variance   | -2.78        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.0077       |
|    n_updates            | 19030        |
|    policy_gradient_loss | -0.000594    |
|    value_loss           | 0.0257       |
------------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_l

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.39e+04    |
|    ep_rew_mean          | 107         |
| time/                   |             |
|    fps                  | 141         |
|    iterations           | 181         |
|    time_elapsed         | 10453       |
|    total_timesteps      | 1482752     |
| train/                  |             |
|    approx_kl            | 0.006612323 |
|    clip_fraction        | 0.00693     |
|    clip_range           | 0.236       |
|    entropy_loss         | -0.148      |
|    explained_variance   | -6.58       |
|    learning_rate        | 1.43e-06    |
|    loss                 | 0.0118      |
|    n_updates            | 19120       |
|    policy_gradient_loss | 0.000376    |
|    value_loss           | 0.0251      |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.39

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.33e+04    |
|    ep_rew_mean          | 76          |
| time/                   |             |
|    fps                  | 143         |
|    iterations           | 191         |
|    time_elapsed         | 10905       |
|    total_timesteps      | 1564672     |
| train/                  |             |
|    approx_kl            | 0.004387345 |
|    clip_fraction        | 0.0125      |
|    clip_range           | 0.236       |
|    entropy_loss         | -0.254      |
|    explained_variance   | -7.65       |
|    learning_rate        | 1.43e-06    |
|    loss                 | 0.00162     |
|    n_updates            | 19220       |
|    policy_gradient_loss | 6.2e-05     |
|    value_loss           | 0.0174      |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.33

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.47e+04     |
|    ep_rew_mean          | 42.1         |
| time/                   |              |
|    fps                  | 142          |
|    iterations           | 201          |
|    time_elapsed         | 11526        |
|    total_timesteps      | 1646592      |
| train/                  |              |
|    approx_kl            | 0.0013097206 |
|    clip_fraction        | 0.00731      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.209       |
|    explained_variance   | -7.84        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.00738      |
|    n_updates            | 19320        |
|    policy_gradient_loss | -0.00041     |
|    value_loss           | 0.0167       |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.59e+04     |
|    ep_rew_mean          | 4.78         |
| time/                   |              |
|    fps                  | 141          |
|    iterations           | 211          |
|    time_elapsed         | 12201        |
|    total_timesteps      | 1728512      |
| train/                  |              |
|    approx_kl            | 0.0013051825 |
|    clip_fraction        | 0.00547      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.197       |
|    explained_variance   | 0.377        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 1.84         |
|    n_updates            | 19420        |
|    policy_gradient_loss | 5.18e-05     |
|    value_loss           | 115          |
------------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_l

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.36e+04     |
|    ep_rew_mean          | 7.79         |
| time/                   |              |
|    fps                  | 140          |
|    iterations           | 220          |
|    time_elapsed         | 12813        |
|    total_timesteps      | 1802240      |
| train/                  |              |
|    approx_kl            | 0.0011263739 |
|    clip_fraction        | 0.00364      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.222       |
|    explained_variance   | 0.599        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.222        |
|    n_updates            | 19510        |
|    policy_gradient_loss | -0.00052     |
|    value_loss           | 7.6          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.47e+04    |
|    ep_rew_mean          | -27.2       |
| time/                   |             |
|    fps                  | 139         |
|    iterations           | 229         |
|    time_elapsed         | 13435       |
|    total_timesteps      | 1875968     |
| train/                  |             |
|    approx_kl            | 0.002177857 |
|    clip_fraction        | 0.00902     |
|    clip_range           | 0.236       |
|    entropy_loss         | -0.138      |
|    explained_variance   | 0.667       |
|    learning_rate        | 1.43e-06    |
|    loss                 | 0.187       |
|    n_updates            | 19600       |
|    policy_gradient_loss | -0.00122    |
|    value_loss           | 5.37        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.47

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.47e+04    |
|    ep_rew_mean          | -27.2       |
| time/                   |             |
|    fps                  | 139         |
|    iterations           | 239         |
|    time_elapsed         | 14061       |
|    total_timesteps      | 1957888     |
| train/                  |             |
|    approx_kl            | 0.001239345 |
|    clip_fraction        | 0.00653     |
|    clip_range           | 0.236       |
|    entropy_loss         | -0.134      |
|    explained_variance   | -6.03       |
|    learning_rate        | 1.43e-06    |
|    loss                 | 0.0111      |
|    n_updates            | 19700       |
|    policy_gradient_loss | 0.000523    |
|    value_loss           | 0.0258      |
-----------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 8.

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 8.58e+04   |
|    ep_rew_mean          | -47.9      |
| time/                   |            |
|    fps                  | 139        |
|    iterations           | 249        |
|    time_elapsed         | 14662      |
|    total_timesteps      | 2039808    |
| train/                  |            |
|    approx_kl            | 0.00323545 |
|    clip_fraction        | 0.0115     |
|    clip_range           | 0.236      |
|    entropy_loss         | -0.115     |
|    explained_variance   | -0.213     |
|    learning_rate        | 1.43e-06   |
|    loss                 | 0.0281     |
|    n_updates            | 19800      |
|    policy_gradient_loss | -0.000715  |
|    value_loss           | 0.0593     |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.58e+04     |
|    ep_re

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 8.68e+04      |
|    ep_rew_mean          | -67.6         |
| time/                   |               |
|    fps                  | 138           |
|    iterations           | 259           |
|    time_elapsed         | 15286         |
|    total_timesteps      | 2121728       |
| train/                  |               |
|    approx_kl            | 0.00045620292 |
|    clip_fraction        | 0.00471       |
|    clip_range           | 0.236         |
|    entropy_loss         | -0.159        |
|    explained_variance   | -17.7         |
|    learning_rate        | 1.43e-06      |
|    loss                 | 0.00778       |
|    n_updates            | 19900         |
|    policy_gradient_loss | 0.000274      |
|    value_loss           | 0.0318        |
-------------------------------------------
-----------------------------------------
| rollout/                |       

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 8.76e+04      |
|    ep_rew_mean          | -64.4         |
| time/                   |               |
|    fps                  | 138           |
|    iterations           | 269           |
|    time_elapsed         | 15926         |
|    total_timesteps      | 2203648       |
| train/                  |               |
|    approx_kl            | 0.00025216973 |
|    clip_fraction        | 0.00242       |
|    clip_range           | 0.236         |
|    entropy_loss         | -0.105        |
|    explained_variance   | 0.659         |
|    learning_rate        | 1.43e-06      |
|    loss                 | 0.0414        |
|    n_updates            | 20000         |
|    policy_gradient_loss | -0.00136      |
|    value_loss           | 6.36          |
-------------------------------------------
-------------------------------------------
| rollout/                |     

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.76e+04     |
|    ep_rew_mean          | -64.4        |
| time/                   |              |
|    fps                  | 138          |
|    iterations           | 279          |
|    time_elapsed         | 16544        |
|    total_timesteps      | 2285568      |
| train/                  |              |
|    approx_kl            | 0.0013087303 |
|    clip_fraction        | 0.00624      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.0528      |
|    explained_variance   | -5.9         |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.0222       |
|    n_updates            | 20100        |
|    policy_gradient_loss | -0.00105     |
|    value_loss           | 0.0708       |
------------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_l

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 8.85e+04      |
|    ep_rew_mean          | -61.7         |
| time/                   |               |
|    fps                  | 137           |
|    iterations           | 289           |
|    time_elapsed         | 17158         |
|    total_timesteps      | 2367488       |
| train/                  |               |
|    approx_kl            | 0.00029280852 |
|    clip_fraction        | 0.00253       |
|    clip_range           | 0.236         |
|    entropy_loss         | -0.159        |
|    explained_variance   | -6.94         |
|    learning_rate        | 1.43e-06      |
|    loss                 | 0.00801       |
|    n_updates            | 20200         |
|    policy_gradient_loss | -0.000136     |
|    value_loss           | 0.0406        |
-------------------------------------------
-------------------------------------------
| rollout/                |     

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.92e+04     |
|    ep_rew_mean          | -62.7        |
| time/                   |              |
|    fps                  | 137          |
|    iterations           | 298          |
|    time_elapsed         | 17743        |
|    total_timesteps      | 2441216      |
| train/                  |              |
|    approx_kl            | 0.0018532214 |
|    clip_fraction        | 0.00157      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.0307      |
|    explained_variance   | -1.01        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.00635      |
|    n_updates            | 20290        |
|    policy_gradient_loss | -0.00188     |
|    value_loss           | 0.051        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 8.98e+04      |
|    ep_rew_mean          | -55.3         |
| time/                   |               |
|    fps                  | 137           |
|    iterations           | 307           |
|    time_elapsed         | 18328         |
|    total_timesteps      | 2514944       |
| train/                  |               |
|    approx_kl            | 0.00051371445 |
|    clip_fraction        | 0.00455       |
|    clip_range           | 0.236         |
|    entropy_loss         | -0.0621       |
|    explained_variance   | -10.8         |
|    learning_rate        | 1.43e-06      |
|    loss                 | 0.0643        |
|    n_updates            | 20380         |
|    policy_gradient_loss | -0.000282     |
|    value_loss           | 0.0297        |
-------------------------------------------
-------------------------------------------
| rollout/                |     

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.92e+04     |
|    ep_rew_mean          | -32.9        |
| time/                   |              |
|    fps                  | 136          |
|    iterations           | 317          |
|    time_elapsed         | 18964        |
|    total_timesteps      | 2596864      |
| train/                  |              |
|    approx_kl            | 0.0043001333 |
|    clip_fraction        | 0.0159       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.405       |
|    explained_variance   | 0.616        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.289        |
|    n_updates            | 20480        |
|    policy_gradient_loss | -0.00111     |
|    value_loss           | 1.76         |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.92e+04     |
|    ep_rew_mean          | -32.9        |
| time/                   |              |
|    fps                  | 136          |
|    iterations           | 327          |
|    time_elapsed         | 19609        |
|    total_timesteps      | 2678784      |
| train/                  |              |
|    approx_kl            | 0.0032430347 |
|    clip_fraction        | 0.0203       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.153       |
|    explained_variance   | -3.15        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.0101       |
|    n_updates            | 20580        |
|    policy_gradient_loss | -0.00144     |
|    value_loss           | 0.0371       |
------------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mea

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 8.99e+04      |
|    ep_rew_mean          | -32.2         |
| time/                   |               |
|    fps                  | 136           |
|    iterations           | 337           |
|    time_elapsed         | 20245         |
|    total_timesteps      | 2760704       |
| train/                  |               |
|    approx_kl            | 0.00013467751 |
|    clip_fraction        | 0.00249       |
|    clip_range           | 0.236         |
|    entropy_loss         | -0.112        |
|    explained_variance   | -3.16         |
|    learning_rate        | 1.43e-06      |
|    loss                 | 0.00917       |
|    n_updates            | 20680         |
|    policy_gradient_loss | 0.00013       |
|    value_loss           | 0.0384        |
-------------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.03e+04     |
|    ep_rew_mean          | -51.7        |
| time/                   |              |
|    fps                  | 136          |
|    iterations           | 347          |
|    time_elapsed         | 20876        |
|    total_timesteps      | 2842624      |
| train/                  |              |
|    approx_kl            | 0.0021223216 |
|    clip_fraction        | 0.00734      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.098       |
|    explained_variance   | 0.768        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.316        |
|    n_updates            | 20780        |
|    policy_gradient_loss | -0.000723    |
|    value_loss           | 48.7         |
------------------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_l

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 9.02e+04      |
|    ep_rew_mean          | -51           |
| time/                   |               |
|    fps                  | 136           |
|    iterations           | 357           |
|    time_elapsed         | 21490         |
|    total_timesteps      | 2924544       |
| train/                  |               |
|    approx_kl            | 0.00095210894 |
|    clip_fraction        | 0.00537       |
|    clip_range           | 0.236         |
|    entropy_loss         | -0.0855       |
|    explained_variance   | -2.19         |
|    learning_rate        | 1.43e-06      |
|    loss                 | 0.0132        |
|    n_updates            | 20880         |
|    policy_gradient_loss | -0.000151     |
|    value_loss           | 0.0385        |
-------------------------------------------
-------------------------------------------
| rollout/                |     

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.08e+04     |
|    ep_rew_mean          | -48          |
| time/                   |              |
|    fps                  | 136          |
|    iterations           | 367          |
|    time_elapsed         | 22093        |
|    total_timesteps      | 3006464      |
| train/                  |              |
|    approx_kl            | 0.0008175205 |
|    clip_fraction        | 0.00284      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.135       |
|    explained_variance   | 0.701        |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.598        |
|    n_updates            | 20980        |
|    policy_gradient_loss | -0.000457    |
|    value_loss           | 30           |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.08e+04     |
|    ep_rew_mean          | -48          |
| time/                   |              |
|    fps                  | 136          |
|    iterations           | 377          |
|    time_elapsed         | 22689        |
|    total_timesteps      | 3088384      |
| train/                  |              |
|    approx_kl            | 0.0015064304 |
|    clip_fraction        | 0.0108       |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.25        |
|    explained_variance   | -0.344       |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.143        |
|    n_updates            | 21080        |
|    policy_gradient_loss | -0.00101     |
|    value_loss           | 0.417        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 9.13e+04     |
|    ep_rew_mean          | -51.4        |
| time/                   |              |
|    fps                  | 136          |
|    iterations           | 387          |
|    time_elapsed         | 23298        |
|    total_timesteps      | 3170304      |
| train/                  |              |
|    approx_kl            | 0.0010752697 |
|    clip_fraction        | 0.00574      |
|    clip_range           | 0.236        |
|    entropy_loss         | -0.229       |
|    explained_variance   | -0.0696      |
|    learning_rate        | 1.43e-06     |
|    loss                 | 0.861        |
|    n_updates            | 21180        |
|    policy_gradient_loss | -0.00022     |
|    value_loss           | 0.674        |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

In [None]:
model.save("train/old_gen/PPO_Sonic_lvl1.zip")

# Creating New Model And Loading Parameters of Diffrent One

In [None]:
# Best params
parameters = {'gamma': 0.9905260007134729, 
              'n_steps': 6_144, 
              'learning_rate': 8.0670099142727265e-06, 
              'clip_range': 0.1865481147395506, 
              'gae_lambda': 0.9758617427012113
             }

In [None]:
model = PPO('CnnPolicy', env, **parameters, tensorboard_log=LOG_DIR, verbose=1)

In [None]:
model.set_parameters("train/old_gen/PPO-Sonic-Agent")

In [None]:
# Training
model.learn(total_timesteps=6_000_000, callback=callback)

In [None]:
model.save("train/old_gen/Sonic_PPO_lvl1.zip")

# Curriculum Learning

In [None]:
# Best model
model = PPO.load('train/old_gen/PPO-Sonic-Agent', verbose=1, tensorboard_log=LOG_DIR)

# Displaying current learning rate 
print(model.learning_rate)
print(model.n_steps)

In [None]:
# Lowering learning rate if needed
model.learning_rate = 100
print(model.learning_rate)

In [None]:
# Deleting any existing environment
env.close()
del env

In [None]:
# Training agent on many levels [0,1,8,9,10,11,14,16]
for _ in range(1):
    for i in [8,9]:
        # Creating env
        env = Sonic()
        env.level(i)
        env = ActionDiscretizer(env)
        env = Monitor(env, LOG_DIR)
        env = DummyVecEnv([lambda: env])
        env = VecFrameStack(env, 4, channels_order='last')

        # Setting env
        model.set_env(env)
        # Training
        model.learn(total_timesteps=500_000, callback=callback)

        #closing env
        env.close()
        del env

In [None]:
model.save("train/PPO-Curriculum-Sonic-Agent.zip")