In [1]:
import os  
import gym  
from stable_baselines3 import PPO  
from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback 

In [2]:
def create_environment(game_name):  
    return gym.make(game_name)  
  
def load_model(checkpoint_path, environment):  
    model = PPO.load(checkpoint_path, env=environment)  
    return model  

In [3]:
def train_model(model, environment, save_path, save_freq, total_timesteps):  
    checkpoint_callback = CheckpointCallback(save_freq=save_freq, save_path=save_path)  
    eval_callback = EvalCallback(environment, best_model_save_path=save_path,  
                                 log_path=save_path, eval_freq=save_freq)  
    model.learn(total_timesteps=total_timesteps, callback=[checkpoint_callback, eval_callback])  

In [4]:
def main():  
    game_name = 'ALE/Pong'  # Replace with your actual game  
    save_path = 'trained_models/Pong'  # Replace with your actual save path  
    os.makedirs(save_path, exist_ok=True)  
  
    environment = create_environment(game_name)  
  
    # Ensure the checkpoint path matches the last checkpoint's filename  
    checkpoint_path = os.path.join(save_path, 'rl_model_10000_steps.zip')  # Replace XXXXXX with the actual step number  
  
    model = load_model(checkpoint_path, environment)  
    train_model(model, environment, save_path, save_freq=10000, total_timesteps=10000)  # Adjust as needed  

In [5]:
if __name__ == '__main__':  
    main()

  logger.warn(
A.L.E: Arcade Learning Environment (version 0.8.1+53f58b7)
[Powered by Stella]


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


  if not isinstance(terminated, (bool, np.bool8)):


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.27e+03 |
|    ep_rew_mean     | -19      |
| time/              |          |
|    fps             | 354      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.15e+03   |
|    ep_rew_mean          | -20        |
| time/                   |            |
|    fps                  | 40         |
|    iterations           | 2          |
|    time_elapsed         | 101        |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.15057607 |
|    clip_fraction        | 0.612      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.5       |
|    explained_variance   | 0.3        |
|    learning_rate        | 0.0003     |
|   



Eval num_timesteps=10000, episode_reward=-17.00 +/- 1.90
Episode length: 1371.40 +/- 183.01
----------------------------------------
| eval/                   |            |
|    mean_ep_length       | 1.37e+03   |
|    mean_reward          | -17        |
| time/                   |            |
|    total_timesteps      | 10000      |
| train/                  |            |
|    approx_kl            | 0.18488887 |
|    clip_fraction        | 0.626      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.35      |
|    explained_variance   | 0.49       |
|    learning_rate        | 0.0003     |
|    loss                 | -0.125     |
|    n_updates            | 120        |
|    policy_gradient_loss | -0.103     |
|    value_loss           | 0.0288     |
----------------------------------------
New best mean reward!
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.12e+03 |
|    ep_rew_mean     | -19.4    |
| time/     