In [1]:
import gymnasium as gym
import torch
import time
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
import ale_py

from wrapper.wrapper import PongWrapper

gym.register_envs(ale_py)

ENV_NAME = "PongNoFrameskip-v4"
TOTAL_TIMESTEPS = 3_000_000
MODEL_PATH = "ppo_pong"

def train():
    env = make_atari_env(ENV_NAME, n_envs=1, seed=0)
    env.envs[0] = PongWrapper(env.envs[0], buffer_stack_size=3)  # manually wrap base env
    env = VecFrameStack(env, n_stack=4)

    # model = PPO("CnnPolicy", env, verbose=1)
    model = PPO.load(MODEL_PATH, env=env)

    model.learn(total_timesteps=TOTAL_TIMESTEPS)
    model.save(MODEL_PATH)
    env.close()

def play():
    model = PPO.load(MODEL_PATH)
    env = make_atari_env(ENV_NAME, n_envs=1, seed=0)
    env.envs[0] = PongWrapper(env.envs[0], buffer_stack_size=10)  # manually wrap base env
    env = VecFrameStack(env, n_stack=4)

    obs = env.reset()
    env.render("human")

    while True:
        action, _states = model.predict(obs, deterministic=False)
        obs, rewards, dones, info = env.step(action)
        env.render("human")
        time.sleep(0.01)

    env.close()


In [2]:
train()

A.L.E: Arcade Learning Environment (version 0.11.0+dfae0bd)
[Powered by Stella]


Wrapping the env in a VecTransposeImage.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 8.51e+03 |
|    ep_rew_mean     | -10      |
| time/              |          |
|    fps             | 500      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 8.47e+03   |
|    ep_rew_mean          | -8.4       |
| time/                   |            |
|    fps                  | 438        |
|    iterations           | 2          |
|    time_elapsed         | 9          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.30298677 |
|    clip_fraction        | 0.298      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.314     |
|    explained_variance   | 0.847      |
|   

In [None]:
play()

KeyboardInterrupt: 

: 

In [None]:
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import A2C

import ale_py

# There already exists an environment generator
# that will make and wrap atari environments correctly.
# Here we are also multi-worker training (n_envs=4 => 4 environments)
vec_env = make_atari_env("PongNoFrameskip-v4", n_envs=4, seed=0)
# Frame-stacking with 4 frames
vec_env = VecFrameStack(vec_env, n_stack=4)

model = A2C("CnnPolicy", vec_env, verbose=1)
model.learn(total_timesteps=1)

Using cuda device
Wrapping the env in a VecTransposeImage.


<stable_baselines3.a2c.a2c.A2C at 0x74b7ac18ce60>

In [None]:
obs = vec_env.reset()
while True:
    action, _states = model.predict(obs, deterministic=False)
    obs, rewards, done, info = vec_env.step(action)
    vec_env.render("human")
    time.sleep(0.05)

KeyboardInterrupt: 