In [None]:
import gymnasium as gym
from stable_baselines3 import DQN
import ale_py

#### Check all registerd environemnts in Gymnasium

In [2]:
# List all registered envs
envs = gym.envs.registry.keys()

# Filter for Atari ones
atari_envs = [env_id for env_id in envs if "NoFrameskip" in env_id]
print(sorted(envs))

['ALE/Adventure-v5', 'ALE/AirRaid-v5', 'ALE/Alien-v5', 'ALE/Amidar-v5', 'ALE/Assault-v5', 'ALE/Asterix-v5', 'ALE/Asteroids-v5', 'ALE/Atlantis-v5', 'ALE/Atlantis2-v5', 'ALE/Backgammon-v5', 'ALE/BankHeist-v5', 'ALE/BasicMath-v5', 'ALE/BattleZone-v5', 'ALE/BeamRider-v5', 'ALE/Berzerk-v5', 'ALE/Blackjack-v5', 'ALE/Bowling-v5', 'ALE/Boxing-v5', 'ALE/Breakout-v5', 'ALE/Carnival-v5', 'ALE/Casino-v5', 'ALE/Centipede-v5', 'ALE/ChopperCommand-v5', 'ALE/CrazyClimber-v5', 'ALE/Crossbow-v5', 'ALE/Darkchambers-v5', 'ALE/Defender-v5', 'ALE/DemonAttack-v5', 'ALE/DonkeyKong-v5', 'ALE/DoubleDunk-v5', 'ALE/Earthworld-v5', 'ALE/ElevatorAction-v5', 'ALE/Enduro-v5', 'ALE/Entombed-v5', 'ALE/Et-v5', 'ALE/FishingDerby-v5', 'ALE/FlagCapture-v5', 'ALE/Freeway-v5', 'ALE/Frogger-v5', 'ALE/Frostbite-v5', 'ALE/Galaxian-v5', 'ALE/Gopher-v5', 'ALE/Gravitar-v5', 'ALE/Hangman-v5', 'ALE/HauntedHouse-v5', 'ALE/Hero-v5', 'ALE/HumanCannonball-v5', 'ALE/IceHockey-v5', 'ALE/Jamesbond-v5', 'ALE/JourneyEscape-v5', 'ALE/Kaboom-v

#### If Atari Environment not registedred run code below

In [3]:
gym.register_envs(ale_py)

#### Create environment and train model

In [4]:
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
env_name = "FreewayNoFrameskip-v4"
env = make_atari_env(env_name, n_envs=4, seed=0)
env = VecFrameStack(env, n_stack=4)

A.L.E: Arcade Learning Environment (version 0.11.2+ecc1138)
[Powered by Stella]


In [5]:
%load_ext tensorboard
%tensorboard --logdir ./freeway_tensorboard/

Reusing TensorBoard on port 6006 (pid 83319), started 1:09:40 ago. (Use '!kill 83319' to kill it.)

In [6]:
#Define model here

model = DQN(
    'CnnPolicy',
    env,
    verbose=1,
    learning_rate=0.00025,
    gamma=0.99,
    buffer_size=100000,
    batch_size=32,
    train_freq=4,
    target_update_interval=10000,
    learning_starts=50000,
    exploration_fraction=0.1,
    exploration_final_eps=0.01,
    tensorboard_log="./freeway_tensorboard"
)


#train model here
model.learn(total_timesteps=1000000)

model.save("model_Freeway")

Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to ./freeway_tensorboard/DQN_1
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 8.19e+03 |
|    ep_rew_mean      | 0        |
|    exploration_rate | 0.919    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1752     |
|    time_elapsed     | 4        |
|    total_timesteps  | 8188     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 8.19e+03 |
|    ep_rew_mean      | 0        |
|    exploration_rate | 0.838    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1828     |
|    time_elapsed     | 8        |
|    total_timesteps  | 16364    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 8.19e+03 |
|    ep_rew_mean      | 0   

#### Valdiate model by having it play live atari game

In [None]:
#Load model here
model.load("model_Freeway")

In [None]:
from gymnasium.wrappers import RecordVideo
import matplotlib.pyplot as plt

env = gym.make(env_name, render_mode="rgb_array")
env = gym.wrappers.AtariPreprocessing(env, grayscale_obs=True, screen_size=84, frame_skip=4, scale_obs=False)
env = gym.wrappers.FrameStackObservation(env, stack_size=4)
env = RecordVideo(env, "./")

env_data = env.reset()
obs = env_data[0] #observation

episode_reward = 0
max_ep_timesteps = 100000

for t in range(max_ep_timesteps):
    print(f"timestep: {t}")

    action, _states = model.predict(obs, deterministic = True) # inference the model given the current game data. 
    env_data = env.step(action) # update enviorment data with action from inferenced model 
    obs = env_data[0] #update obs with new enviorment data from agent action 
    reward = env_data[1] #update reward with new enviorment data from agent action 
    done = env_data[2] # update if episode is done with new viorment data from agent action

    episode_reward += reward

    plt.imshow(obs[-1], cmap = "gray")
    plt.show()

    print(f"action: {action}")
    print(f"episode reward: {episode_reward}")
    print(env_data[1:])


    if done: break
env.close()