In [None]:
import gymnasium as gym
from stable_baselines3 import DQN
import os
import ale_py

In [None]:
# Register the environments provided by the ale_py package with Gymnasium so
# that gym.make can resolve them by id.
gym.register_envs(ale_py)

In [None]:
# Create the training environment. The "rgb_array" render mode makes
# env.render() return frames as arrays rather than opening a window.
env_name = "Riverraid-v4"
env = gym.make(env_name, render_mode="rgb_array")

In [None]:
# Load the TensorBoard notebook extension and open a dashboard on the log
# directory (the same path is passed as tensorboard_log to the DQN model in
# this notebook).
%load_ext tensorboard
%tensorboard --logdir ./Riverraid_tensorboard/

In [None]:
# Reload the TensorBoard extension; only needed when it was already loaded
# earlier in this kernel session and has to be re-initialised.
%reload_ext tensorboard

In [None]:
# Define the model.
# The DQN hyperparameters are gathered in one mapping so they can be reviewed
# and tuned in a single place; they are unpacked into the constructor below.
# NOTE(review): learning_starts (50_000) is larger than buffer_size (26_000),
# so the replay buffer has presumably wrapped around before the first gradient
# step -- confirm this is intended.
dqn_hyperparams = {
    "learning_rate": 0.00025,
    "gamma": 0.95,
    "buffer_size": 26_000,
    "batch_size": 32,
    "train_freq": 4,
    "target_update_interval": 5_000,
    "learning_starts": 50_000,
    "exploration_fraction": 0.3,
    "exploration_final_eps": 0.01,
}
model = DQN(
    policy='CnnPolicy',
    env=env,
    verbose=1,
    tensorboard_log="./Riverraid_tensorboard",
    **dqn_hyperparams,
)

# Train the model for one million environment steps.
model.learn(total_timesteps=1_000_000)

# Save the trained model to disk under the name "model_Riverraid".
model.save("model_Riverraid")

In [None]:
# Load the saved model.
# DQN.load is a classmethod that builds and returns a *new* model; it does not
# modify an existing instance in place. The result therefore has to be bound
# to `model`, which also lets this cell run on a fresh kernel without
# re-running the training cell.
model = DQN.load("model_Riverraid")

In [None]:
from gymnasium.wrappers import RecordVideo
import matplotlib.pyplot as plt

# Inference: roll out a single episode with the trained model while recording
# it to ./Videos, printing per-step diagnostics and the running return.
env_name = "Riverraid-v4"
env = gym.make(env_name, render_mode="rgb_array")
env = RecordVideo(env, "./Videos")

episode_reward = 0
max_ep_timesteps = 1000000  # hard cap on the number of steps in the rollout

try:
    # reset() returns (observation, info); only the observation is needed.
    env_data = env.reset()
    obs = env_data[0]

    for t in range(max_ep_timesteps):
        print(f"timestep: {t}")

        action, _ = model.predict(obs)
        # step() returns (obs, reward, terminated, truncated, info).
        env_data = env.step(action)
        obs, reward, terminated, truncated, _info = env_data

        # An episode is over when it terminates OR is truncated (e.g. by a
        # time limit); checking only `terminated` would keep stepping an
        # environment that already needs a reset.
        done = terminated or truncated

        episode_reward += reward

        print(f"action: {action}")
        print(f"episode reward: {episode_reward}")
        print(env_data[1:])

        if done:
            break
finally:
    # Always close the wrapped environment, even if the rollout raises, so
    # the video recorder gets the chance to finish writing its output.
    env.close()