In [1]:
%%capture
# Environment setup; %%capture hides the installer output of this cell.
# System packages: OpenGL bindings, ffmpeg, and the Xvfb virtual framebuffer.
!apt install python-opengl
!apt install ffmpeg
!apt install xvfb
# Python packages: the pyvirtualdisplay wrapper and pyglet pinned to 1.5.1.
!pip install pyvirtualdisplay
!pip install pyglet==1.5.1

In [2]:
%%capture
# RL stack; %%capture hides the installer output of this cell.
# swig is installed before gymnasium[box2d] (presumably needed to build the Box2D bindings).
# NOTE(review): versions are unpinned; the code below uses "CarRacing-v2" and
# gym.wrappers.GrayScaleObservation / FrameStack, so confirm the installed
# gymnasium release still provides them.
!pip install swig
!pip install gymnasium[box2d]
!pip install stable_baselines3

In [3]:
# Virtual Display
from pyvirtualdisplay import Display
import imageio

# Create a non-visible 1400x900 virtual display and start it
# (presumably so frames can be rendered on a machine without a screen).
virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7f0b0073f7f0>

In [4]:
# NumPy
import numpy as np

# MatPlotLib
import matplotlib.pyplot as plt
%matplotlib inline

# Gym
import gymnasium as gym

# Stable Baselines
import stable_baselines3
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize

from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
# Define Seeded Environment Creation
def make_seeded_env(rank: int, seed: int = 50):
    """Return a zero-argument factory that builds a seeded CarRacing environment.

    Calling this function immediately invokes ``set_random_seed(seed)``; the
    environment itself is only created when the returned factory is called.

    Args:
        rank: Offset added to ``seed`` for the environment's own reset seed.
        seed: Base seed passed to ``set_random_seed`` and used for the reset.

    Returns:
        A callable that creates "CarRacing-v2" (continuous actions,
        ``rgb_array`` rendering), wraps it in grayscale conversion and a
        4-frame stack, resets it with ``seed + rank`` and returns it.
    """
    # Seeding happens once, when the factory is created, not per environment.
    set_random_seed(seed)

    def _build():
        base_env = gym.make("CarRacing-v2", continuous=True, render_mode="rgb_array")
        gray_env = gym.wrappers.GrayScaleObservation(base_env)
        stacked_env = gym.wrappers.FrameStack(gray_env, 4)
        # Initial reset carries the per-rank seed.
        stacked_env.reset(seed=seed + rank)
        return stacked_env

    return _build

In [None]:
# Define Unseeded Environment Creation
def make_unseeded_env(rank: int, seed: int = 50):
    """Return a zero-argument factory that builds a randomly seeded CarRacing environment.

    Calling this function immediately invokes ``set_random_seed`` with a value
    drawn from ``np.random.randint(9999)``; the environment itself is only
    created when the returned factory is called.

    Args:
        rank: Unused; kept so the signature matches ``make_seeded_env``.
        seed: Unused; kept so the signature matches ``make_seeded_env``.

    Returns:
        A callable that creates "CarRacing-v2" (continuous actions,
        ``rgb_array`` rendering), wraps it in grayscale conversion and a
        4-frame stack, resets it with a fresh ``np.random.randint(9999)``
        seed and returns it.
    """
    def _build():
        base_env = gym.make("CarRacing-v2", continuous=True, render_mode="rgb_array")
        gray_env = gym.wrappers.GrayScaleObservation(base_env)
        stacked_env = gym.wrappers.FrameStack(gray_env, 4)
        # The reset seed is drawn when the environment is built, not when the factory is made.
        reset_seed = np.random.randint(9999)
        stacked_env.reset(seed=reset_seed)
        return stacked_env

    # Drawn and applied once, at factory-creation time.
    global_seed = np.random.randint(9999)
    set_random_seed(global_seed)
    return _build

In [None]:
# Define Recording Process (Adapted from Class Assignments)
def record_video(env, agent, out_directory, fps=30):
    """Roll out ``agent`` in ``env`` for one episode and save the frames as a video.

    Args:
        env: Vectorized environment providing ``reset()``, ``step(action)``
            returning ``(obs, rewards, dones, infos)`` and
            ``render(mode='rgb_array')``.
        agent: Object whose ``predict(obs)`` returns ``(action, state)``,
            e.g. the loaded PPO model.
        out_directory: Output path handed to ``imageio.mimsave``; despite the
            name this is a file path such as ``'replay.mp4'``.
        fps: Frames per second of the written video.

    The rollout stops as soon as any entry of ``dones`` is true, or once the
    step counter exceeds 10000.
    """
    # Bug fix: operate on the `env` argument. The previous version ignored it
    # and read the notebook-global `vec_env`, so it failed (or recorded the
    # wrong environment) whenever that global was missing or stale.
    max_steps = 10000
    state = env.reset()
    frames = [env.render(mode='rgb_array')]  # frame of the initial state
    done = [False]
    total_step = 0
    while not any(done) and total_step <= max_steps:
        # predict() is called with its defaults; for Stable-Baselines3 models
        # that means deterministic=False, i.e. the action is sampled.
        action, _ = agent.predict(state)
        # The next observation directly becomes the current state.
        state, reward, done, info = env.step(action)
        frames.append(env.render(mode='rgb_array'))
        total_step += 1
    imageio.mimsave(out_directory, [np.array(frame) for frame in frames], fps=fps)

SEEDED EVALUATION

In [None]:
# Make Evaluation/Recording Environment
# Single-environment DummyVecEnv built from the seeded factory (rank 0, default seed).
vec_env = DummyVecEnv([make_seeded_env(0)])
# Wrap in VecNormalize with reward normalization turned off.
# NOTE(review): this is a freshly constructed VecNormalize, not statistics
# restored from training — confirm this matches how "ppo_carracing" was trained.
vec_env = VecNormalize(vec_env, norm_reward=False)
# Reset once up front; the returned observation is discarded.
_ = vec_env.reset()

In [None]:
# Evaluation
# Load the saved PPO model from "ppo_carracing".
model = PPO.load("ppo_carracing")
# Run 10 evaluation episodes on vec_env; the two returned values are stored
# as the mean and standard deviation of the reward.
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=10)

print(mean_reward, std_reward)

677.1284952230751 237.26076344279085


In [None]:
# Record
# Roll out the loaded model on the seeded vec_env and write the frames to 'seeded_replay.mp4'.
record_video(vec_env, model, 'seeded_replay.mp4')



UNSEEDED EVALUATION

In [None]:
# Make Evaluation/Recording Environment
# Rebinds vec_env: single-environment DummyVecEnv built from the randomly seeded factory.
vec_env = DummyVecEnv([make_unseeded_env(0)])
# Wrap in VecNormalize with reward normalization turned off.
# NOTE(review): this is a freshly constructed VecNormalize, not statistics
# restored from training — confirm this matches how "ppo_carracing" was trained.
vec_env = VecNormalize(vec_env, norm_reward=False)
# Reset once up front; the returned observation is discarded.
_ = vec_env.reset()

In [None]:
# Evaluation
# Load the saved PPO model from "ppo_carracing" (same path as the seeded evaluation).
model = PPO.load("ppo_carracing")
# Run 10 evaluation episodes on the unseeded vec_env; the two returned values
# are stored as the mean and standard deviation of the reward.
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=10)

print(mean_reward, std_reward)



591.9635081730783 211.40069599473964


In [None]:
# Record
# Roll out the loaded model on the unseeded vec_env and write the frames to 'unseeded_replay.mp4'.
record_video(vec_env, model, 'unseeded_replay.mp4')

