In [None]:
%%capture
# Install what the notebook needs to render without a screen: OpenGL bindings,
# ffmpeg, the Xvfb virtual framebuffer and its Python wrapper.
# %%capture (first line of the cell) suppresses the installers' output.
!apt install python-opengl
!apt install ffmpeg
!apt install xvfb
!pip install pyvirtualdisplay
# NOTE(review): pyglet is pinned to 1.5.1; the reason for the pin is not
# recorded in this notebook — confirm it is still required.
!pip install pyglet==1.5.1

In [None]:
# Virtual display
# Start a non-visible 1400x900 display through pyvirtualdisplay so that
# rendering calls later in the notebook have a display to draw to.
# presumably required because the notebook host has no physical screen —
# confirm for your runtime.
from pyvirtualdisplay import Display

virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()

<pyvirtualdisplay.display.Display at 0x7ef23a8ec970>

In [None]:
%%capture
# Install the RL stack used below; %%capture hides the pip output.
# swig is installed before gymnasium[box2d] — presumably it is needed to build
# the Box2D bindings; confirm.
# NOTE(review): versions are unpinned, while later cells depend on the names
# "CarRacing-v2" and `gym.wrappers.GrayScaleObservation`; consider pinning the
# gymnasium / stable_baselines3 versions this notebook was run with.
!pip install swig
!pip install gymnasium[box2d]
!pip install stable_baselines3

In [None]:
import numpy as np

from collections import deque

import matplotlib.pyplot as plt
%matplotlib inline

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

# Gym
import gymnasium as gym
import gym_pygame

# Hugging Face Hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.
import imageio

# Stable Baselines
import stable_baselines3
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack

from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy

  from tensorflow.tsl.python.lib.core import pywrap_ml_dtypes


In [None]:
# Select the first CUDA GPU when PyTorch reports one is available, else the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  and should_run_async(code)


In [None]:
# Show which torch device was selected.
print(device)

cuda:0


In [None]:
# Make Environment Approach #1
def make_env(rank: int, seed: int = 50):
    """
    Build a zero-argument factory for one CarRacing sub-environment.

    The factory is what a vectorized-env constructor expects: calling it
    creates the discrete-action "CarRacing-v2" environment with "rgb_array"
    rendering, wraps it in a grayscale observation wrapper and resets it
    once with the seed ``seed + rank``.

    :param rank: index of the sub-environment; added to ``seed`` so each
        sub-environment is reset with a different seed
    :param seed: base seed; also passed to ``set_random_seed`` when the
        factory is created (not when it is called)
    :return: a callable that takes no arguments and returns the wrapped env
    """
    # Seeding happens once, at factory-creation time.
    set_random_seed(seed)

    def _thunk():
        base_env = gym.make("CarRacing-v2", continuous=False, render_mode="rgb_array")
        gray_env = gym.wrappers.GrayScaleObservation(base_env)
        # Per-environment seed: the base seed offset by this env's rank.
        gray_env.reset(seed=seed + rank)
        return gray_env

    return _thunk

# Number of sub-environments stepped together by the vectorized env.
num_cpu = 4
# One factory per rank, run in a DummyVecEnv, with the last 4 observations
# stacked.
vec_env = VecFrameStack(
    DummyVecEnv([make_env(rank) for rank in range(num_cpu)]),
    n_stack=4,
)

In [None]:
# Make Environment Approach #2
# Same idea via the SB3 helper: 4 discrete-action CarRacing envs, then a
# 4-frame stack on top. This rebinds `vec_env`, replacing any earlier value.
vec_env = VecFrameStack(
    make_vec_env("CarRacing-v2", n_envs=4, env_kwargs=dict(continuous=False)),
    n_stack=4,
)

In [None]:
# Training
# Train PPO on whichever `vec_env` was built most recently.
# NOTE(review): the observations are camera frames, yet "MlpPolicy" is used;
# SB3 also provides "CnnPolicy" for image inputs — confirm this choice is
# intentional.
model = PPO("MlpPolicy", vec_env, verbose=1)
# The logged run below reports 16384 total timesteps even though 10000 were
# requested — presumably PPO only stops after completing a full rollout batch
# (8192 steps per iteration in the log); confirm against the PPO `n_steps` docs.
model.learn(total_timesteps=10000, progress_bar=True)

# Persist the trained model under the name "ppo_carracing" for the evaluation cell.
model.save("ppo_carracing")

Using cuda device


Output()

-----------------------------
| time/              |      |
|    fps             | 67   |
|    iterations      | 1    |
|    time_elapsed    | 122  |
|    total_timesteps | 8192 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 66          |
|    iterations           | 2           |
|    time_elapsed         | 247         |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.010296366 |
|    clip_fraction        | 0.133       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.6        |
|    explained_variance   | 0.0181      |
|    learning_rate        | 0.0003      |
|    loss                 | 0.373       |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.0117     |
|    value_loss           | 0.863       |
-----------------------------------------


In [None]:
# Evaluation
# Reload the model saved by the training cell and evaluate it for 10 episodes
# on the current `vec_env` (the same object the model was trained on unless a
# later cell has rebound it).
model = PPO.load("ppo_carracing")
mean_reward, std_reward = evaluate_policy(model, vec_env, n_eval_episodes=10)

# Print the two values returned by evaluate_policy: mean reward, then std.
print(mean_reward, std_reward)



-41.61973622888327 47.5705570672604


In [None]:
def record_video(env, policy, out_directory, fps=30):
  """
  Roll out one episode with `policy` and save the rendered frames as a video.

  :param env: vectorized environment to record; it must provide `reset()`,
    `step(action)` returning `(obs, reward, done, info)` with one `done` flag
    per sub-environment, and `render(mode='rgb_array')` returning a frame
  :param policy: trained agent exposing `predict(observation)` that returns
    an `(action, state)` pair
  :param out_directory: path of the video file to write (e.g. './replay.mp4')
  :param fps: frames per second of the written video
  """
  # Safety cap so a policy that never finishes an episode cannot loop forever.
  max_steps = 10001

  # Drive the environment that was passed in rather than a notebook global,
  # so the function records whatever the caller hands it.
  state = env.reset()
  images = [env.render(mode='rgb_array')]

  for _ in range(max_steps):
    action, _ = policy.predict(state)
    state, reward, done, info = env.step(action)
    images.append(env.render(mode='rgb_array'))
    # `done` has one entry per sub-environment; stop once any episode ends.
    if any(done):
      break

  imageio.mimsave(out_directory, [np.array(img) for img in images], fps=fps)

In [None]:
# Record for Approach #1
# Single-env version of the Approach #1 pipeline (rank 0) with the same
# 4-frame stack; note this rebinds the notebook-global `vec_env`.
vec_env = VecFrameStack(DummyVecEnv([make_env(0)]), n_stack=4)

record_video(env=vec_env, policy=model, out_directory='./replay.mp4')



In [None]:
# Record for Approach #2
# Single-env version of the Approach #2 pipeline with the same 4-frame stack;
# the video is written to the same './replay.mp4' path as the Approach #1 cell.
vec_env = VecFrameStack(
    make_vec_env("CarRacing-v2", n_envs=1, env_kwargs=dict(continuous=False)),
    n_stack=4,
)

record_video(env=vec_env, policy=model, out_directory='./replay.mp4')

