In [1]:
# Update and install system dependencies
!apt update -y > /dev/null
!apt install -y xvfb ffmpeg python-opengl > /dev/null

# Install Python packages for Gym, MuJoCo, and rendering
!pip install gymnasium[mujoco] pyvirtualdisplay imageio[ffmpeg]  stable-baselines3 -q




W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


E: Unable to locate package python-opengl


# Hopper


In [2]:
from pyvirtualdisplay import Display
import gymnasium as gym
import imageio

SEED = 0  # fixed seed so the random rollout can be reproduced

# Start a virtual display (needed in Colab or headless)
display = Display(visible=0, size=(1400, 900))
display.start()

# Create Hopper environment with RGB rendering
env = gym.make("Hopper-v5", render_mode="rgb_array")
try:
    # Seed both the action sampler and the environment's initial state.
    env.action_space.seed(SEED)
    obs, info = env.reset(seed=SEED)

    frames = []
    terminated = truncated = False
    total_reward = 0

    # Random policy: keep sampling actions until the episode ends.
    while not (terminated or truncated):
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        frames.append(env.render())
        total_reward += reward
finally:
    # Close the environment even if stepping or rendering raises.
    env.close()

# Save video
imageio.mimsave("hopper_video.mp4", frames, fps=30)
print("Total reward:", total_reward)


Total reward: 21.424401823469037


In [3]:
from IPython.display import HTML
from base64 import b64encode

# Read the clip from the working directory -- the same relative path the
# recording cell writes to -- rather than a hard-coded "/content" location,
# and close the file handle as soon as the bytes are read.
with open("hopper_video.mp4", "rb") as video_file:
    mp4 = video_file.read()

# Embed the video inline as a base64 data URL.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

HTML(f"""
<video width=600 controls>
    <source src="{data_url}" type="video/mp4">
</video>
""")


## Entrenamiento SAC

In [4]:
from pyvirtualdisplay import Display
import gymnasium as gym
from stable_baselines3 import SAC
from stable_baselines3.common.env_util import make_vec_env
import torch

SEED = 0  # fixed seed so the training run can be reproduced

# Start virtual display for rendering (optional for video)
display = Display(visible=0, size=(1400, 900))
display.start()

# Check if CUDA is available
print("GPU available?", torch.cuda.is_available())
print("Using device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")

# Create Hopper environment (seeded vectorized wrapper with a single copy)
env = make_vec_env("Hopper-v5", n_envs=1, seed=SEED)

# Train SAC agent on GPU (if available); the model seed covers the policy
# initialisation and sampling performed by the algorithm.
model = SAC("MlpPolicy", env, verbose=1, device="cuda", seed=SEED)  # <<< Use GPU
# Trailing semicolon keeps the notebook from echoing the returned model repr.
model.learn(total_timesteps=130_000);



GPU available? True
Using device: Tesla T4
Using cuda device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 18       |
|    ep_rew_mean     | 12.4     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 1705     |
|    time_elapsed    | 0        |
|    total_timesteps | 72       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 16.6     |
|    ep_rew_mean     | 11.4     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 166      |
|    time_elapsed    | 0        |
|    total_timesteps | 133      |
| train/             |          |
|    actor_loss      | -4.29    |
|    critic_loss     | 2.04     |
|    ent_coef        | 0.991    |
|    ent_coef_loss   | -0.0467  |
|    learning_rate   | 0.0003   |
|    n_updates       | 32       |
---------------------------------
---------------------

<stable_baselines3.sac.sac.SAC at 0x7d0405584350>

In [5]:
import imageio

SEED = 0  # fixed seed so the evaluation episode starts from the same state

# Create a new environment with rendering enabled
record_env = gym.make("Hopper-v5", render_mode="rgb_array")
try:
    obs, info = record_env.reset(seed=SEED)
    frames = []
    terminated = truncated = False
    total_reward = 0

    # Roll out the trained policy greedily (deterministic=True) for one episode.
    while not (terminated or truncated):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = record_env.step(action)
        frames.append(record_env.render())
        total_reward += reward
finally:
    # Close the environment even if prediction or rendering raises.
    record_env.close()

# Save to video
imageio.mimsave("sac_hopper_video.mp4", frames, fps=30)
print("Total reward:", total_reward)


Total reward: 1179.7569965545295


In [6]:
from IPython.display import HTML
from base64 import b64encode

# Read the clip from the working directory -- the same relative path the
# recording cell writes to -- rather than a hard-coded "/content" location,
# and close the file handle as soon as the bytes are read.
with open("sac_hopper_video.mp4", "rb") as video_file:
    mp4 = video_file.read()

# Embed the video inline as a base64 data URL.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

HTML(f"""
<video width=600 controls>
    <source src="{data_url}" type="video/mp4">
</video>
""")


# Ant

In [7]:
from pyvirtualdisplay import Display
import gymnasium as gym
import imageio

SEED = 0  # fixed seed so the random rollout can be reproduced

# Start a virtual display (needed in Colab or headless)
display = Display(visible=0, size=(1400, 900))
display.start()

# Create Ant environment with RGB rendering
env = gym.make("Ant-v5", render_mode="rgb_array")
try:
    # Seed both the action sampler and the environment's initial state.
    env.action_space.seed(SEED)
    obs, info = env.reset(seed=SEED)

    frames = []
    terminated = truncated = False
    total_reward = 0

    # Random policy: keep sampling actions until the episode ends.
    while not (terminated or truncated):
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        frames.append(env.render())
        total_reward += reward
finally:
    # Close the environment even if stepping or rendering raises.
    env.close()

# Save video
imageio.mimsave("ant_video.mp4", frames, fps=30)
print("Total reward:", total_reward)


Total reward: -339.32393336160385


In [8]:
from IPython.display import HTML
from base64 import b64encode

# Read the clip from the working directory -- the same relative path the
# recording cell writes to -- rather than a hard-coded "/content" location,
# and close the file handle as soon as the bytes are read.
with open("ant_video.mp4", "rb") as video_file:
    mp4 = video_file.read()

# Embed the video inline as a base64 data URL.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

HTML(f"""
<video width=600 controls>
    <source src="{data_url}" type="video/mp4">
</video>
""")

In [9]:
from pyvirtualdisplay import Display
import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env
import torch

SEED = 0  # fixed seed so the training run can be reproduced

# Start virtual display for rendering (optional for video)
display = Display(visible=0, size=(1400, 900))
display.start()

# Check if CUDA is available
print("GPU available?", torch.cuda.is_available())
print("Using device:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")

# Create Ant environment (seeded vectorized wrapper with a single copy)
env = make_vec_env("Ant-v5", n_envs=1, seed=SEED)

# Train A2C agent on GPU (if available)
# NOTE(review): the Stable-Baselines3 A2C docs suggest running MLP policies on
# CPU -- consider device="cpu" if GPU training proves slower; TODO confirm.
model = A2C("MlpPolicy", env, verbose=1, device="cuda", seed=SEED)  # <<< Use A2C + GPU
# Trailing semicolon keeps the notebook from echoing the returned model repr.
model.learn(total_timesteps=130_000);


GPU available? True
Using device: Tesla T4
Using cuda device




------------------------------------
| rollout/              |          |
|    ep_len_mean        | 67.9     |
|    ep_rew_mean        | -61.4    |
| time/                 |          |
|    fps                | 293      |
|    iterations         | 100      |
|    time_elapsed       | 1        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -11.4    |
|    explained_variance | -1.37    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -15.5    |
|    std                | 1.01     |
|    value_loss         | 3.92     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 67.9     |
|    ep_rew_mean        | -61.4    |
| time/                 |          |
|    fps                | 275      |
|    iterations         | 200      |
|    time_elapsed       | 3        |
|    total_timesteps    | 1000     |
|

<stable_baselines3.a2c.a2c.A2C at 0x7d03d6415850>

In [12]:
import imageio

SEED = 0  # fixed seed so the evaluation episode starts from the same state

# Create a new environment with rendering enabled for Ant
record_env = gym.make("Ant-v5", render_mode="rgb_array")
try:
    obs, info = record_env.reset(seed=SEED)
    frames = []
    terminated = truncated = False
    total_reward = 0

    # Use the trained A2C model to run the environment for one episode,
    # always taking the deterministic action.
    while not (terminated or truncated):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = record_env.step(action)
        frames.append(record_env.render())
        total_reward += reward
finally:
    # Close the environment even if prediction or rendering raises.
    record_env.close()

# Save the collected frames as a video
imageio.mimsave("a2c_ant_video.mp4", frames, fps=30)
print("Total reward:", total_reward)


Total reward: 885.7167109933183


In [13]:
from IPython.display import HTML
from base64 import b64encode

# Read the clip from the working directory -- the same relative path the
# recording cell writes to -- rather than a hard-coded "/content" location,
# and close the file handle as soon as the bytes are read.
with open("a2c_ant_video.mp4", "rb") as video_file:
    mp4 = video_file.read()

# Embed the video inline as a base64 data URL.
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

HTML(f"""
<video width=600 controls>
    <source src="{data_url}" type="video/mp4">
</video>
""")