# Reinforcement Learning Tutorials

This notebook demonstrates basic reinforcement learning with Stable Baselines3 and Gymnasium environments.

Run the cell below to install the required libraries, for example when working in Google Colab. You can skip it if they are already installed in your environment.

In [None]:
!pip install stable-baselines3 gymnasium

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO, DQN

## 0. Train PPO on CartPole

In [None]:
# Train a PPO agent with an MLP policy on CartPole-v1 for 10,000 timesteps.
# A fixed `seed` makes repeated runs of this cell comparable.
env = gym.make('CartPole-v1')
try:
    model = PPO('MlpPolicy', env, verbose=1, seed=42)
    model.learn(total_timesteps=10000)
finally:
    # Release the environment even if training fails or is interrupted.
    env.close()

In [ ]:
import os

# Record one full episode of the agent held in `model` into the `videos/` folder.
os.makedirs('videos', exist_ok=True)
video_env = gym.wrappers.RecordVideo(
    gym.make('CartPole-v1', render_mode='rgb_array'),
    video_folder='videos',
    name_prefix='ppo_cartpole',
    episode_trigger=lambda episode_id: True,  # record every episode
)
try:
    # Seed the reset so the recorded episode starts from a reproducible state.
    obs, _ = video_env.reset(seed=42)
    terminated, truncated = False, False
    while not (terminated or truncated):
        # Evaluate with the deterministic action instead of sampling from the policy.
        action, _ = model.predict(obs, deterministic=True)
        obs, _, terminated, truncated, _ = video_env.step(action)
finally:
    # Always close the wrapper so the recording is finalized, even if the
    # rollout raises or the cell is interrupted.
    video_env.close()


## 1. Train DQN on MountainCar

In [None]:
# Train a DQN agent with an MLP policy on MountainCar-v0 for 50,000 timesteps.
# A fixed `seed` makes repeated runs of this cell comparable.
env = gym.make('MountainCar-v0')
try:
    model = DQN('MlpPolicy', env, verbose=1, seed=42)
    model.learn(total_timesteps=50000)
finally:
    # Release the environment even if training fails or is interrupted.
    env.close()

In [ ]:
import os

# Record one full episode of the agent held in `model` into the `videos/` folder.
os.makedirs('videos', exist_ok=True)
video_env = gym.wrappers.RecordVideo(
    gym.make('MountainCar-v0', render_mode='rgb_array'),
    video_folder='videos',
    name_prefix='dqn_mountaincar',
    episode_trigger=lambda episode_id: True,  # record every episode
)
try:
    # Seed the reset so the recorded episode starts from a reproducible state.
    obs, _ = video_env.reset(seed=42)
    terminated, truncated = False, False
    while not (terminated or truncated):
        # Evaluate greedily: with deterministic=False, DQN would still take
        # occasional random exploration actions.
        action, _ = model.predict(obs, deterministic=True)
        obs, _, terminated, truncated, _ = video_env.step(action)
finally:
    # Always close the wrapper so the recording is finalized, even if the
    # rollout raises or the cell is interrupted.
    video_env.close()


## 2. Humanoid example (requires MuJoCo)

In [None]:
# Train a PPO agent with an MLP policy on Humanoid-v4 for 10,000 timesteps.
# A fixed `seed` makes repeated runs of this cell comparable.
env = gym.make('Humanoid-v4')
try:
    model = PPO('MlpPolicy', env, verbose=1, seed=42)
    model.learn(total_timesteps=10000)
finally:
    # Release the environment even if training fails or is interrupted.
    env.close()

In [ ]:
import os

# Record one full episode of the agent held in `model` into the `videos/` folder.
os.makedirs('videos', exist_ok=True)
video_env = gym.wrappers.RecordVideo(
    gym.make('Humanoid-v4', render_mode='rgb_array'),
    video_folder='videos',
    name_prefix='ppo_humanoid',
    episode_trigger=lambda episode_id: True,  # record every episode
)
try:
    # Seed the reset so the recorded episode starts from a reproducible state.
    obs, _ = video_env.reset(seed=42)
    terminated, truncated = False, False
    while not (terminated or truncated):
        # Evaluate with the deterministic action instead of sampling from the policy.
        action, _ = model.predict(obs, deterministic=True)
        obs, _, terminated, truncated, _ = video_env.step(action)
finally:
    # Always close the wrapper so the recording is finalized, even if the
    # rollout raises or the cell is interrupted.
    video_env.close()
