# Multiprocessing: Unleashing the Power of Vectorized Environments
See: [https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#multiprocessing-unleashing-the-power-of-vectorized-environments](https://stable-baselines3.readthedocs.io/en/master/guide/examples.html#multiprocessing-unleashing-the-power-of-vectorized-environments)
See also the [Colab version](https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/multiprocessing_rl.ipynb), which shows a comparison between single-process and multiprocess training (multiprocess training is faster).

In [2]:
import gymnasium as gym
import numpy as np

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed

In [3]:
def make_env(env_id, rank, seed=0):
    """
    Utility function for multiprocessed env.

    Returns a factory (a zero-argument callable) rather than an environment:
    vectorized wrappers such as ``SubprocVecEnv`` take a list of callables and
    invoke each one themselves to build the environment, e.g.
    ``SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])``.

    :param env_id: (str) the environment ID (name)
    :param rank: (int) index of the subprocess; added to ``seed`` so that
        each environment gets a different seed
    :param seed: (int) the initial seed for RNG
    :return: (Callable[[], gym.Env]) function that creates the seeded
        environment when called
    """
    def _init():
        env = gym.make(env_id)
        # Offset the base seed by the rank so that no two workers sample
        # identical sequences from their spaces.
        env.action_space.seed(seed + rank)
        env.observation_space.seed(seed + rank)

        return env

    # Seed the global RNGs once, at factory-creation time.
    set_random_seed(seed)
    # Return the factory itself (not `_init()`): the environment must only be
    # instantiated when the vectorized wrapper calls the factory.
    return _init

In [5]:
# Training configuration
env_id = "CartPole-v1"
num_cpu = 4  # how many worker processes to use (one environment each)

# Manual alternative: build one factory per worker and pass them to SubprocVecEnv
# env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])

# Stable Baselines provides the make_vec_env() helper, which performs the steps above for us.
# `vec_env_cls` picks the backend: `DummyVecEnv` (usually faster) or `SubprocVecEnv`.
env = make_vec_env(
    env_id=env_id,
    n_envs=num_cpu,
    seed=0,
    vec_env_cls=SubprocVecEnv,
)

# Create the PPO agent on the vectorized environment, then train it.
# The learn() call stays the last expression so the cell still displays the model.
model = PPO(policy="MlpPolicy", env=env, verbose=1)
model.learn(total_timesteps=25_000)

Using cuda device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 21.3     |
|    ep_rew_mean     | 21.3     |
| time/              |          |
|    fps             | 2940     |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 8192     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32.6        |
|    ep_rew_mean          | 32.6        |
| time/                   |             |
|    fps                  | 1894        |
|    iterations           | 2           |
|    time_elapsed         | 8           |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.014250005 |
|    clip_fraction        | 0.227       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.681      |
|    explained_variance   | -0.00113    |
|    learnin

<stable_baselines3.ppo.ppo.PPO at 0x7f527a60bd30>

In [6]:
# Run the trained model on the vectorized environment for 1000 steps.
obs = env.reset()
for _ in range(1000):
    # Ask the model for an action given the latest observations
    action, _states = model.predict(obs)
    # Step the environment; `dones` is not checked in this loop
    # (presumably the vectorized env resets finished sub-environments itself — TODO confirm)
    obs, rewards, dones, info = env.step(action)
    # NOTE(review): `env` was created without an explicit render mode — confirm this call displays anything
    env.render()