## Simple multi-processing demo using SubprocVecEnv from SB3

Import packages and read config file.

In [None]:
import yaml
from stable_baselines3 import PPO
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import SubprocVecEnv

from primaite.session.environment import PrimaiteGymEnv


In [None]:
from primaite.config.load import data_manipulation_config_path

In [None]:
# Load the config file located by data_manipulation_config_path(). The encoding
# is given explicitly so parsing does not depend on the platform's default
# locale encoding (YAML files are conventionally UTF-8).
with open(data_manipulation_config_path(), 'r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

Set up the training hyperparameters.

In [None]:

# Hyperparameters for the PPO run.
EPISODE_LEN = 128
NUM_EPISODES = 10
# Total step budget: 10 * 128 = 1280. It is passed to PPO both as `n_steps`
# and as `total_timesteps`.
NO_STEPS = NUM_EPISODES * EPISODE_LEN
BATCH_SIZE = 32
LEARNING_RATE = 0.0003


Define an environment factory: `make_env` returns a zero-argument function that builds, seeds, and trains on one environment.

In [None]:


def make_env(rank: int, seed: int = 0) -> callable:
    """Build a zero-argument environment factory.

    The returned ``_init`` closure creates a ``PrimaiteGymEnv`` from the
    module-level ``cfg``, resets it with ``seed + rank``, trains a PPO agent
    on it for ``NO_STEPS`` timesteps, and returns the environment.

    :param rank: Offset added to ``seed`` when the environment is reset.
    :param seed: Base seed; also passed to ``set_random_seed`` at the time
        ``make_env`` is called.
    :return: The ``_init`` callable described above.
    """

    def _init() -> PrimaiteGymEnv:
        gym_env = PrimaiteGymEnv(env_config=cfg)
        # Offsetting the base seed by rank gives each factory its own reset seed.
        gym_env.reset(seed=seed + rank)

        ppo_settings = {
            "learning_rate": LEARNING_RATE,
            "n_steps": NO_STEPS,
            "batch_size": BATCH_SIZE,
            "verbose": 0,
            "tensorboard_log": "./PPO_UC2/",
        }
        agent = PPO("MlpPolicy", gym_env, **ppo_settings)
        # The agent itself is not kept; only the environment is handed back.
        agent.learn(total_timesteps=NO_STEPS)
        return gym_env

    set_random_seed(seed)
    return _init


Run the experiment by creating a `SubprocVecEnv` with one environment function per process.

In [None]:
n_procs = 2
# Ranks run from n_procs up to 2 * n_procs - 1, one environment factory each.
train_env = SubprocVecEnv(
    [make_env(rank) for rank in range(n_procs, 2 * n_procs)]
)
