# W & B Logging - integrate with Stable-Baselines3 (SB3) algorithms

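API key: `<REDACTED>` — do not store the key in the notebook. Authenticate with `wandb login` or the `WANDB_API_KEY` environment variable instead, and rotate the key that was previously pasted here.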

- Project: https://wandb.ai/rajesh-siraskar-team/RS-ML-project
- Raw PyTorch project: https://wandb.ai/rajesh-siraskar-team/RS-ML-project/runs/nbft5kxi

In [1]:
import random
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

In [2]:
import wandb
from wandb.integration.sb3 import WandbCallback

# Authenticate this session with W&B before any run is created.
# The cell output below shows the call returning True for an already-logged-in user.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mrajesh-siraskar[0m ([33mrajesh-siraskar-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

### Raw PyTorch-style use (simulated training loop, no SB3)

In [6]:
# Hyperparameters of the simulated run; both are also stored in the W&B run config.
epochs = 10
lr = 0.01

# Use the run as a context manager so it is always finished when the block
# exits, even if the loop raises. Otherwise this run is still open when a later
# cell calls wandb.init() again for a different project.
with wandb.init(
    # Set the project where this run will be logged
    project="RS-ML-project",
    # Track hyperparameters and run metadata
    config={
        "learning_rate": lr,
        "epochs": epochs,
    },
) as run:
    # Random constant in [0, 0.2) applied to every epoch's accuracy and loss.
    offset = random.random() / 5
    print(f"lr: {lr}")

    # Simulate a training run. No model is trained: the metrics are synthetic
    # curves plus noise. The loop starts at epoch 2; note that epoch 0 would
    # divide by zero in `random.random() / epoch`.
    for epoch in range(2, epochs):
        acc = 1 - 2 ** -epoch - random.random() / epoch - offset
        loss = 2 ** -epoch + random.random() / epoch + offset
        print(f"epoch={epoch}, accuracy={acc}, loss={loss}")
        # Each log call records one step of metrics on the active run.
        run.log({"accuracy": acc, "loss": loss})

lr: 0.01
epoch=2, accuracy=0.5180527073372365, loss=0.7017149861731453
epoch=3, accuracy=0.611624366854121, loss=0.4235290832859938
epoch=4, accuracy=0.6735402683234769, loss=0.3221509220528607
epoch=5, accuracy=0.7862811796785344, loss=0.24821062528823992
epoch=6, accuracy=0.7247553638060228, loss=0.17868028428095437
epoch=7, accuracy=0.7669023957222336, loss=0.2636543021016444
epoch=8, accuracy=0.7381959427847369, loss=0.2783640753166964
epoch=9, accuracy=0.785317218042482, loss=0.26306326697306615


### Stable-Baselines3 run

In [4]:
# Settings for the SB3 experiment. The same mapping is read by later cells
# (environment name, policy type, training budget) and uploaded as the run config.
config = dict(
    policy_type="MlpPolicy",
    total_timesteps=1000,
    env_name="CartPole-v1",
)

# Arguments for the W&B run that tracks the SB3 experiment.
# (Video upload via `monitor_gym=True` is intentionally left disabled.)
wandb_init_kwargs = dict(
    project="sb3",
    config=config,
    sync_tensorboard=True,  # auto-upload sb3's tensorboard metrics
    save_code=True,  # optional
)
run = wandb.init(**wandb_init_kwargs)

In [5]:
def make_env():
    """Create the Gym environment named by ``config["env_name"]``, wrapped in ``Monitor``.

    The ``Monitor`` wrapper records stats such as returns.

    Returns:
        The monitored environment instance.
    """
    return Monitor(gym.make(config["env_name"]))

In [6]:
# Build a vectorized environment from the single `make_env` factory.
env = DummyVecEnv([make_env])
# Optional video recording (disabled; VecVideoRecorder would need to be imported first):
# env = VecVideoRecorder(env, f"videos/{run.id}", record_video_trigger=lambda x: x % 2000 == 0, video_length=200)

# TensorBoard logs are written under runs/<run id>; the W&B run was created
# with sync_tensorboard=True, so those metrics are uploaded to the run.
model = PPO(config["policy_type"], env, verbose=1, tensorboard_log=f"runs/{run.id}")

# NOTE(review): the captured output reports total_timesteps = 2048 although the
# config requests 1000 — presumably PPO completes a whole rollout (its n_steps
# setting) before stopping; confirm against the SB3 PPO documentation.
try:
    model.learn(
        total_timesteps=config["total_timesteps"],
        callback=WandbCallback(
            gradient_save_freq=100,
            # model_save_path=f"models/{run.id}",
            verbose=2,
        ),
    )
finally:
    # Close the W&B run even when training raises, so a failed cell does not
    # leave a dangling active run behind in the kernel.
    run.finish()

Using cpu device
Logging to runs/dw7j6918\PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 21.8     |
|    ep_rew_mean     | 21.8     |
| time/              |          |
|    fps             | 1066     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
