In [None]:
import numpy as np
from environment import LifeStyleCoachEnv
from stable_baselines3 import TD3
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.noise import NormalActionNoise
import gymnasium as gym

# Validate once that the custom environment follows the Gymnasium API that
# Stable-Baselines3 expects. This instance is only used for the check here;
# every training run below builds its own fresh environment.
env = LifeStyleCoachEnv()
check_env(env)

# Standard deviations of the Gaussian exploration noise to compare.
noise_sigmas = [0.05, 0.1, 0.2]

for sigma in noise_sigmas:
    # A fresh environment per run so the runs do not share environment state.
    test_env = LifeStyleCoachEnv()
    try:
        n_actions = test_env.action_space.shape[-1]

        # Zero-mean Gaussian noise with the same sigma on every action
        # dimension, added to the actions TD3 takes while collecting data.
        action_noise = NormalActionNoise(
            mean=np.zeros(n_actions),
            sigma=np.full(n_actions, sigma),
        )

        model = TD3(
            "MultiInputPolicy",
            test_env,
            action_noise=action_noise,
            verbose=1,
            # One TensorBoard directory per sigma keeps the curves separable.
            tensorboard_log=f"./tensorboard/sigma_{sigma:.2f}",
            learning_rate=1e-3,
            batch_size=256,
            gamma=0.99,
        )

        print(f"Test with {sigma}")
        model.learn(total_timesteps=250_000)
    finally:
        # Release the environment even if model construction or training
        # raises; previously each iteration's environment was never closed.
        test_env.close()

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.

Short test training with 0.05
Logging to ./tensorboard/sigma_0.05/TD3_1
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 600       |
|    ep_rew_mean     | -7.27e+03 |
| time/              |           |
|    episodes        | 4         |
|    fps             | 166       |
|    time_elapsed    | 14        |
|    total_timesteps | 2400      |
| train/             |           |
|    actor_loss      | 119       |
|    critic_loss     | 326       |
|    learning_rate   | 0.001     |
|    n_updates       | 2299      |
----------------------------------
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 600       |
|    ep_rew_mean     | -7.42e+03 |
| time/              |           |
|    episodes        | 8         |
|    fps             | 168       |
|    time_elapsed    | 28        |
|    total_timesteps | 4800 