In [1]:
import sys
from datetime import datetime

import gymnasium as gym
import numpy as np
import wandb
from stable_baselines3 import *
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3.common.logger import HumanOutputFormat
from stable_baselines3.common.logger import Logger as SB3Logger

import sinergym
from sinergym.utils.callbacks import *
from sinergym.utils.constants import *
from sinergym.utils.logger import WandBOutputFormat
from sinergym.utils.rewards import *
from sinergym.utils.wrappers import *

Couldn't import dot_parser, loading of dot files will not be possible.


In [2]:
# Gymnasium ID of the environment to train on
environment = 'Eplus-5zone-mixed-continuous-stochastic-v1'

# Number of training episodes
episodes = 5

# Experiment name: algorithm, environment ID, episode count and launch time.
# Note: the timestamp (including its ':') becomes part of the workspace
# directory name created for the environment.
experiment_date = datetime.today().strftime('%Y-%m-%d_%H:%M')
experiment_name = f'SB3_PPO-{environment}-episodes-{episodes}_{experiment_date}'

In [3]:
# Training environment; env_name is used as the run name and as the prefix of
# the working directory.
env = gym.make(environment, env_name=experiment_name)

# Second instance of the same environment, handed to the evaluation callback.
eval_env = gym.make(environment, env_name=f'{experiment_name}_EVALUATION')

[38;20m[ENVIRONMENT] (INFO) : Creating Gymnasium environment.[0m
[38;20m[ENVIRONMENT] (INFO) : Name: SB3_PPO-Eplus-5zone-mixed-continuous-stochastic-v1-episodes-5_2025-08-06_16:13[0m
[38;20m[MODEL] (INFO) : Working directory created: /workspace/SB3_PPO-Eplus-5zone-mixed-continuous-stochastic-v1-episodes-5_2025-08-06_16:13-res1[0m
[38;20m[MODEL] (INFO) : Model Config is correct.[0m
[38;20m[MODEL] (INFO) : Building model Output:Variable updated with defined variable names.[0m
[38;20m[MODEL] (INFO) : Updated building model Output:Meter with meter names.[0m
[38;20m[MODEL] (INFO) : Runperiod established.[0m
[38;20m[MODEL] (INFO) : Episode length (seconds): 31536000.0[0m
[38;20m[MODEL] (INFO) : timestep size (seconds): 900.0[0m
[38;20m[MODEL] (INFO) : timesteps per episode: 35040[0m
[38;20m[REWARD] (INFO) : Reward function initialized.[0m
[38;20m[ENVIRONMENT] (INFO) : Environment created successfully.[0m
[38;20m[ENVIRONMENT] (INFO) : Creating Gymnasium environment.[

In [9]:
# Wrappers shared by the training and evaluation environments, applied in
# this order (the first one ends up innermost).
base_wrappers = (NormalizeObservation, NormalizeAction, LoggerWrapper, CSVLogger)

for wrapper_cls in base_wrappers:
    env = wrapper_cls(env)

# WandB logging for the training environment (remember to set the WandB API
# key as an environment variable). Remove the wandb.init / WandBLogger
# statements below to train without WandB.
#
# The run is started explicitly here so that init_timeout can be raised.
# Because the run is then already active when WandBLogger is created, the run
# metadata is passed to wandb.init as well; otherwise the run would be created
# with the account defaults.
# NOTE(review): Sinergym's WandBLogger is documented to reuse an active run
# instead of applying its own entity/project/name arguments -- confirm for the
# installed version.
wandb_entity = 'ojun-info-alpha-example'
wandb_project = 'final-report'
wandb_group = 'ojun-info-alpha'
wandb_tags = ['DRL', 'PPO', '5zone', 'continuous', 'stochastic', 'v1']

wandb.init(
    entity=wandb_entity,
    project=wandb_project,
    name=experiment_name,
    group=wandb_group,
    tags=wandb_tags,
    save_code=True,
    settings=wandb.Settings(init_timeout=120))
env = WandBLogger(env,
                  entity=wandb_entity,
                  project_name=wandb_project,
                  run_name=experiment_name,
                  group=wandb_group,
                  tags=wandb_tags,
                  save_code=True,
                  dump_frequency=1000,
                  artifact_save=False)

# The evaluation environment gets the same base wrappers but no WandB logger.
for wrapper_cls in base_wrappers:
    eval_env = wrapper_cls(eval_env)

[38;20m[WRAPPER NormalizeObservation] (INFO) : Wrapper initialized.[0m
[38;20m[WRAPPER NormalizeAction] (INFO) : New normalized action space: Box(-1.0, 1.0, (2,), float32)[0m
[38;20m[WRAPPER NormalizeAction] (INFO) : Wrapper initialized.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Wrapper initialized.[0m
[38;20m[WRAPPER CSVLogger] (INFO) : Wrapper initialized.[0m


[38;20m[WRAPPER WandBLogger] (INFO) : Wrapper initialized.[0m
[38;20m[WRAPPER NormalizeObservation] (INFO) : Wrapper initialized.[0m
[38;20m[WRAPPER NormalizeAction] (INFO) : New normalized action space: Box(-1.0, 1.0, (2,), float32)[0m
[38;20m[WRAPPER NormalizeAction] (INFO) : Wrapper initialized.[0m
[38;20m[WRAPPER LoggerWrapper] (INFO) : Wrapper initialized.[0m
[38;20m[WRAPPER CSVLogger] (INFO) : Wrapper initialized.[0m


In [10]:
# PPO with an MLP policy on CPU; every other hyperparameter keeps its
# Stable-Baselines3 default.
model = PPO(
    policy='MlpPolicy',
    env=env,
    verbose=1,
    device='cpu',
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [11]:
# Evaluation callback: logs evaluation results and saves the best model.
# Configured for 1 deterministic evaluation episode, with an evaluation
# frequency of 2 training episodes.
eval_callback = LoggerEvalCallback(
    eval_env=eval_env,
    train_env=env,
    n_eval_episodes=1,
    eval_freq_episodes=2,
    deterministic=True,
)

# Bundle every callback into the single object passed to model.learn().
callbacks = [eval_callback]
callback = CallbackList(callbacks)

In [12]:
# Total training budget: (timesteps per episode - 1) for each requested episode.
steps_per_episode = env.get_wrapper_attr('timestep_per_episode')
timesteps = episodes * (steps_per_episode - 1)

In [14]:
# When the training environment carries a WandBLogger wrapper, replace the
# model's logger with one that writes both to stdout and to WandB.
if is_wrapped(env, WandBLogger):
    output_formats = [
        HumanOutputFormat(sys.stdout, max_length=120),
        WandBOutputFormat(),
    ]
    logger = SB3Logger(folder=None, output_formats=output_formats)
    model.set_logger(logger)

In [15]:
# Train for the computed number of timesteps with the evaluation callback
# attached; the call is the cell's last expression, so its return value is
# displayed.
model.learn(total_timesteps=timesteps, callback=callback, log_interval=100)

[38;20m[WRAPPER WandBLogger] (INFO) : End of episode detected, dumping summary metrics in WandB Platform.[0m
[38;20m[WRAPPER CSVLogger] (INFO) : End of episode detected, data updated in monitor and progress.csv.[0m
[38;20m[WRAPPER NormalizeObservation] (INFO) : Normalization calibration saved.[0m           
[38;20m[WRAPPER CSVLogger] (INFO) : End of episode detected, data updated in monitor and progress.csv.[0m
[38;20m[WRAPPER NormalizeObservation] (INFO) : Normalization calibration saved.[0m           
[38;20m[WRAPPER CSVLogger] (INFO) : End of episode detected, data updated in monitor and progress.csv.[0m
[38;20m[WRAPPER NormalizeObservation] (INFO) : Normalization calibration saved.[0m           
[38;20m[WRAPPER CSVLogger] (INFO) : End of episode detected, data updated in monitor and progress.csv.[0m
[38;20m[WRAPPER NormalizeObservation] (INFO) : Normalization calibration saved.[0m           
[38;20m[WRAPPER CSVLogger] (INFO) : End of episode detected, data update

<stable_baselines3.ppo.ppo.PPO at 0x705cff521d60>

In [16]:
# Store the trained model inside the environment's workspace directory.
workspace_dir = env.get_wrapper_attr('workspace_path')
model.save(workspace_dir + '/model')

In [None]:
# Close the training environment; on close the WandBLogger wrapper dumps its
# summary metrics to WandB.
# NOTE(review): eval_env is not closed explicitly in this notebook -- confirm
# whether LoggerEvalCallback closes it or whether eval_env.close() is needed.
env.close()

[38;20m[WRAPPER WandBLogger] (INFO) : Environment closed, dumping summary metrics in WandB Platform.[0m
Simulation Progress [Episode 7]:  43%|████▎     | 43/100 [01:03<00:24,  2.29%/s, 43% completed]