In [1]:
import sys
from datetime import datetime

import gymnasium as gym
import numpy as np
import wandb
from stable_baselines3 import *
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3.common.logger import HumanOutputFormat
from stable_baselines3.common.logger import Logger as SB3Logger

import sinergym
from sinergym.utils.callbacks import *
from sinergym.utils.constants import *
from sinergym.utils.logger import WandBOutputFormat
from sinergym.utils.rewards import *
from sinergym.utils.wrappers import *

Couldn't import dot_parser, loading of dot files will not be possible.


In [22]:
# --- Experiment configuration -------------------------------------------
# Gymnasium ID of the Sinergym environment to train on.
environment = 'Eplus-office-mixed-continuous-stochastic-v1'

# Number of training episodes.
episodes = 10

# Experiment name: algorithm, environment, episode count and a
# minute-resolution timestamp of when this cell was run.
experiment_date = datetime.today().strftime('%Y-%m-%d_%H:%M')
experiment_name = f'SB3_PPO-{environment}-episodes-{episodes}_{experiment_date}'

In [66]:
# Extra building-model options.
# NOTE(review): `extra_conf` is defined here but is NOT passed to either
# `gym.make` call in this cell, so these settings are not applied by it.
# If they are meant to take effect, pass the dict through the environment's
# extra-configuration keyword (its name depends on the installed Sinergym
# version -- confirm before wiring it in).
# NOTE(review): `runperiod` is presumably
# (start_day, start_month, start_year, end_day, end_month, end_year); if so,
# an end month of 31 is invalid -- TODO confirm the intended end date.
extra_conf = {
    'timesteps_per_hour': 6,
    'runperiod': (1, 1, 2024, 3, 31, 2025),
}


def make_office_env(env_name):
    """Create one office environment with the shared experiment settings.

    The training and evaluation environments use the same weather file and
    reward definition; building both through this helper keeps them in sync.
    The list and dict arguments are rebuilt on every call, so the two
    environments never share mutable objects.

    Args:
        env_name: Value forwarded to ``gym.make`` as ``env_name``.

    Returns:
        The environment object returned by ``gym.make``.
    """
    # Air temperature of the four perimeter zones on each of the three
    # floors (bot / mid / top), in the same order as the original listing.
    perimeter_temperatures = [
        f'perimeter_{floor}_zn_{zone}_air_temperature'
        for floor in ('bot', 'mid', 'top')
        for zone in range(1, 5)
    ]
    return gym.make(
        environment,
        env_name=env_name,
        weather_files=["JPN_Nagoya.476350_IWEC.epw"],
        reward=LinearReward,
        reward_kwargs={
            "energy_weight": 0.8,
            'temperature_variables': perimeter_temperatures,
            'energy_variables': ['HVAC_electricity_demand_rate'],
            "range_comfort_winter": (18.0, 28.0),
            "range_comfort_summer": (18.0, 28.0),
        })


# Training environment and a separately named twin used only for evaluation.
env = make_office_env(experiment_name)
eval_env = make_office_env(experiment_name + '_EVALUATION')

[38;20m[ENVIRONMENT] (INFO) : Creating Gymnasium environment.[0m
[38;20m[ENVIRONMENT] (INFO) : Name: SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-10_2025-08-10_15:21[0m
[38;20m[MODEL] (INFO) : Working directory created: /workspace/SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-10_2025-08-10_15:21-res10[0m
[38;20m[MODEL] (INFO) : Model Config is correct.[0m
[38;20m[MODEL] (INFO) : Building model Output:Variable updated with defined variable names.[0m
[38;20m[MODEL] (INFO) : Updated building model Output:Meter with meter names.[0m
[38;20m[MODEL] (INFO) : Runperiod established.[0m
[38;20m[MODEL] (INFO) : Episode length (seconds): 31536000.0[0m
[38;20m[MODEL] (INFO) : timestep size (seconds): 900.0[0m
[38;20m[MODEL] (INFO) : timesteps per episode: 35040[0m
[38;20m[REWARD] (INFO) : Reward function initialized.[0m
[38;20m[ENVIRONMENT] (INFO) : Environment created successfully.[0m
[38;20m[ENVIRONMENT] (INFO) : Creating Gymnasium environme

In [1]:
# Wrap the training environment: normalisation first, then the loggers.
# NOTE: this cell rebinds `env` / `eval_env`, so re-running it without
# re-creating the environments stacks the wrappers a second time.
env = NormalizeObservation(env)
env = NormalizeAction(env)
env = LoggerWrapper(env)
env = CSVLogger(env)

# Weights & Biases logging (remember to set the API key as an environment
# variable). Comment out this section to train without WandB.
wandb_entity = 'ojun-info-alpha-example'
wandb_project = 'final-report-elaborated'
wandb_group = 'ojun-info-alpha'
wandb_tags = ['DRL', 'PPO', 'office', 'continuous', 'stochastic', 'v1']

# The run is started explicitly so that a longer init timeout can be set.
# The run metadata is passed here as well: previously `wandb.init` received
# only `settings`, so the run was created without entity/project/name.
# NOTE(review): WandBLogger appears to reuse an already-active run instead of
# starting its own, in which case the metadata passed to the wrapper alone
# would be ignored -- confirm against the installed Sinergym version.
wandb.init(
    entity=wandb_entity,
    project=wandb_project,
    name=experiment_name,
    group=wandb_group,
    tags=wandb_tags,
    save_code=True,
    settings=wandb.Settings(init_timeout=120))
env = WandBLogger(env,
                 entity=wandb_entity,
                 project_name=wandb_project,
                 run_name=experiment_name,
                 group=wandb_group,
                 tags=wandb_tags,
                 save_code = True,
                 dump_frequency = 1000,
                 artifact_save = False,)

# The evaluation environment gets the same normalisation and local loggers,
# but no WandB wrapper.
eval_env = NormalizeObservation(eval_env)
eval_env = NormalizeAction(eval_env)
eval_env = LoggerWrapper(eval_env)
eval_env = CSVLogger(eval_env)

NameError: name 'NormalizeObservation' is not defined

In [68]:
# PPO agent with an MLP policy running on the CPU; every hyperparameter not
# listed here keeps its Stable-Baselines3 default.
model = PPO(policy='MlpPolicy', env=env, verbose=1, device='cpu')

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [69]:
# Evaluation callback: runs on `eval_env` while training on `env`
# (one evaluation episode, evaluation frequency of 2 episodes,
# deterministic actions).
eval_callback = LoggerEvalCallback(
    eval_env=eval_env,
    train_env=env,
    n_eval_episodes=1,
    eval_freq_episodes=2,
    deterministic=True,
)

# Bundle every callback into the single object handed to `model.learn`.
callbacks = [eval_callback]
callback = CallbackList(callbacks)

In [70]:
# Total training budget in timesteps: `episodes` times the per-episode
# timestep count minus one.
# NOTE(review): the `- 1` presumably keeps training from spilling into an
# extra episode -- confirm.
steps_per_episode = env.get_wrapper_attr('timestep_per_episode')
timesteps = episodes * (steps_per_episode - 1)

In [71]:
# When the training env carries a WandBLogger, give the SB3 model a logger
# that writes both to stdout and through the WandB output format.
if is_wrapped(env, WandBLogger):
    output_formats = [
        HumanOutputFormat(sys.stdout, max_length=200),
        WandBOutputFormat(),
    ]
    logger = SB3Logger(folder=None, output_formats=output_formats)
    model.set_logger(logger)

In [72]:
# Train for the computed timestep budget with the evaluation callback attached.
model.learn(total_timesteps=timesteps, callback=callback, log_interval=100)

#----------------------------------------------------------------------------------------------#
[38;20m[ENVIRONMENT] (INFO) : Starting a new episode.[0m
[38;20m[ENVIRONMENT] (INFO) : Episode 1: SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-10_2025-08-10_15:21[0m
#----------------------------------------------------------------------------------------------#
[38;20m[MODEL] (INFO) : Episode directory created.[0m
[38;20m[MODEL] (INFO) : Weather file JPN_Nagoya.476350_IWEC.epw used.[0m
[38;20m[MODEL] (INFO) : Adapting weather to building model.[0m
[38;20m[MODEL] (INFO) : Weather noise applied to columns: ['Dry Bulb Temperature'][0m
[38;20m[ENVIRONMENT] (INFO) : Saving episode output path in /workspace/SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-10_2025-08-10_15:21-res10/episode-1/output.[0m
[38;20m[SIMULATOR] (INFO) : handlers initialized.[0m
[38;20m[SIMULATOR] (INFO) : handlers are ready.[0m
[38;20m[SIMULATOR] (INFO) : System is ready.[

<stable_baselines3.ppo.ppo.PPO at 0x780d5c901c10>

In [30]:
# Save the trained model as `model` under the environment's workspace path.
model_path = env.get_wrapper_attr('workspace_path') + '/model'
model.save(model_path)

In [1]:
# Close the training environment (and the wrappers stacked on it).
# NOTE(review): `eval_env` is not closed anywhere in the cells shown --
# confirm whether the evaluation callback closes it or an explicit
# `eval_env.close()` is needed.
env.close()

NameError: name 'env' is not defined