In [5]:
import sys
from datetime import datetime

import gymnasium as gym
import numpy as np
import wandb
from stable_baselines3 import *
from stable_baselines3.common.callbacks import CallbackList
from stable_baselines3.common.logger import HumanOutputFormat
from stable_baselines3.common.logger import Logger as SB3Logger

import sinergym
from sinergym.utils.callbacks import *
from sinergym.utils.constants import *
from sinergym.utils.logger import WandBOutputFormat
from sinergym.utils.rewards import *
from sinergym.utils.wrappers import *

In [6]:
from stable_baselines3.common.env_util import make_vec_env

In [13]:
# Environment ID
environment = 'Eplus-office-mixed-continuous-stochastic-v1'

# Training episodes
episodes = 100

# Name of the experiment
experiment_date = datetime.today().strftime('%Y-%m-%d_%H:%M')
experiment_name = 'SB3_PPO-' + environment + \
    '-episodes-' + str(episodes)
experiment_name += '_' + experiment_date

In [14]:
extra_conf = {
    'timesteps_per_hour': 6,
    'runperiod': (1, 7, 2024, 31, 7, 2024),
}

In [15]:
env = gym.make(environment,
               env_name=experiment_name,
               weather_files=["JPN_Nagoya.476350_IWEC.epw"],
               reward=LinearReward,
               reward_kwargs = {
                   "energy_weight": 0.5,
                   'temperature_variables': ['perimeter_bot_zn_1_air_temperature',
                     'perimeter_bot_zn_2_air_temperature',
                     'perimeter_bot_zn_3_air_temperature',
                     'perimeter_bot_zn_4_air_temperature',
                     'perimeter_mid_zn_1_air_temperature',
                     'perimeter_mid_zn_2_air_temperature',
                     'perimeter_mid_zn_3_air_temperature',
                     'perimeter_mid_zn_4_air_temperature',
                     'perimeter_top_zn_1_air_temperature',
                     'perimeter_top_zn_2_air_temperature',
                     'perimeter_top_zn_3_air_temperature',
                     'perimeter_top_zn_4_air_temperature',
                                            ],
                   'energy_variables': ['HVAC_electricity_demand_rate'],
                   "range_comfort_winter": (20.0, 23.5),
                   "range_comfort_summer": (23.0, 26.0),
                   "lambda_energy": 0.0001,
               },
              building_config=extra_conf,)

eval_env = gym.make(environment,
                    env_name=experiment_name+'_EVALUATION',
                    weather_files=["JPN_Nagoya.476350_IWEC.epw"],
                    reward=LinearReward,
                    reward_kwargs = {
                        "energy_weight": 0.5,
                        'temperature_variables': ['perimeter_bot_zn_1_air_temperature',
                         'perimeter_bot_zn_2_air_temperature',
                         'perimeter_bot_zn_3_air_temperature',
                         'perimeter_bot_zn_4_air_temperature',
                         'perimeter_mid_zn_1_air_temperature',
                         'perimeter_mid_zn_2_air_temperature',
                         'perimeter_mid_zn_3_air_temperature',
                         'perimeter_mid_zn_4_air_temperature',
                         'perimeter_top_zn_1_air_temperature',
                         'perimeter_top_zn_2_air_temperature',
                         'perimeter_top_zn_3_air_temperature',
                         'perimeter_top_zn_4_air_temperature',
                                                 ],
                        'energy_variables': ['HVAC_electricity_demand_rate'],
                        "range_comfort_winter": (20.0, 23.5),
                        "range_comfort_summer": (23.0, 26.0),
                        "lambda_energy": 0.0001,
                    },
                   building_config=extra_conf,)

[38;20m[ENVIRONMENT] (INFO) : Creating Gymnasium environment.[0m                             
[38;20m[ENVIRONMENT] (INFO) : Name: SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-100_2025-08-12_11:06[0m
[38;20m[MODEL] (INFO) : Working directory created: /workspace/SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-100_2025-08-12_11:06-res1[0m
[38;20m[MODEL] (INFO) : Model Config is correct.[0m                                          
[38;20m[MODEL] (INFO) : Building model Output:Variable updated with defined variable names.[0m
[38;20m[MODEL] (INFO) : Updated building model Output:Meter with meter names.[0m             
[38;20m[MODEL] (INFO) : Building configuration: runperiod updated to {'start_day': 1, 'start_month': 7, 'start_year': 2024, 'end_day': 31, 'end_month': 7, 'end_year': 2024, 'start_weekday': 6, 'n_steps_per_hour': 6}[0m
[38;20m[MODEL] (INFO) : Updated episode length (seconds): 2678400.0[0m                       
[38;20m[MODEL] (INFO

In [16]:
env = NormalizeObservation(env)
env = NormalizeAction(env)
env = LoggerWrapper(env)
env = CSVLogger(env)

# run = wandb.init(entity='ojun-info-alpha',
#            project='final',
#            name='first',
#            settings=wandb.Settings(init_timeout=120.0))
# Discomment the following line to log to WandB (remember to set the API key as an environment variable)
# env = WandBLogger(env,
#                  entity='ojun-info-alpha',
#                  project_name='final',
#                  run_name='first',
#                  save_code = True,
#                  dump_frequency = 1000,
#                  artifact_save = False)

eval_env = NormalizeObservation(eval_env)
eval_env = NormalizeAction(eval_env)
eval_env = LoggerWrapper(eval_env)
eval_env = CSVLogger(eval_env)

# In this case, all the hyperparameters are the default ones
model = PPO(
    'MlpPolicy',
    env,
    verbose=1,
    device='cpu',
)

callbacks = []

# Set up Evaluation logging and saving best model
eval_callback = LoggerEvalCallback(
    eval_env=eval_env,
    train_env=env,
    n_eval_episodes=5,
    eval_freq_episodes=10,
    deterministic=True)

callbacks.append(eval_callback)
callback = CallbackList(callbacks)

timesteps = episodes * (env.get_wrapper_attr('timestep_per_episode') - 1)

# wandb logger and setting in SB3
if is_wrapped(env, WandBLogger):
    logger = SB3Logger(
        folder=None,
        output_formats=[
            HumanOutputFormat(
                sys.stdout,
                max_length=200),
            WandBOutputFormat()])
    model.set_logger(logger)

model.learn(
    total_timesteps=timesteps,
    callback=callback,
    log_interval=100)

[38;20m[WRAPPER NormalizeObservation] (INFO) : Wrapper initialized.[0m                       
[38;20m[WRAPPER NormalizeAction] (INFO) : New normalized action space: Box(-1.0, 1.0, (2,), float32)[0m
[38;20m[WRAPPER NormalizeAction] (INFO) : Wrapper initialized.[0m                            
[38;20m[WRAPPER LoggerWrapper] (INFO) : Wrapper initialized.[0m                              
[38;20m[WRAPPER CSVLogger] (INFO) : Wrapper initialized.[0m                                  
[38;20m[WRAPPER NormalizeObservation] (INFO) : Wrapper initialized.[0m                       
[38;20m[WRAPPER NormalizeAction] (INFO) : New normalized action space: Box(-1.0, 1.0, (2,), float32)[0m
[38;20m[WRAPPER NormalizeAction] (INFO) : Wrapper initialized.[0m                            
[38;20m[WRAPPER LoggerWrapper] (INFO) : Wrapper initialized.[0m                              
[38;20m[WRAPPER CSVLogger] (INFO) : Wrapper initialized.[0m                                  
Simulation Progress 

<stable_baselines3.ppo.ppo.PPO at 0x7d019568dc40>

In [None]:
model.save(env.get_wrapper_attr('workspace_path') + '/model')

In [None]:
env.close()
eval_env.close()
#env.wandb_run.finish()

Simulation Progress [Episode 35]: 100%|██████████| 100/100 [00:21<00:00, 25.83%/s, 100% completed]

In [20]:
random_env = gym.make(environment,
               env_name="EMPTY_A",
               weather_files=["JPN_Nagoya.476350_IWEC.epw"],
               reward=LinearReward,
               reward_kwargs = {
                   "energy_weight": 0.5,
                   'temperature_variables': ['perimeter_bot_zn_1_air_temperature',
                     'perimeter_bot_zn_2_air_temperature',
                     'perimeter_bot_zn_3_air_temperature',
                     'perimeter_bot_zn_4_air_temperature',
                     'perimeter_mid_zn_1_air_temperature',
                     'perimeter_mid_zn_2_air_temperature',
                     'perimeter_mid_zn_3_air_temperature',
                     'perimeter_mid_zn_4_air_temperature',
                     'perimeter_top_zn_1_air_temperature',
                     'perimeter_top_zn_2_air_temperature',
                     'perimeter_top_zn_3_air_temperature',
                     'perimeter_top_zn_4_air_temperature',],
                   'energy_variables': ['HVAC_electricity_demand_rate'],
                   "range_comfort_winter": (20.0, 23.5),
                   "range_comfort_summer": (23.0, 26.0),
                   "lambda_energy": 0.0001,
               },
            building_config=extra_conf,
              action_space=gym.spaces.Box(
                low=0,
                high=0,
                shape=(2,))
)

random_env = NormalizeObservation(random_env)
random_env = NormalizeAction(random_env)
random_env = LoggerWrapper(random_env)
random_env = CSVLogger(random_env)


[38;20m[ENVIRONMENT] (INFO) : Creating Gymnasium environment.[0m                                
[38;20m[ENVIRONMENT] (INFO) : Name: EMPTY_A[0m                                                  
[38;20m[MODEL] (INFO) : Working directory created: /workspace/EMPTY_A-res2[0m                   
[38;20m[MODEL] (INFO) : Model Config is correct.[0m                                             
[38;20m[MODEL] (INFO) : Building model Output:Variable updated with defined variable names.[0m  
[38;20m[MODEL] (INFO) : Updated building model Output:Meter with meter names.[0m                
[38;20m[MODEL] (INFO) : Building configuration: runperiod updated to {'start_day': 1, 'start_month': 7, 'start_year': 2024, 'end_day': 31, 'end_month': 7, 'end_year': 2024, 'start_weekday': 6, 'n_steps_per_hour': 6}[0m
[38;20m[MODEL] (INFO) : Updated episode length (seconds): 2678400.0[0m                          
[38;20m[MODEL] (INFO) : Updated timestep size (seconds): 600.0[0m                   

  logger.warn("A Box action space maximum and minimum values are equal.")


In [21]:
# random agent
mean_rewards = []
for i in range(10):
    obs, info = random_env.reset()
    rewards = []
    truncated = terminated = False
    current_month = 0
    while not (terminated or truncated):
        a = random_env.action_space.sample()
        obs, reward, terminated, truncated, info = random_env.step(a)

        rewards.append(reward)
        if info['month'] != current_month:  # display results every month
            current_month = info['month']
            print('Reward: ', sum(rewards), info)

    mean_rewards.append(np.mean(rewards))

print(f"mean reward: {np.mean(mean_rewards)}")

#----------------------------------------------------------------------------------------------#  
[38;20m[ENVIRONMENT] (INFO) : Starting a new episode.[0m                                        
[38;20m[ENVIRONMENT] (INFO) : Episode 1: EMPTY_A[0m                                             
#----------------------------------------------------------------------------------------------#  
[38;20m[MODEL] (INFO) : Episode directory created.[0m                                           
[38;20m[MODEL] (INFO) : Weather file JPN_Nagoya.476350_IWEC.epw used.[0m                        
[38;20m[MODEL] (INFO) : Adapting weather to building model.[0m                                  
[38;20m[MODEL] (INFO) : Weather noise applied to columns: ['Dry Bulb Temperature'][0m           
[38;20m[ENVIRONMENT] (INFO) : Saving episode output path in /workspace/EMPTY_A-res2/episode-1/output.[0m
[38;20m[SIMULATOR] (INFO) : handlers initialized.[0m                                            
[

In [22]:
env.close()

[38;20m[WRAPPER CSVLogger] (INFO) : Environment closed, data updated in monitor and progress.csv.[0m
[38;20m[WRAPPER NormalizeObservation] (INFO) : Normalization calibration saved.[0m             
Simulation Progress [Episode 7]: 100%|██████████| 100/100 [00:38<00:00,  2.60%/s, 100% completed]
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-5_2025-08-10_21:56][0m


In [23]:
eval_env.close()


[38;20m[WRAPPER CSVLogger] (INFO) : Environment closed, data updated in monitor and progress.csv.[0m
[38;20m[WRAPPER NormalizeObservation] (INFO) : Normalization calibration saved.[0m
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-5_2025-08-10_21:56_EVALUATION][0m
[38;20m[WRAPPER CSVLogger] (INFO) : Environment closed, data updated in monitor and progress.csv.[0m
[38;20m[WRAPPER CSVLogger] (INFO) : Environment closed, data updated in monitor and progress.csv.[0m
[38;20m[WRAPPER NormalizeObservation] (INFO) : Normalization calibration saved.[0m
[38;20m[ENVIRONMENT] (INFO) : Environment closed. [SB3_PPO-Eplus-office-mixed-continuous-stochastic-v1-episodes-5_2025-08-10_21:56][0m
