In [1]:
from stable_baselines3 import DQN
import numpy as np
import sys
from collections import OrderedDict
# Put the local 'boptestGymService' directory first on the module search path
# so that the `boptestGymEnv` imports below resolve to it.
sys.path.insert(0,'boptestGymService')
from boptestGymEnv import BoptestGymEnv
from boptestGymEnv import BoptestGymEnvRewardWeightCost, NormalizedActionWrapper, NormalizedObservationWrapper, SaveAndTestCallback,DiscretizedActionWrapper
from stable_baselines3 import SAC
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
from testing import utilities
import random
import os
from stable_baselines3 import SAC,PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env

import requests
# Base URL of the BOPTEST server; this module-level name is read by later cells.
# The active value points at a server on localhost port 5000.
url = 'http://127.0.0.1:5000'
# Alternative: the hosted BOPTEST service.
# url="https://api.boptest.net"


In [2]:
import numpy as np
import requests

class BoptestGymEnvCustomReward(BoptestGymEnv):
    """BOPTEST gym environment whose reward is the negative increment of a
    weighted KPI objective between two consecutive reward evaluations.

    The weights are class attributes so a subclass can retune the objective
    without overriding `calculate_objective`.
    """

    # Weight applied to the sum of the 'pdih_tot' and 'pele_tot' KPIs.
    WEIGHT_PEAK = 4.25
    # Weight applied to the 'tdis_tot' KPI.
    WEIGHT_TDIS = 0.005
    # Weight applied to the 'idis_tot' KPI.
    WEIGHT_IDIS = 0.0001

    def calculate_objective(self, kpis):
        """
        Calculate the objective based on the given KPI values.

        Parameters
        ----------
        kpis : dict
            KPI name -> value mapping. Missing keys and ``None`` values are
            treated as 0.

        Returns
        -------
        objective : number
            ``cost_tot + WEIGHT_PEAK * (pdih_tot + pele_tot)
            + WEIGHT_TDIS * tdis_tot + WEIGHT_IDIS * idis_tot``.
        """
        # `or 0` maps an explicit None value (key present, no value) to 0.
        cost_tot = kpis.get('cost_tot', 0) or 0
        pdih_tot = kpis.get('pdih_tot', 0) or 0
        pele_tot = kpis.get('pele_tot', 0) or 0
        tdis_tot = kpis.get('tdis_tot', 0) or 0
        idis_tot = kpis.get('idis_tot', 0) or 0

        objective = (
            cost_tot +
            self.WEIGHT_PEAK * (pdih_tot + pele_tot) +
            self.WEIGHT_TDIS * tdis_tot +
            self.WEIGHT_IDIS * idis_tot
        )

        return objective

    def get_reward(self):
        """
        Fetch the current KPIs from the server and return the reward.

        The reward is ``-(current_objective - self.objective_integrand)``;
        afterwards ``self.objective_integrand`` is set to the current
        objective so the next call measures only the new increment.

        Returns
        -------
        reward : number
            Negative objective increment, or 0 when the KPIs could not be
            retrieved or parsed (``self.objective_integrand`` is then left
            untouched).
        """
        try:
            # Endpoint used against a local server. When targeting the BOPTEST
            # web service the KPI endpoint is test-specific instead:
            # '{url}/kpi/{testid}'.
            kpis = requests.get(f'{self.url}/kpi').json()['payload']
        except (requests.exceptions.RequestException,
                ValueError, KeyError, TypeError) as e:
            # ValueError: body is not valid JSON (not a RequestException on
            # every `requests` version). KeyError/TypeError: the JSON has no
            # 'payload' entry or is not a mapping.
            print(f"Error fetching KPIs: {e}")
            return 0  # In case of error, return zero reward

        if not isinstance(kpis, dict):
            # e.g. a null payload; calculate_objective needs a mapping.
            print(f"Error fetching KPIs: unexpected payload {kpis!r}")
            return 0

        current_objective = self.calculate_objective(kpis)

        # Reward is the negative growth of the objective since the last call.
        reward = -(current_objective - self.objective_integrand)

        self.objective_integrand = current_objective

        return reward


In [None]:
import os
import wandb
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
# WandbCallback is provided by wandb's SB3 integration only;
# stable_baselines3.common.callbacks does not export it, so the former
# import from that module raised ImportError and has been dropped.
from wandb.integration.sb3 import WandbCallback

def train_SAC_with_callback(model_path=None,
                            log_dir=os.path.join('results', 'SAC_AD1', 'Model1'),
                            tensorboard_log=os.path.join('results', 'SAC_AD1', 'Model1'),
                            total_timesteps=100000):
    """
    Method to train a SAC agent using a callback to save the model periodically and log to WandB.

    Parameters
    ----------
    model_path : str, optional
        Path to a pre-trained model. If provided and found (with or without
        the ``.zip`` extension), the model is loaded and further trained;
        otherwise a new model is created.
    log_dir : str
        Directory where the monitor file, the checkpoints, the W&B model
        copies and the logger outputs (stdout/csv/tensorboard) are written.
    tensorboard_log : str
        Value passed as ``tensorboard_log`` to the SAC constructor / loader.
    total_timesteps : int, optional
        Number of environment steps to train for. The same value is recorded
        in the W&B run config.

    Returns
    -------
    env : Monitor
        The monitored training environment.
    model : SAC
        The trained agent.
    """

    excluding_periods = []
    excluding_periods.append((173*24*3600, 266*24*3600))  # Summer period

    # Define environment configuration (`url` is the module-level server URL)
    env_config = {
        "url": url,
        "actions": ['ahu_oveFanSup_u', 'oveValCoi_u', 'oveValRad_u'],
        "observations": {
            'time': (0, 31536000),
            'reaTZon_y': (200., 400.),
            'reaCO2Zon_y': (200., 2000.),
            'weaSta_reaWeaTDryBul_y': (250., 350.),
            'PriceElectricPowerHighlyDynamic': (-0.4, 0.4),
            'LowerSetp[1]': (280., 310.),
            'UpperSetp[1]': (280., 310.),
            'UpperCO2[1]':(0,10000)
        },
        "predictive_period": 5 * 3600,
        "scenario": {'electricity_price': 'highly_dynamic'},
        "random_start_time": True,
        "max_episode_length": 3 * 24 * 3600,
        "step_period": 3600,
        "excluding_periods": excluding_periods
    }

    # Single source of truth for the hyperparameters: used both to build the
    # agent and to describe the run in W&B, so the two cannot drift apart.
    sac_hyperparams = {
        "gamma": 0.99,
        "learning_rate": 3e-4,
        "buffer_size": 1000000,
        "batch_size": 256,
        "train_freq": 1,
        "gradient_steps": 1,
        "ent_coef": 'auto',
    }

    env = BoptestGymEnvCustomReward(**env_config)

    os.makedirs(log_dir, exist_ok=True)

    env = Monitor(env=env, filename=os.path.join(log_dir, 'monitor.csv'))

    # Resolve the checkpoint to resume from. SB3 stores models as '<name>.zip',
    # so a path given without the extension is accepted as well.
    resume_path = None
    if model_path:
        for candidate in (model_path, f"{model_path}.zip"):
            if os.path.isfile(candidate):
                resume_path = candidate
                break
        if resume_path is None:
            print(f"No pre-trained model found at {model_path}.")

    # Initialize WandB and start a new run, with environment configuration added
    run = wandb.init(
        project="SAC-Training",  # Replace with your project name
        sync_tensorboard=True,  # Auto-sync with TensorBoard
        config={
            "algo": "SAC",
            "total_timesteps": int(total_timesteps),
            **sac_hyperparams,
            "env_config": env_config  # Include the environment configuration
        }
    )

    try:
        # Callback to save model every 1000 steps with a unique name
        checkpoint_callback = CheckpointCallback(
            save_freq=1000,
            save_path=log_dir,
            name_prefix="sac_model",
            verbose=1
        )

        # WandB callback to track training metrics
        wandb_callback = WandbCallback(
            model_save_path=os.path.join(log_dir, "wandb_models"),
            model_save_freq=1000,
            verbose=2
        )

        # Logger writing stdout, csv and tensorboard outputs into log_dir
        new_logger = configure(log_dir, ['stdout', 'csv', 'tensorboard'])

        # Load existing model if one was found, else create a new one
        if resume_path is not None:
            model = SAC.load(resume_path, env=env, tensorboard_log=tensorboard_log)
            print(f"Loaded pre-trained model from {resume_path}")
        else:
            model = SAC(
                'MlpPolicy',
                env,
                verbose=1,
                tensorboard_log=tensorboard_log,
                **sac_hyperparams,
            )
            print("Starting training from scratch.")
        model.set_logger(new_logger)

        # Train the agent with the callbacks
        model.learn(total_timesteps=int(total_timesteps),
                    callback=[checkpoint_callback, wandb_callback])
    finally:
        # Close the W&B run even when training fails or is interrupted,
        # so it does not stay open for the rest of the kernel session.
        run.finish()

    return env, model

if __name__ == "__main__":
    model_path = None  # Update this with the correct path if needed

    # Training outputs (monitor file, checkpoints, logger/TensorBoard files)
    # go to log_dir; the final model is saved separately. Both locations are
    # named here so the messages below report where files really are.
    log_dir = os.path.join('results', 'SAC_AD1', 'Model1')
    final_model_path = os.path.join('results', 'SAC', 'final_model')

    env, model = train_SAC_with_callback(model_path=model_path,
                                         log_dir=log_dir,
                                         tensorboard_log=log_dir)
    model.save(final_model_path)  # SB3 appends '.zip' when no suffix is given
    print(f"Training completed. Model saved in {final_model_path}.zip")
    print(f"TensorBoard logs saved in {log_dir}")
