In [1]:
from stable_baselines3 import DQN 
import numpy as np
import sys
from collections import OrderedDict
sys.path.insert(0,'boptestGymService')
from boptestGymEnv import BoptestGymEnv
from boptestGymEnv import BoptestGymEnvRewardWeightCost, NormalizedActionWrapper, NormalizedObservationWrapper, SaveAndTestCallback,DiscretizedActionWrapper
from stable_baselines3 import SAC
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
from testing import utilities
import random
import os
from stable_baselines3 import SAC,PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env

import requests
url = 'http://127.0.0.1:5000'
# url="https://api.boptest.net" 


In [2]:
import numpy as np
import requests

class BoptestGymEnvCustomReward(BoptestGymEnv):
    
    def calculate_objective(self, kpis):
        """
        Calculate the objective based on the given KPI values.
        """
        cost_tot = kpis.get('cost_tot')
        pdih_tot = kpis.get('pdih_tot') 
        pele_tot = kpis.get('pele_tot') 
        tdis_tot = kpis.get('tdis_tot') 
        idis_tot = kpis.get('idis_tot')

        objective = (
            cost_tot +
            4.25 * (pdih_tot + pele_tot) +
            0.005 * tdis_tot +
            0.0001 * idis_tot
        )

        return objective

    def get_reward(self):
        
        kpis = requests.get(f'{self.url}/kpi').json()['payload']
      

        current_objective = self.calculate_objective(kpis)
        # Compute reward
        
        reward = -(current_objective - self.objective_integrand)
        print("reward",reward)
        self.objective_integrand = current_objective
        
        
        return reward


In [3]:
import os
import wandb
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
import torch
from wandb.integration.sb3 import WandbCallback

def train_SAC_with_callback(model_path=None,
                            log_dir=os.path.join('results', 'SAC_AD1', 'Model1'),
                            tensorboard_log=os.path.join('results', 'SAC_AD1', 'Model1')):
    """
    Method to train a SAC agent using a callback to save the model periodically and log to WandB.

    Parameters
    ----------
    model_path : str, optional
        Path to a pre-trained model. If provided, the model will be loaded and further trained.
    log_dir : str
        Directory where monitoring data and best-trained model are stored.
    tensorboard_log : str
        Path to directory to load tensorboard logs.
    """
    
    excluding_periods = []
    excluding_periods.append((173*24*3600, 266*24*3600))  # Summer period

    # Define environment configuration
    env_config = {
        "url": url,
        "actions": ['ahu_oveFanSup_u','oveValCoi_u', 'oveValRad_u'], # 'oveValCoi_u', 'oveValRad_u'
        "observations": {
            'time':(0,604800),
            'reaTZon_y': (280., 310.),
            'reaCO2Zon_y': (200., 2000.),
            'weaSta_reaWeaTDryBul_y': (250., 350.),
            'PriceElectricPowerHighlyDynamic': (-0.4, 0.4),
            'LowerSetp[1]': (280., 310.),
            'UpperSetp[1]': (280., 310.),
            'UpperCO2[1]':(0,10000)
        },
        "predictive_period": 24 * 3600,
        "regressive_period": 6*3600,
        "scenario": {'electricity_price': 'highly_dynamic'},
        "random_start_time": True,
        "step_period": 3600,
        "warmup_period": 1*24*3600,
        "max_episode_length": 3*24*3600,
        "excluding_periods": excluding_periods
    }

    env = BoptestGymEnvCustomReward(**env_config)

    env = NormalizedObservationWrapper(env)
    env = NormalizedActionWrapper(env) 
    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.cuda.empty_cache()  # Clear GPU cache
        print("CUDA is available. Using GPU.")
    else:
        device = torch.device("cpu")
        print("CUDA is not available. Using CPU.")
    os.makedirs(log_dir, exist_ok=True)

    env = Monitor(env=env, filename=os.path.join(log_dir, 'monitor.csv'))

    # Initialize WandB and start a new run, with environment configuration added
    run = wandb.init(
        project="SAC-Training",  # Replace with your project name
          # Replace with your WandB entity
        sync_tensorboard=True,  # Auto-sync with TensorBoard
        config={
            "algo": "SAC",
              # Replace with the actual environment name
            "total_timesteps": 1000000,
            "gamma": 0.99,
            "learning_rate": 3e-4,
            "buffer_size": 1000000,
            "batch_size": 256,
            "train_freq": 1,
            "gradient_steps": 1,
            "ent_coef": 'auto',
            "env_config": env_config  # Include the environment configuration
        },
        name="SAC_10m",
        id="9pdbb7hh",
        resume="allow"
    )
    print(run.id)
    # Callback to save model every 1000 steps with a unique name
    # Create the callback test and save the agent while training
    callback = SaveAndTestCallback(env, check_freq=1000, save_freq=1000,
                                       log_dir=log_dir, test=False)

    # WandB callback to track training metrics
    wandb_callback = WandbCallback(
        model_save_path=os.path.join(log_dir, "wandb_models"),
        model_save_freq=1000,
        verbose=2
    )

    # Set up logger with TensorBoard logging continuation
    new_logger = configure(log_dir, ['stdout', 'csv', 'tensorboard'])

    # Load existing model if model_path is given, else create a new one
    if model_path and os.path.isfile(model_path):
        model = SAC.load(model_path, env=env, tensorboard_log=tensorboard_log)
        print(f"Loaded pre-trained model from {model_path}")
        model.set_logger(new_logger)  # Reconfigure the logger to continue logging
    else:
        model = SAC(
            'MlpPolicy', 
            env, 
            verbose=1, 
            gamma=0.99,
            learning_rate=3e-4,
            buffer_size=1000000,
            batch_size=256,
            train_freq=1,
            gradient_steps=1,
            ent_coef='auto',
            tensorboard_log=tensorboard_log,
        )
        model.set_logger(new_logger)
        print("Starting training from scratch.")
     # Verify that the model is on the correct device
    print(f"Model is on device: {next(model.policy.parameters()).device}")
    # Train the agent with the callback
    model.learn(total_timesteps=int(1000000), callback=[callback, wandb_callback])
    
    # Finish WandB run
    run.finish()
    
    return env, model

if __name__ == "__main__":
    model_path = "results/SAC_AD1/Model1/model_4000_latest.zip" # Update this with the correct path if needed
    env, model = train_SAC_with_callback(model_path=model_path)
    model.save(os.path.join('results', 'SAC', 'final_model'))
    print("Training completed. Model saved in results/SAC/")
    print("TensorBoard logs saved in results/SAC/")


  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


CUDA is available. Using GPU.


[34m[1mwandb[0m: Currently logged in as: [33mdixitaniket1212[0m ([33mdixitaniket1212-coventry-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


9pdbb7hh
Logging to results/SAC_AD1/Model1
Wrapping the env in a DummyVecEnv.
Loaded pre-trained model from results/SAC_AD1/Model1/model_4000_latest.zip
Model is on device: cuda:0
reward -0.7212330565185668
reward -0.015171365221527289
reward -0.00447165846806441
reward -0.0039015386817347064
reward -0.0044706748443071875
reward -0.004621731988933897
reward -0.004670047217282147
reward -0.006115162822059372
reward -0.005480846602274769
reward -0.006529261176053236
reward -0.0032877778362150334
reward -0.007384939704442872
reward -0.007572580488396485
reward -0.029710979544206095
reward -0.04198660042479874
reward -0.13262302335963227
reward -0.008589783771631887
reward -0.0031004778694652124
reward -0.017698967881577765
reward -0.010939753282718234


KeyboardInterrupt: 

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
