In [1]:
from stable_baselines3 import DQN 
import numpy as np
import sys
from collections import OrderedDict
# Put the relative 'boptestGymService' directory first on the module search
# path so the `boptestGymEnv` imports right below can be found there.
sys.path.insert(0,'boptestGymService')
from boptestGymEnv import BoptestGymEnv
from boptestGymEnv import BoptestGymEnvRewardWeightCost, NormalizedActionWrapper, NormalizedObservationWrapper, SaveAndTestCallback,DiscretizedActionWrapper
from stable_baselines3 import SAC
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
from testing import utilities
import random
import os
from stable_baselines3 import SAC,PPO
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env

import requests
# Base URL of the BOPTEST service. This notebook-level global is read when the
# environment configuration is built in the training cell; here it points at a
# server on the local machine (loopback address, port 5000).
url = 'http://127.0.0.1:5000'
# Alternative endpoint, currently disabled:
# url="https://api.boptest.net" 


In [2]:
import numpy as np
import requests

class BoptestGymEnvCustomReward(BoptestGymEnv):
    """BOPTEST environment rewarding the per-step decrease of a weighted KPI sum.

    The objective combines five KPI values returned by the service's ``/kpi``
    endpoint (``cost_tot``, ``pdih_tot``, ``pele_tot``, ``tdis_tot`` and
    ``idis_tot``). The reward of a step is the negative increment of that
    objective since the previous call to :meth:`get_reward`.
    """

    # Weights of the objective terms; ``cost_tot`` has an implicit weight of 1.
    PEAK_WEIGHT = 4.25     # applied to pdih_tot + pele_tot
    TDIS_WEIGHT = 0.005    # applied to tdis_tot
    IDIS_WEIGHT = 0.0001   # applied to idis_tot

    # Every step reward is printed while this is True (the original behaviour).
    # Set it to False on the class or on an instance to keep long training
    # runs from flooding the cell output.
    print_step_reward = True

    def calculate_objective(self, kpis):
        """
        Calculate the objective based on the given KPI values.

        Parameters
        ----------
        kpis : dict
            Mapping from KPI name to its numeric value. A KPI that is missing
            or reported as ``None`` contributes 0 to the objective instead of
            raising a ``TypeError`` in the arithmetic below.

        Returns
        -------
        objective : float
            ``cost_tot + 4.25 * (pdih_tot + pele_tot) + 0.005 * tdis_tot
            + 0.0001 * idis_tot`` with the default class weights.
        """
        def kpi_value(name):
            # Treat an absent / None KPI as "no contribution".
            value = kpis.get(name)
            return 0.0 if value is None else value

        cost_tot = kpi_value('cost_tot')
        pdih_tot = kpi_value('pdih_tot')
        pele_tot = kpi_value('pele_tot')
        tdis_tot = kpi_value('tdis_tot')
        idis_tot = kpi_value('idis_tot')

        objective = (
            cost_tot +
            self.PEAK_WEIGHT * (pdih_tot + pele_tot) +
            self.TDIS_WEIGHT * tdis_tot +
            self.IDIS_WEIGHT * idis_tot
        )

        return objective

    def get_reward(self):
        """
        Return the reward for the current step.

        Fetches the current KPIs from ``{self.url}/kpi``, evaluates the
        objective and returns the negative of its change relative to
        ``self.objective_integrand``, which is then updated to the new
        objective value.

        Returns
        -------
        reward : float
            ``-(current_objective - previous_objective)``.

        Raises
        ------
        requests.HTTPError
            If the KPI request comes back with an HTTP error status.
        """
        # Use this request when running against a local server.
        response = requests.get(f'{self.url}/kpi')
        # Fail with a clear HTTP error instead of an obscure failure further
        # down when the service rejects the request.
        response.raise_for_status()
        kpis = response.json()['payload']

        current_objective = self.calculate_objective(kpis)

        # NOTE(review): `objective_integrand` is not set in this class; it is
        # assumed to be initialised by the parent environment - confirm.
        reward = -(current_objective - self.objective_integrand)
        if self.print_step_reward:
            print(reward)
        self.objective_integrand = current_objective

        return reward


In [3]:
import os
import wandb
from stable_baselines3 import A2C
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
import torch
from wandb.integration.sb3 import WandbCallback

def train_A2C_with_callback(model_path=None,
                            log_dir=os.path.join('results', 'A2C_AD1', 'Model1'),
                            tensorboard_log=os.path.join('results', 'A2C_AD1', 'Model1')):
    """
    Method to train an A2C agent using a callback to save the model periodically and log to WandB.

    Parameters
    ----------
    model_path : str, optional
        Path to a pre-trained model. If it points to an existing file, the
        model is loaded and trained further (keeping its timestep counter);
        otherwise a new model is created.
    log_dir : str
        Directory where monitoring data, logger output and saved models are stored.
        It is created if it does not exist.
    tensorboard_log : str
        Directory handed to the A2C model as its ``tensorboard_log`` location.

    Returns
    -------
    env : Monitor
        The monitored, action-discretized training environment.
    model : A2C
        The trained agent.
    """

    # Single source of truth for the training settings, so the values logged
    # to WandB cannot drift from the ones actually used by the model.
    total_timesteps = 1000000
    a2c_params = {
        "gamma": 0.99,
        "learning_rate": 7e-4,
        "n_steps": 5,
        "vf_coef": 0.25,
        "ent_coef": 0.01,
    }

    excluding_periods = [(173*24*3600, 266*24*3600)]  # Summer period

    # Define environment configuration (`url` is the notebook-level global)
    env_config = {
        "url": url,
        "actions": ['ahu_oveFanSup_u','oveValCoi_u', 'oveValRad_u'],
        "observations": {
            'time': (0, 31536000),
            'reaTZon_y': (200., 400.),
            'reaCO2Zon_y': (200., 2000.),
            'weaSta_reaWeaTDryBul_y': (250., 350.),
            'PriceElectricPowerHighlyDynamic': (-0.4, 0.4),
            'LowerSetp[1]': (280., 310.),
            'UpperSetp[1]': (280., 310.),
            'UpperCO2[1]':(0,10000)
        },
        "predictive_period": 5 * 3600,
        "scenario": {'electricity_price': 'highly_dynamic'},
        "random_start_time": True,
        "max_episode_length": 3 * 24 * 3600,
        "step_period": 3600,
        "excluding_periods": excluding_periods
    }

    # Observations are passed to the agent as-is (no normalization wrapper).
    env = BoptestGymEnvCustomReward(**env_config)
    env = DiscretizedActionWrapper(env, n_bins_act=3)

    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.cuda.empty_cache()  # Clear GPU cache
        print("CUDA is available. Using GPU.")
    else:
        device = torch.device("cpu")
        print("CUDA is not available. Using CPU.")
    os.makedirs(log_dir, exist_ok=True)

    env = Monitor(env=env, filename=os.path.join(log_dir, 'monitor.csv'))

    # Initialize WandB and start a new run, with environment configuration added
    run = wandb.init(
        project="A2C-Training",
        sync_tensorboard=True,
        config={
            "algo": "A2C",
            "total_timesteps": total_timesteps,
            **a2c_params,
            "env_config": env_config
        },
        name="A2C_10m",
        id="new_a2c_id",
        resume="allow"
    )
    print(run.id)

    # From here on the WandB run is open: make sure it is always closed, even
    # when the setup or the training itself raises.
    try:
        # Project callback, configured with check/save frequency 1000 and
        # testing disabled.
        callback = SaveAndTestCallback(env,check_freq=1000,save_freq=1000,log_dir=log_dir,test=False)

        # WandB callback to track training metrics
        wandb_callback = WandbCallback(
            model_save_path=os.path.join(log_dir, "wandb_models"),
            model_save_freq=1000,
            verbose=2
        )

        # Set up logger with TensorBoard logging continuation
        new_logger = configure(log_dir, ['stdout', 'csv', 'tensorboard'])

        # Load existing model if model_path is given, else create a new one
        resumed = bool(model_path) and os.path.isfile(model_path)
        if resumed:
            model = A2C.load(model_path, env=env, device=device,
                             tensorboard_log=tensorboard_log)
            print(f"Loaded pre-trained model from {model_path}")
        else:
            if model_path:
                # Do not silently ignore a mistyped checkpoint path.
                print(f"No model file found at {model_path}.")
            model = A2C(
                'MlpPolicy',
                env,
                verbose=1,
                tensorboard_log=tensorboard_log,
                device=device,
                **a2c_params
            )
            print("Starting training from scratch.")
        model.set_logger(new_logger)

        # Verify that the model is on the correct device
        print(f"Model is on device: {next(model.policy.parameters()).device}")

        # Train the agent with the callbacks. When resuming, keep the loaded
        # model's timestep counter so the logged curves continue instead of
        # restarting at step 0.
        model.learn(total_timesteps=int(total_timesteps),
                    callback=[callback, wandb_callback],
                    reset_num_timesteps=not resumed)
    finally:
        # Finish WandB run
        run.finish()

    return env, model

if __name__ == "__main__":
    # Checkpoint to continue training from.
    model_path = "results/A2C_AD1/Model1/model_830000.zip"
    # Directory receiving monitor/logger/TensorBoard output during training
    # (same value as the function's defaults, named here so the final
    # messages report the real location).
    log_dir = os.path.join('results', 'A2C_AD1', 'Model1')
    # Directory receiving the final model.
    final_model_dir = os.path.join('results', 'A2C')

    env, model = train_A2C_with_callback(model_path=model_path,
                                         log_dir=log_dir,
                                         tensorboard_log=log_dir)
    model.save(os.path.join(final_model_dir, 'final_model'))
    print(f"Training completed. Model saved in {os.path.join(final_model_dir, '')}")
    # The logs go to the training log directory, not to the final-model directory.
    print(f"TensorBoard logs saved in {os.path.join(log_dir, '')}")


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


CUDA is available. Using GPU.


[34m[1mwandb[0m: Currently logged in as: [33mdixitaniket1212[0m ([33mdixitaniket1212-coventry-university[0m). Use [1m`wandb login --relogin`[0m to force relogin


new_a2c_id
Logging to results/A2C_AD1/Model1
Wrapping the env in a DummyVecEnv.
Loaded pre-trained model from results/A2C_AD1/Model1/model_830000.zip
Model is on device: cuda:0
-0.005960694796939249
-2.917851868323995e-06
-0.29776358745428955
-0.00013425010192774645
-0.0037111222053750614
-1.494579553484865e-05
-0.009929937175887249
-1.4206589076415366e-05
-0.024743067139234243
-0.0035956769946755562
-0.00279001958408881
-0.0042223268223234856
-0.001783435962923574
-0.016864808832838918
-0.0025285321719195064
-0.00435832718903767
-0.0034500393030253984
-2.1468955817338653e-05
-0.006073608372274164
-0.0012964490353326585
-0.008059200953123458
-0.009081555466649571
-0.0021029881936647232
-3.2498711767248167e-06
-0.003862108363601846
-0.00981046378614936
-0.0012529141067852412
-0.004237057062934091
-0.007059946779445159
-0.01708014766957794
-0.02603401838262115
-0.0019148834851901997
-0.004450152739679802
-1.695642889659732e-05
-0.003962732365191235
-0.003568438742515223
-0.00366183131669