In [1]:
import wandb
from wandb.integration.sb3 import WandbCallback
from stable_baselines3 import DQN
import numpy as np
import sys
from collections import OrderedDict

sys.path.insert(0, "boptestGymService")
from boptestGymEnv import BoptestGymEnv
from boptestGymEnv import (
    BoptestGymEnvRewardWeightCost,
    NormalizedActionWrapper,
    NormalizedObservationWrapper,
    SaveAndTestCallback,
    DiscretizedActionWrapper,
)
from stable_baselines3 import SAC
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
from testing import utilities
import random
import os
from stable_baselines3 import SAC, PPO, TD3
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.logger import configure
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.env_util import make_vec_env
import torch
import requests


import os
import wandb
from stable_baselines3 import TD3  # Import TD3 instead of SAC
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.logger import configure
import torch
from wandb.integration.sb3 import WandbCallback

In [2]:
import numpy as np
import requests


class BoptestGymEnvCustomReward(BoptestGymEnv):
    """BOPTEST-Gym environment with a custom, KPI-based reward.

    The reward of a step is the negated increase of a weighted sum of the
    KPIs ``cost_tot``, ``pdih_tot``, ``pele_tot``, ``tdis_tot`` and
    ``idis_tot`` since the previous call to ``get_reward``.
    """

    def calculate_objective(self, kpis):
        """
        Calculate the objective based on the given KPI values.

        Parameters
        ----------
        kpis: dict
            Mapping from KPI name to numeric value. The keys ``cost_tot``,
            ``pdih_tot``, ``pele_tot``, ``tdis_tot`` and ``idis_tot`` are
            read. A key that is missing or whose value is ``None``
            contributes 0 to the objective instead of raising ``TypeError``.
            NOTE(review): the service appears to report ``None`` for KPIs
            that do not apply to a test case -- confirm against the API.

        Returns
        -------
        objective: float
            ``cost_tot + 4.25 * (pdih_tot + pele_tot) + 0.005 * tdis_tot
            + 0.0001 * idis_tot``.
        """

        def kpi(name):
            # Treat absent / null KPIs as "no contribution".
            value = kpis.get(name)
            return 0.0 if value is None else value

        objective = (
            kpi("cost_tot")
            + 4.25 * (kpi("pdih_tot") + kpi("pele_tot"))
            + 0.005 * kpi("tdis_tot")
            + 0.0001 * kpi("idis_tot")
        )

        return objective

    def get_reward(self):
        """Return the reward obtained since the previous call.

        The current KPIs are requested from ``{self.url}/kpi`` and turned
        into an objective value with ``calculate_objective``. The reward is
        the negative of the objective's increase relative to
        ``self.objective_integrand``, which is then updated to the new
        objective value.

        Returns
        -------
        reward: float
            Negated objective increment.
        """
        kpis = requests.get(f"{self.url}/kpi").json()["payload"]

        current_objective = self.calculate_objective(kpis)

        # assumes the base class initialises ``self.objective_integrand``
        # (e.g. on reset) before the first step -- TODO confirm
        reward = -(current_objective - self.objective_integrand)
        self.objective_integrand = current_objective

        return reward

In [None]:
import os
import sys
import yaml
import torch
import random

from testing import utilities
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.vec_env.vec_monitor import VecMonitor
from stable_baselines3.common.logger import configure
from boptestGymEnv import (
    BoptestGymEnv,
    NormalizedObservationWrapper,
    DiscretizedActionWrapper,
)


def generate_urls_from_yml(boptest_root_dir):
    """Method that returns as many urls for BOPTEST-Gym environments
    as those specified at the BOPTEST `docker-compose.yml` file.
    It assumes that `generateDockerComposeYml.py` has been called first.

    Parameters
    ----------
    boptest_root_dir: str
        String with directory to BOPTEST where the `docker-compose.yml`
        file should be located.

    Returns
    -------
    urls: list
        List of urls where BOPTEST test cases will be allocated, one
        ``http://127.0.0.1:<host_port>`` entry per published port of every
        service. Empty when the file declares no ports or cannot be parsed
        (the YAML error is printed in that case).

    Raises
    ------
    OSError
        If the `docker-compose.yml` file cannot be opened.
    IndexError
        If a port entry does not contain a host port (no ``:`` separator).

    """
    docker_compose_loc = os.path.join(boptest_root_dir, "docker-compose.yml")

    # Bound before parsing so that a YAML error still yields a list
    # instead of an UnboundLocalError at the return statement.
    urls = []

    # Read the docker-compose.yml file
    with open(docker_compose_loc, "r") as stream:
        try:
            # An empty file parses to None; treat it as "no services".
            docker_compose_data = yaml.safe_load(stream) or {}
        except yaml.YAMLError as exc:
            print(exc)
            return urls

    services = docker_compose_data.get("services") or {}

    # Extract the port and URL of the service
    for config in services.values():
        for port in (config or {}).get("ports") or []:
            # Short port syntax is "[IP:]HOST:CONTAINER", so the host port
            # is always the next-to-last field, with or without the IP.
            host_port = port.split(":")[-2]
            urls.append(f"http://127.0.0.1:{host_port}")

    print(urls)  # Print URLs

    return urls


def make_env(url):
    """Return a zero-argument factory that builds one wrapped environment.

    Parameters
    ----------
    url: string
        Rest API url for communication with this environment.

    Returns
    -------
    _init: callable
        Function without arguments that creates a
        ``BoptestGymEnvCustomReward`` for ``url``, wraps it first in
        ``NormalizedObservationWrapper`` and then in
        ``DiscretizedActionWrapper`` (10 bins), and returns the result.
    """

    def _init():
        hour = 3600
        day = 24 * hour

        settings = dict(
            url=url,
            actions=["ahu_oveFanSup_u", "oveValCoi_u", "oveValRad_u"],
            # Each observation maps to its (lower, upper) bounds.
            observations={
                "time": (0, 604800),
                "reaTZon_y": (280.0, 310.0),
                "reaCO2Zon_y": (200.0, 2000.0),
                "PriceElectricPowerHighlyDynamic": (-0.4, 0.4),
                "LowerSetp[1]": (280.0, 310.0),
                "UpperSetp[1]": (280.0, 310.0),
            },
            scenario={"electricity_price": "dynamic"},
            predictive_period=4 * hour,
            random_start_time=True,
            # (start, end) pairs in seconds, written as day offsets.
            excluding_periods=[
                (22 * day, 265 * day),
                (290 * day, 365 * day),
            ],
            max_episode_length=3 * hour,
            step_period=1800,
        )

        base_env = BoptestGymEnvCustomReward(**settings)
        # Observation normalization is applied before action discretization.
        normalized_env = NormalizedObservationWrapper(base_env)
        return DiscretizedActionWrapper(normalized_env, n_bins_act=10)

    return _init


def train_DQN_vectorized(
    venv,
    log_dir=os.path.join('results', 'DQN_AD1', 'Model3'),
    tensorboard_log=os.path.join('results', 'DQN_AD1', 'Model3')
):
    """Method to train DQN agent using vectorized environment.

    Training resumes from ``<log_dir>/best_model.zip`` (the file written by
    the evaluation callback) when it exists; otherwise a new agent is
    created. Progress is reported to Weights & Biases, and the W&B run is
    always finished, also when training raises.

    Parameters
    ----------
    venv: stable_baselines3.common.vec_env.SubprocVecEnv
        vectorized environment to be learned.
    log_dir: str
        Directory for the monitor file, logger output, evaluation results
        and saved models. Created if it does not exist.
    tensorboard_log: str
        TensorBoard log location handed to the DQN agent.

    """

    # Create logging directory if not exists. Monitoring data and agent model will be stored here
    os.makedirs(log_dir, exist_ok=True)

    # Description of the environment set-up, only used as W&B run config.
    env_config = {
        "url": "url",
        "actions": ["ahu_oveFanSup_u", "oveValCoi_u", "oveValRad_u"],
        "observations": {
            "time": [0, 604800],
            "reaTZon_y": [280.0, 310.0],
            "reaCO2Zon_y": [200.0, 2000.0],
            "PriceElectricPowerHighlyDynamic": [-0.4, 0.4],
            "LowerSetp[1]": [280.0, 310.0],
            "UpperSetp[1]": [280.0, 310.0],
        },
        "scenario": {"electricity_price": "dynamic"},
        "predictive_period": 14400,
        "random_start_time": "true",
        "excluding_periods": [[1900800, 22896000], [25056000, 31536000]],
        "max_episode_length": 10800,
        "step_period": 1800,
    }

    # Record episode statistics of all sub-environments in one monitor file
    venv = VecMonitor(venv=venv, filename=os.path.join(log_dir, "monitor.csv"))

    run = wandb.init(
        project="DQN",  # Replace with your project name
        sync_tensorboard=True,  # Auto-sync with TensorBoard
        config=env_config,
        name="DQN",
        id="4",
        resume="allow",
    )
    try:
        print(run.id)

        # When using multiple environments, each call to ``env.step()`` will effectively correspond to ``n_envs`` steps.
        # To account for that, ``eval_freq`` is divided by ``venv.num_envs``. It is kept at 1 or above
        # because a frequency of 0 would switch evaluation off.
        eval_freq = 1000
        # NOTE(review): evaluation runs on the training environment itself, so evaluation
        # episodes interrupt the training episodes -- consider a separate evaluation env.
        eval_callback = EvalCallback(
            venv,
            best_model_save_path=log_dir,
            log_path=log_dir,
            eval_freq=max(int(eval_freq / venv.num_envs), 1),
            n_eval_episodes=1,
            deterministic=True,
        )
        wandb_callback = WandbCallback(
            model_save_path=log_dir,
            model_save_freq=1000,
            verbose=2,
        )

        # Try to find CUDA core since it's optimized for parallel computing tasks
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Resume from the best model saved by the evaluation callback, if any
        checkpoint_path = os.path.join(log_dir, "best_model.zip")
        if os.path.isfile(checkpoint_path):
            model = DQN.load(
                checkpoint_path,
                env=venv,
                device=device,
                tensorboard_log=tensorboard_log,
            )
        else:
            print(f"No checkpoint found at {checkpoint_path}; training a new DQN agent")
            model = DQN(
                "MlpPolicy",
                venv,
                verbose=1,
                gamma=0.99,
                learning_rate=5e-4,
                batch_size=24,
                seed=123456,
                buffer_size=365 * 24,
                learning_starts=24,
                train_freq=1,
                exploration_initial_eps=1.0,
                exploration_final_eps=0.01,
                exploration_fraction=0.1,
                device=device,
                tensorboard_log=tensorboard_log,
            )

        # Set up logger with TensorBoard logging continuation
        new_logger = configure(log_dir, ['stdout', 'csv', 'tensorboard'])
        model.set_logger(new_logger)

        # Main training loop
        model.learn(total_timesteps=5000000, callback=[eval_callback, wandb_callback])
    finally:
        # Flush and close the W&B run even if training fails or is interrupted
        run.finish()


if __name__ == "__main__":

    # Directory holding the BOPTEST `docker-compose.yml`. Set the
    # BOPTEST_ROOT_DIR environment variable to run on another machine;
    # the previous hard-coded location remains the default.
    boptest_root = os.environ.get(
        "BOPTEST_ROOT_DIR",
        "/Users/aniketdixit/Desktop/ADRENALIN/HVAC_Local/boptestGymService",
    )

    boptest_root_dir = boptest_root

    # Use URLs obtained from docker-compose.yml
    urls = generate_urls_from_yml(boptest_root_dir=boptest_root_dir)
    if not urls:
        # A vectorized environment cannot be built from zero environments
        raise RuntimeError(
            f"No BOPTEST service ports found in {boptest_root_dir}/docker-compose.yml"
        )

    # Create BOPTEST-Gym environment replicas
    envs = [make_env(url) for url in urls]

    # Create a vectorized environment using SubprocVecEnv
    venv = SubprocVecEnv(envs)

    # Train vectorized environment
    try:
        train_DQN_vectorized(venv)
    finally:
        # Stop the worker processes even when training fails or is interrupted
        venv.close()