# Simple simulation without any actions taken

The goal of this file is to run a simple simulation with a plot of the temperatures inside and to calculate the total energy use for one month.

<br>

All the results can be found in the monitor.csv file. This file is under "examples" -> "<env name>-res<N>", where the env name is Eplus-demo-V1 in this case and N is the run number assigned after each simulation (currently 27). A progress.csv file is also available.

<br>

Questions:
-   Why does every month only last 23 days?
-   How can the schedule be changed? Currently the temperature setpoint is 21.1 °C from 07:00 until 12:00; the rest of the day it is 12.8 °C.
    -   Want to change this to 07:00-19:00


<br>
How should weekends and non-working hours be handled? Should the setpoint simply be set to 12 degrees, or should the system be switched off completely?




# Rule-based controller

In [1]:
from typing import List, Any, Sequence
from sinergym.utils.common import get_season_comfort_range
from datetime import datetime
import gym
import numpy as np
import sinergym
from sinergym.utils.wrappers import LoggerWrapper
from gym import Env
from math import exp
from typing import Any, Dict, List, Tuple, Union

  logger.warn(


In [5]:
"""Implementation of reward functions."""


class BaseReward:

    def __init__(self, env):
        """
        Common ancestor of every reward function.

        Concrete reward classes derive from this one and override
        ``__call__``.

        Args:
            env (Env): Gym environment.
        """
        self.env = env

    def __call__(self):
        """Compute the reward; the base class always raises.

        Raises:
            NotImplementedError: Always, since subclasses must override it.
        """
        message = "Reward class must have a `__call__` method."
        raise NotImplementedError(message)


class MyLinearReward(BaseReward):

    # Comfort band applied on weekends and outside working hours.
    SETBACK_RANGE = (15, 30)
    # Weekday hours (08:00 up to, but not including, 19:00) during which
    # the seasonal comfort band applies.
    WORKING_HOURS = range(8, 19)
    # Value of the comfort term when no temperature violates the band.
    COMFORT_BONUS = 5

    def __init__(
        self,
        env: Env,
        temperature_variable: Union[str, list],
        energy_variable: str,
        range_comfort_winter: Tuple[int, int],
        range_comfort_summer: Tuple[int, int],
        summer_start: Tuple[int, int] = (6, 1),
        summer_final: Tuple[int, int] = (9, 30),
        energy_weight: float = 0.6,
        lambda_energy: float = 0.003,
        lambda_temperature: float = 50
    ):
        """
        Linear reward function.

        It combines the energy consumption with the distance of the observed
        temperatures to the active comfort band.

        .. math::
            R = W * R_E + (1 - W) * R_C

        with ``R_E = - lambda_E * power``. ``R_C = - lambda_T * d`` where
        ``d`` is the sum, over all temperatures outside the band, of the
        distance to the nearest band limit. When ``d == 0`` the comfort term
        is the fixed bonus ``COMFORT_BONUS`` instead.

        Args:
            env (Env): Gym environment.
            temperature_variable (Union[str, list]): Name(s) of the temperature variable(s).
            energy_variable (str): Name of the energy/power variable.
            range_comfort_winter (Tuple[int,int]): Temperature comfort range for cold season. Depends on environment you are using.
            range_comfort_summer (Tuple[int,int]): Temperature comfort range for hot season. Depends on environment you are using.
            summer_start (Tuple[int,int]): Summer session tuple with month and day start. Defaults to (6,1).
            summer_final (Tuple[int,int]): Summer session tuple with month and day end. Defaults to (9,30).
            energy_weight (float, optional): Weight given to the energy term. Defaults to 0.6.
            lambda_energy (float, optional): Constant for removing dimensions from power(1/W). Defaults to 0.003.
            lambda_temperature (float, optional): Constant for removing dimensions from temperature(1/C). Defaults to 50.
        """

        super().__init__(env)

        # Name of the variables
        self.temp_name = temperature_variable
        self.energy_name = energy_variable

        # Reward parameters
        self.range_comfort_winter = range_comfort_winter
        self.range_comfort_summer = range_comfort_summer
        self.W_energy = energy_weight
        self.lambda_energy = lambda_energy
        self.lambda_temp = lambda_temperature

        # Summer period
        self.summer_start = summer_start  # (month,day)
        self.summer_final = summer_final  # (month,day)

    def __call__(self) -> Tuple[float, Dict[str, Any]]:
        """
        Calculate the reward function.

        Returns:
            Tuple[float, Dict[str, Any]]: Reward value and dictionary with their individual components.
        """
        # Work on a copy so the environment's own observation is untouched.
        obs_dict = self.env.obs_dict.copy()

        # Energy term
        reward_energy = - (self.lambda_energy * obs_dict[self.energy_name])

        # Comfort term: a fixed bonus when every temperature is inside the
        # band, otherwise a penalty proportional to the total violation.
        comfort, temps = self._get_comfort(obs_dict)
        if comfort == 0:
            reward_comfort = self.COMFORT_BONUS
        else:
            reward_comfort = - self.lambda_temp * comfort

        # Weighted sum of both terms
        reward = self.W_energy * reward_energy + \
            (1.0 - self.W_energy) * reward_comfort

        reward_terms = {
            'reward_energy': reward_energy,
            'total_energy': obs_dict[self.energy_name],
            'reward_comfort': reward_comfort,
            'abs_comfort': comfort,
            'temperatures': temps
        }

        return reward, reward_terms

    def _get_temp_range(self, obs_dict: Dict[str, Any]) -> Tuple[int, int]:
        """Return the comfort band that applies to the observed date and hour.

        Weekends and hours outside ``WORKING_HOURS`` use ``SETBACK_RANGE``;
        otherwise the summer or winter range is chosen from the date.
        """
        # datetime() rejects floats, so coerce the date fields explicitly.
        year = int(obs_dict['year'])
        current_dt = datetime(
            year, int(obs_dict['month']), int(obs_dict['day']))

        is_weekend = current_dt.weekday() >= 5
        if is_weekend or obs_dict['hour'] not in self.WORKING_HOURS:
            return self.SETBACK_RANGE

        summer_start_date = datetime(
            year, self.summer_start[0], self.summer_start[1])
        summer_final_date = datetime(
            year, self.summer_final[0], self.summer_final[1])
        if summer_start_date <= current_dt <= summer_final_date:
            return self.range_comfort_summer
        return self.range_comfort_winter

    def _get_comfort(self,
                     obs_dict: Dict[str,
                                    Any]) -> Tuple[float,
                                                   List[float]]:
        """Calculate the comfort term of the reward.

        Returns:
            Tuple[float, List[float]]: comfort penalty and List with temperatures used.
        """
        temp_range = self._get_temp_range(obs_dict)
        low, high = temp_range[0], temp_range[1]

        # A single name must be matched exactly; `key in "some string"`
        # would silently perform a substring test instead.
        if isinstance(self.temp_name, str):
            names = [self.temp_name]
        else:
            names = self.temp_name
        temps = [v for k, v in obs_dict.items() if k in names]

        # Sum, over violating temperatures, the distance to the nearest limit.
        comfort = 0.0
        for temp in temps:
            if temp < low or temp > high:
                comfort += min(abs(low - temp), abs(temp - high))

        return comfort, temps


class MyExpReward(MyLinearReward):

    def __init__(
        self,
        env: Env,
        temperature_variable: Union[str, list],
        energy_variable: str,
        range_comfort_winter: Tuple[int, int],
        range_comfort_summer: Tuple[int, int],
        summer_start: Tuple[int, int] = (6, 1),
        summer_final: Tuple[int, int] = (9, 30),
        # NOTE(review): with a weight of 1 the comfort term is multiplied by
        # (1 - 1) = 0 in the inherited __call__ -- confirm this is intended.
        energy_weight: float = 1,
        lambda_energy: float = 1e-4,
        lambda_temperature: float = 1
    ):
        """
        Reward considering exponential absolute difference to temperature comfort.

        The comfort penalty is the sum, over all temperatures outside the
        active comfort band, of ``exp(d)`` where ``d`` is the distance to the
        nearest band limit. The weighting of the energy and comfort terms is
        inherited from ``MyLinearReward.__call__``.

        Args:
            env (Env): Gym environment.
            temperature_variable (Union[str, list]): Name(s) of the temperature variable(s).
            energy_variable (str): Name of the energy/power variable.
            range_comfort_winter (Tuple[int,int]): Temperature comfort range for cold season. Depends on environment you are using.
            range_comfort_summer (Tuple[int,int]): Temperature comfort range for hot season. Depends on environment you are using.
            summer_start (Tuple[int,int]): Summer session tuple with month and day start. Defaults to (6,1).
            summer_final (Tuple[int,int]): Summer session tuple with month and day end. Defaults to (9,30).
            energy_weight (float, optional): Weight given to the energy term. Defaults to 1.
            lambda_energy (float, optional): Constant for removing dimensions from power(1/W). Defaults to 1e-4.
            lambda_temperature (float, optional): Constant for removing dimensions from temperature(1/C). Defaults to 1.
        """

        super().__init__(
            env,
            temperature_variable,
            energy_variable,
            range_comfort_winter,
            range_comfort_summer,
            summer_start,
            summer_final,
            energy_weight,
            lambda_energy,
            lambda_temperature
        )

    def _get_comfort(self,
                     obs_dict: Dict[str,
                                    Any]) -> Tuple[float,
                                                   List[float]]:
        """Calculate the comfort term of the reward.

        Returns:
            Tuple[float, List[float]]: comfort penalty and List with temperatures used.
        """
        hour = obs_dict["hour"]
        # datetime() rejects floats, so coerce the date fields explicitly.
        year = int(obs_dict['year'])
        current_dt = datetime(
            year, int(obs_dict['month']), int(obs_dict['day']))

        # Periods
        summer_start_date = datetime(
            year, self.summer_start[0], self.summer_start[1])
        summer_final_date = datetime(
            year, self.summer_final[0], self.summer_final[1])

        # Weekends and hours outside 08:00-19:00 use a relaxed band whatever
        # the season; otherwise the seasonal comfort range applies.
        off_hours = current_dt.weekday() >= 5 or hour not in range(8, 19)
        if off_hours:
            temp_range = (15, 30)
        elif summer_start_date <= current_dt <= summer_final_date:
            temp_range = self.range_comfort_summer
        else:
            temp_range = self.range_comfort_winter
        low, high = temp_range[0], temp_range[1]

        # A single name must be matched exactly; `key in "some string"`
        # would silently perform a substring test instead.
        if isinstance(self.temp_name, str):
            names = [self.temp_name]
        else:
            names = self.temp_name
        temps = [v for k, v in obs_dict.items() if k in names]

        # Each violating temperature contributes exp(distance to nearest limit).
        comfort = 0.0
        for temp in temps:
            if temp < low or temp > high:
                comfort += exp(min(abs(low - temp), abs(temp - high)))

        return comfort, temps


class MyHourlyExpReward(MyExpReward):

    def __init__(
        self,
        env: Env,
        temperature_variable: Union[str, list],
        energy_variable: str,
        range_comfort_winter: Tuple[int, int] = (20, 23),
        range_comfort_summer: Tuple[int, int] = (23, 26),
        summer_start: Tuple[int, int] = (6, 1),
        summer_final: Tuple[int, int] = (9, 30),
        min_energy_weight: float = 0.3,
        lambda_energy: float = 2e-4,
        lambda_temperature: float = 2,
        range_comfort_hours: tuple = (8, 19)
    ):
        """
        Exponential-comfort reward with a time-dependent weight for the energy and comfort terms.

        Inside ``range_comfort_hours`` the energy term is weighted by
        ``min_energy_weight`` and the comfort term by the remainder; outside
        those hours only the energy term counts.

        Args:
            env (Env): Gym environment.
            temperature_variable (Union[str, list]): Name(s) of the temperature variable(s).
            energy_variable (str): Name of the energy/power variable.
            range_comfort_winter (Tuple[int,int]): Temperature comfort range for cold season. Defaults to (20,23).
            range_comfort_summer (Tuple[int,int]): Temperature comfort range for hot season. Defaults to (23,26).
            summer_start (Tuple[int,int]): Summer session tuple with month and day start. Defaults to (6,1).
            summer_final (Tuple[int,int]): Summer session tuple with month and day end. Defaults to (9,30).
            min_energy_weight (float, optional): Minimum weight given to the energy term. Defaults to 0.3.
            lambda_energy (float, optional): Constant for removing dimensions from power(1/W). Defaults to 2e-4.
            lambda_temperature (float, optional): Constant for removing dimensions from temperature(1/C). Defaults to 2.
            range_comfort_hours (tuple, optional): Hours where thermal comfort is considered. Defaults to (8, 19).
        """

        # The parent stores min_energy_weight as self.W_energy.
        super().__init__(
            env,
            temperature_variable,
            energy_variable,
            range_comfort_winter,
            range_comfort_summer,
            summer_start,
            summer_final,
            min_energy_weight,
            lambda_energy,
            lambda_temperature
        )

        # Reward parameters
        self.range_comfort_hours = range_comfort_hours

    def __call__(self) -> Tuple[float, Dict[str, Any]]:
        """Calculate the reward function.

        Returns:
            Tuple[float, Dict[str, Any]]: Reward and dict with reward terms.
        """
        # Work on a copy so the environment's own observation is untouched.
        obs_dict = self.env.obs_dict.copy()

        # Energy term. Uses the configured variable name, the same one that
        # is reported as 'total_energy' below, instead of a hard-coded key.
        reward_energy = - self.lambda_energy * obs_dict[self.energy_name]

        # Comfort term
        comfort, temps = self._get_comfort(obs_dict)
        if comfort == 0:
            reward_comfort = 0
        else:
            reward_comfort = - self.lambda_temp * comfort

        # Determine energy weight depending on the hour (both ends inclusive).
        # NOTE(review): the inherited _get_comfort treats hours outside
        # range(8, 19) as off-hours, so hour 19 is weighted as a comfort hour
        # here but judged against the relaxed band there -- confirm intent.
        hour = obs_dict['hour']
        first_hour, last_hour = (self.range_comfort_hours[0],
                                 self.range_comfort_hours[1])
        weight = self.W_energy if first_hour <= hour <= last_hour else 1

        # Weighted sum of both terms
        reward = weight * reward_energy + (1.0 - weight) * reward_comfort

        reward_terms = {
            'reward_energy': reward_energy,
            'total_energy': obs_dict[self.energy_name],
            'reward_comfort': reward_comfort,
            # Reported like MyLinearReward does, so consumers of the reward
            # terms receive the raw comfort penalty as well.
            'abs_comfort': comfort,
            'temperatures': temps
        }

        return reward, reward_terms

class MyHourlyLinearReward(MyLinearReward):

    def __init__(
        self,
        env: Env,
        temperature_variable: Union[str, list],
        energy_variable: str,
        range_comfort_winter: Tuple[int, int],
        range_comfort_summer: Tuple[int, int],
        summer_start: Tuple[int, int] = (6, 1),
        summer_final: Tuple[int, int] = (9, 30),
        min_energy_weight: float = 0.3,
        lambda_energy: float = 2e-4,
        lambda_temperature: float = 2,
        range_comfort_hours: tuple = (8, 19),
    ):
        """
        Reward function with a time-dependent weight for the energy and comfort terms.

        The comfort penalty is the *square* of the comfort deviation returned
        by ``_get_comfort``, scaled by ``lambda_temperature``. Inside
        ``range_comfort_hours`` the energy term is weighted by
        ``min_energy_weight`` and the comfort term by the remainder; outside
        those hours only the energy term counts.

        Args:
            env (Env): Gym environment.
            temperature_variable (Union[str, list]): Name(s) of the temperature variable(s).
            energy_variable (str): Name of the energy/power variable.
            range_comfort_winter (Tuple[int,int]): Temperature comfort range for cold season. Depends on environment you are using.
            range_comfort_summer (Tuple[int,int]): Temperature comfort range for hot season. Depends on environment you are using.
            summer_start (Tuple[int,int]): Summer session tuple with month and day start. Defaults to (6,1).
            summer_final (Tuple[int,int]): Summer session tuple with month and day end. Defaults to (9,30).
            min_energy_weight (float, optional): Minimum weight given to the energy term. Defaults to 0.3.
            lambda_energy (float, optional): Constant for removing dimensions from power(1/W). Defaults to 2e-4.
            lambda_temperature (float, optional): Constant for removing dimensions from temperature(1/C). Defaults to 2.
            range_comfort_hours (tuple, optional): Hours where thermal comfort is considered. Defaults to (8, 19).
        """

        # The parent stores min_energy_weight as self.W_energy.
        super().__init__(
            env,
            temperature_variable,
            energy_variable,
            range_comfort_winter,
            range_comfort_summer,
            summer_start,
            summer_final,
            min_energy_weight,
            lambda_energy,
            lambda_temperature
        )

        # Reward parameters
        self.range_comfort_hours = range_comfort_hours

    def __call__(self) -> Tuple[float, Dict[str, Any]]:
        """Calculate the reward function.

        Returns:
            Tuple[float, Dict[str, Any]]: Reward and dict with reward terms.
        """
        # Work on a copy so the environment's own observation is untouched.
        obs_dict = self.env.obs_dict.copy()

        # Energy term
        reward_energy = - self.lambda_energy * obs_dict[self.energy_name]

        # Comfort term: quadratic in the deviation from the comfort band.
        comfort, temps = self._get_comfort(obs_dict)
        if comfort == 0:
            reward_comfort = 0
        else:
            reward_comfort = - self.lambda_temp * (comfort)**2

        # Determine energy weight depending on the hour (both ends inclusive).
        # NOTE(review): the inherited _get_comfort treats hours outside
        # range(8, 19) as off-hours, so hour 19 is weighted as a comfort hour
        # here but judged against the relaxed band there -- confirm intent.
        hour = obs_dict['hour']
        first_hour, last_hour = (self.range_comfort_hours[0],
                                 self.range_comfort_hours[1])
        weight = self.W_energy if first_hour <= hour <= last_hour else 1

        # Weighted sum of both terms
        reward = weight * reward_energy + (1.0 - weight) * reward_comfort

        reward_terms = {
            'reward_energy': reward_energy,
            'total_energy': obs_dict[self.energy_name],
            'reward_comfort': reward_comfort,
            # Reported like MyLinearReward does (unsquared deviation), so
            # consumers of the reward terms receive the raw value as well.
            'abs_comfort': comfort,
            'temperatures': temps
        }

        return reward, reward_terms



In [6]:
from sinergym.utils.controllers import RBC5Zone

class MyRuleBasedController(RBC5Zone):

    def act(self, observation: List[Any]) -> Sequence[Any]:
        """Select the setpoint pair from the calendar date and hour of day.

        Weekends and hours outside 08:00-19:00 use a relaxed (18, 25) pair.
        During working hours the pair is (23, 26) between 1 June and
        30 September of the observed year, and (20, 23) otherwise.

        Args:
            observation (List[Any]): Perceived observation.

        Returns:
            Sequence[Any]: Action chosen.
        """
        obs_dict = dict(zip(self.variables['observation'], observation))

        begin_working_hour = 8
        end_working_hour = 19  # exclusive

        year = int(obs_dict['year'])
        month = int(obs_dict['month'])
        day = int(obs_dict['day'])
        hour = int(obs_dict['hour'])

        current_dt = datetime(year, month, day)

        # weekday() is 5 for Saturday and 6 for Sunday.
        is_weekend = current_dt.weekday() >= 5
        is_working_hour = begin_working_hour <= hour < end_working_hour
        if is_weekend or not is_working_hour:
            return (18, 25)

        summer_start_date = datetime(year, 6, 1)
        summer_final_date = datetime(year, 9, 30)
        if summer_start_date <= current_dt <= summer_final_date:
            return (23, 26)
        return (20, 23)

In [7]:

experiment_date = datetime.today().strftime('%Y-%m-%d %H:%M')
# Alternative environment: 'Eplus-demo-v1'
environment = "Eplus-5Zone-hot-continuous-v1"

# Alternative weather file:
# "USA_NY_New.York-J.F.Kennedy.Intl.AP.744860_TMY3.epw"
# NOTE(review): the original note here read "Granada = Amsterdam" --
# confirm what was meant.
weather = "ESP_Granada.084190_SWEC.epw"

# Simulation period.
# NOTE(review): the introduction mentions a one-month run, but with
# end_year = 2022 the period below runs from 1 Jan 2021 to 1 Feb 2022 --
# confirm the intended end date.
begin_day = 1
begin_month = 1
begin_year = 2021
end_day = 1
end_month = 2
end_year = 2022

# TODO: the schedule still needs to change, including weekend handling.
# NOTE: new_observation_variables and new_observation_space are defined
# here but are not passed to gym.make() below.
new_observation_variables = [
    'Site Outdoor Air Drybulb Temperature(Environment)',
    'Site Diffuse Solar Radiation Rate per Area(Environment)',
    'Zone Thermostat Heating Setpoint Temperature(SPACE1-1)',
    'Zone Thermostat Cooling Setpoint Temperature(SPACE1-1)',
    'Zone Air Temperature(PLENUM-1)',
    'Zone Air Temperature(SPACE1-1)',
    'Zone Air Temperature(SPACE2-1)',
    'Zone Air Temperature(SPACE3-1)',
    'Zone Air Temperature(SPACE4-1)',
    # The original listed SPACE4-1 twice; presumably SPACE5-1 was meant.
    'Zone Air Temperature(SPACE5-1)',
    'Zone People Occupant Count(SPACE1-1)',
    'Facility Total HVAC Electricity Demand Rate(Whole Building)']

# The "+ 4" presumably accounts for the year/month/day/hour fields -- TODO confirm.
new_observation_space = gym.spaces.Box(
    low=-5e6,
    high=5e6,
    shape=(len(new_observation_variables) + 4,),
    dtype=np.float32)

extra_params = {
    'timesteps_per_hour': 4,
    'runperiod': (begin_day, begin_month, begin_year,
                  end_day, end_month, end_year),
}

env = gym.make(environment,
               weather_file=weather,
               reward=MyHourlyExpReward,
               config_params=extra_params,
               )

# Make sure the environment is closed even if the run below fails.
try:
    env = LoggerWrapper(env)

    # create rule-based controller
    agent = MyRuleBasedController(env)

    for i in range(1):
        obs = env.reset()
        rewards = []
        done = False
        current_month = 0
        while not done:
            action = agent.act(obs)
            obs, reward, done, info = env.step(action)
            rewards.append(reward)
            # display the cumulative reward whenever a new month starts
            if info['month'] != current_month:
                current_month = info['month']
                print('Reward: ', sum(rewards), info)
finally:
    env.close()

[2023-02-09 13:12:24,362] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:Updating idf ExternalInterface object if it is not present...
[2023-02-09 13:12:24,362] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:Updating idf ExternalInterface object if it is not present...
[2023-02-09 13:12:24,370] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:Updating idf Site:Location and SizingPeriod:DesignDay(s) to weather and ddy file...
[2023-02-09 13:12:24,370] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:Updating idf Site:Location and SizingPeriod:DesignDay(s) to weather and ddy file...
[2023-02-09 13:12:24,386] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:Updating idf OutPut:Variable and variables XML tree model for BVCTB connection.
[2023-02-09 13:12:24,386] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:Updating idf OutPut:Variable and variables XML tree model for BVCTB connection.
[2023-02-09 13:12:24,400] EPLUS_ENV_5Zone-mixed-co

  logger.warn(


[2023-02-09 13:12:24,632] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /workspaces/sinergym/examples/Eplus-env-5Zone-mixed-continuous-v1-res4/Eplus-env-sub_run1
[2023-02-09 13:12:24,632] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:EnergyPlus working directory is in /workspaces/sinergym/examples/Eplus-env-5Zone-mixed-continuous-v1-res4/Eplus-env-sub_run1
Reward:  -0.12794805467167467 {'timestep': 1, 'time_elapsed': 900, 'year': 2021, 'month': 1, 'day': 1, 'hour': 0, 'total_power': 639.7402733583733, 'total_power_no_units': -0.12794805467167467, 'comfort_penalty': 0, 'abs_comfort': None, 'temperatures': [19.59610779743866], 'out_temperature': 5.275, 'action_': [18, 25]}
Reward:  -1731.6916554342445 {'timestep': 2976, 'time_elapsed': 2678400, 'year': 2021, 'month': 2, 'day': 1, 'hour': 0, 'total_power': 381.2043095500852, 'total_power_no_units': -0.07624086191001704, 'comfort_penalty': 0, 'abs_comfort': None, 'temperatures': [18.

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


[2023-02-09 13:17:15,808] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:EnergyPlus simulation closed successfully. 
[2023-02-09 13:17:15,808] EPLUS_ENV_5Zone-mixed-continuous-v1_MainThread_ROOT INFO:EnergyPlus simulation closed successfully. 


In [11]:
# Sanity check: does range(8, 19) contain its lower bound 8?
print("ja" if 8 in range(8, 19) else "nee")

ja
