## This notebook covers the CityLearn class

This first part with the import statements and the RBC_Agent, auto_size function and building_loader function can be skipped as these were discussed during the previous notebook

In [97]:
import gym
from gym.utils import seeding
import numpy as np
import pandas as pd
import json
from gym import spaces
from energy_models import HeatPump, ElectricHeater, EnergyStorage, Building
from reward_function import reward_function_sa, reward_function_ma
from pathlib import Path
from citylearn import  CityLearn
gym.logger.set_level(40)

# To get rid of the VisibleDeprecationWarning
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning) 

# Reference Rule-based controller. Used as a baseline to calculate the costs in CityLearn
# It requires, at least, the hour of the day as input state
class RBC_Agent:
    def __init__(self, actions_spaces):
        self.actions_spaces = actions_spaces
        self.reset_action_tracker()
        
    def reset_action_tracker(self):
        self.action_tracker = []
        
    def select_action(self, states):
        """
        This method is called every hour of the day such that each hour, the action is determined
        """
        hour_day = states[0]
        
        # Daytime: release stored energy when hour of the day is between 9 and 21 (13 hours)
        # a is a list with seperate action lists per building (initialized with zeros)
        a = [[0.0 for _ in range(len(self.actions_spaces[i].sample()))] for i in range(len(self.actions_spaces))]
        if hour_day >= 9 and hour_day <= 21:
            # the list a is filled with values of -0.08 when the hour_day is during day time
            a = [[-0.08 for _ in range(len(self.actions_spaces[i].sample()))] for i in range(len(self.actions_spaces))]
        
        # Early nightime: store DHW and/or cooling energy when hour of the day is past 21h and before 9h (11 hours)
        if (hour_day >= 1 and hour_day <= 8) or (hour_day >= 22 and hour_day <= 24):
            a = []
            for i in range(len(self.actions_spaces)):
                if len(self.actions_spaces[i].sample()) == 2:
                    a.append([0.091, 0.091])
                else:
                    a.append([0.091])
   
        self.action_tracker.append(a)
        
        return np.array(a)

def auto_size(buildings):
    for building in buildings.values():
        
        # Autosize guarantees that the DHW device is large enough to always satisfy the maximum DHW demand
        if building.dhw_heating_device.nominal_power == 'autosize':
            
            # If the DHW device is a HeatPump
            if isinstance(building.dhw_heating_device, HeatPump):
                
                #We assume that the heat pump is always large enough to meet the highest heating or cooling demand of the building
                building.dhw_heating_device.nominal_power = np.array(building.sim_results['dhw_demand']/building.dhw_heating_device.cop_heating).max()
                
            # If the device is an electric heater
            elif isinstance(building.dhw_heating_device, ElectricHeater):
                building.dhw_heating_device.nominal_power = (np.array(building.sim_results['dhw_demand'])/building.dhw_heating_device.efficiency).max()
        
        # Autosize guarantees that the cooling device device is large enough to always satisfy the maximum DHW demand
        if building.cooling_device.nominal_power == 'autosize':

            building.cooling_device.nominal_power = (np.array(building.sim_results['cooling_demand'])/building.cooling_device.cop_cooling).max()
        
        # Defining the capacity of the storage devices as a number of times the maximum demand
        building.dhw_storage.capacity = max(building.sim_results['dhw_demand'])*building.dhw_storage.capacity
        building.cooling_storage.capacity = max(building.sim_results['cooling_demand'])*building.cooling_storage.capacity
        
        # Done in order to avoid dividing by 0 if the capacity is 0
        if building.dhw_storage.capacity <= 0.00001:
            building.dhw_storage.capacity = 0.00001
        if building.cooling_storage.capacity <= 0.00001:
            building.cooling_storage.capacity = 0.00001
        
        
def building_loader(data_path, building_attributes, weather_file, solar_profile, building_ids, buildings_states_actions, save_memory = True):
    with open(building_attributes) as json_file:
        data = json.load(json_file)

    buildings, observation_spaces, action_spaces = {},[],[]
    s_low_central_agent, s_high_central_agent, appended_states = [], [], []
    a_low_central_agent, a_high_central_agent, appended_actions = [], [], []
    for uid, attributes in zip(data, data.values()):
        if uid in building_ids:
            heat_pump = HeatPump(nominal_power = attributes['Heat_Pump']['nominal_power'], 
                                 eta_tech = attributes['Heat_Pump']['technical_efficiency'], 
                                 t_target_heating = attributes['Heat_Pump']['t_target_heating'], 
                                 t_target_cooling = attributes['Heat_Pump']['t_target_cooling'], save_memory = save_memory)

            electric_heater = ElectricHeater(nominal_power = attributes['Electric_Water_Heater']['nominal_power'], 
                                             efficiency = attributes['Electric_Water_Heater']['efficiency'], save_memory = save_memory)

            chilled_water_tank = EnergyStorage(capacity = attributes['Chilled_Water_Tank']['capacity'],
                                               loss_coeff = attributes['Chilled_Water_Tank']['loss_coefficient'], save_memory = save_memory)

            dhw_tank = EnergyStorage(capacity = attributes['DHW_Tank']['capacity'],
                                     loss_coeff = attributes['DHW_Tank']['loss_coefficient'], save_memory = save_memory)

            building = Building(buildingId = uid, dhw_storage = dhw_tank, cooling_storage = chilled_water_tank, dhw_heating_device = electric_heater, cooling_device = heat_pump, save_memory = save_memory)

            data_file = str(uid) + '.csv'
            simulation_data = data_path / data_file
            with open(simulation_data) as csv_file:
                data = pd.read_csv(csv_file)

            building.sim_results['cooling_demand'] = list(data['Cooling Load [kWh]'])
            building.sim_results['dhw_demand'] = list(data['DHW Heating [kWh]'])
            building.sim_results['non_shiftable_load'] = list(data['Equipment Electric Power [kWh]'])
            building.sim_results['month'] = list(data['Month'])
            building.sim_results['day'] = list(data['Day Type'])
            building.sim_results['hour'] = list(data['Hour'])
            building.sim_results['daylight_savings_status'] = list(data['Daylight Savings Status'])
            building.sim_results['t_in'] = list(data['Indoor Temperature [C]'])
            building.sim_results['avg_unmet_setpoint'] = list(data['Average Unmet Cooling Setpoint Difference [C]'])
            building.sim_results['rh_in'] = list(data['Indoor Relative Humidity [%]'])
            
            with open(weather_file) as csv_file:
                weather_data = pd.read_csv(csv_file)
                
            building.sim_results['t_out'] = list(weather_data['Outdoor Drybulb Temperature [C]'])
            building.sim_results['rh_out'] = list(weather_data['Outdoor Relative Humidity [%]'])
            building.sim_results['diffuse_solar_rad'] = list(weather_data['Diffuse Solar Radiation [W/m2]'])
            building.sim_results['direct_solar_rad'] = list(weather_data['Direct Solar Radiation [W/m2]'])
            
            # Reading weather forecasts
            building.sim_results['t_out_pred_6h'] = list(weather_data['6h Prediction Outdoor Drybulb Temperature [C]'])
            building.sim_results['t_out_pred_12h'] = list(weather_data['12h Prediction Outdoor Drybulb Temperature [C]'])
            building.sim_results['t_out_pred_24h'] = list(weather_data['24h Prediction Outdoor Drybulb Temperature [C]'])
            
            building.sim_results['rh_out_pred_6h'] = list(weather_data['6h Prediction Outdoor Relative Humidity [%]'])
            building.sim_results['rh_out_pred_12h'] = list(weather_data['12h Prediction Outdoor Relative Humidity [%]'])
            building.sim_results['rh_out_pred_24h'] = list(weather_data['24h Prediction Outdoor Relative Humidity [%]'])
            
            building.sim_results['diffuse_solar_rad_pred_6h'] = list(weather_data['6h Prediction Diffuse Solar Radiation [W/m2]'])
            building.sim_results['diffuse_solar_rad_pred_12h'] = list(weather_data['12h Prediction Diffuse Solar Radiation [W/m2]'])
            building.sim_results['diffuse_solar_rad_pred_24h'] = list(weather_data['24h Prediction Diffuse Solar Radiation [W/m2]'])
            
            building.sim_results['direct_solar_rad_pred_6h'] = list(weather_data['6h Prediction Direct Solar Radiation [W/m2]'])
            building.sim_results['direct_solar_rad_pred_12h'] = list(weather_data['12h Prediction Direct Solar Radiation [W/m2]'])
            building.sim_results['direct_solar_rad_pred_24h'] = list(weather_data['24h Prediction Direct Solar Radiation [W/m2]'])
            
            # Reading the building attributes
            building.building_type = attributes['Building_Type']
            building.climate_zone = attributes['Climate_Zone']
            building.solar_power_capacity = attributes['Solar_Power_Installed(kW)']

            with open(solar_profile) as csv_file:
                data = pd.read_csv(csv_file)

            building.sim_results['solar_gen'] = list(attributes['Solar_Power_Installed(kW)']*data['Hourly Data: AC inverter power (W)']/1000)
            
            # Finding the max and min possible values of all the states, which can then be used by the RL agent to scale the states and train any function approximators more effectively
            s_low, s_high = [], []
            for state_name, value in zip(buildings_states_actions[uid]['states'], buildings_states_actions[uid]['states'].values()):
                if value == True:
                    if state_name == "net_electricity_consumption":
                        # lower and upper bounds of net electricity consumption are rough estimates and may not be completely accurate. Scaling this state-variable using these bounds may result in normalized values above 1 or below 0.
                        _net_elec_cons_upper_bound = max(np.array(building.sim_results['non_shiftable_load']) - np.array(building.sim_results['solar_gen']) + np.array(building.sim_results['dhw_demand'])/.8 + np.array(building.sim_results['cooling_demand']) + building.dhw_storage.capacity/.8 + building.cooling_storage.capacity/2)
                        s_low.append(0.)
                        s_high.append(_net_elec_cons_upper_bound)
                        s_low_central_agent.append(0.)
                        s_high_central_agent.append(_net_elec_cons_upper_bound)
                        
                    elif state_name != 'cooling_storage_soc' and state_name != 'dhw_storage_soc':
                        s_low.append(min(building.sim_results[state_name]))
                        s_high.append(max(building.sim_results[state_name]))
                        
                        # Create boundaries of the observation space of a centralized agent (if a central agent is being used instead of decentralized ones). We include all the weather variables used as states, and use the list appended_states to make sure we don't include any repeated states (i.e. weather variables measured by different buildings)
                        if state_name in ['t_in', 'avg_unmet_setpoint', 'rh_in', 'non_shiftable_load', 'solar_gen']:
                            s_low_central_agent.append(min(building.sim_results[state_name]))
                            s_high_central_agent.append(max(building.sim_results[state_name]))
                            
                        elif state_name not in appended_states:
                            s_low_central_agent.append(min(building.sim_results[state_name]))
                            s_high_central_agent.append(max(building.sim_results[state_name]))
                            appended_states.append(state_name)
                    else:
                        s_low.append(0.0)
                        s_high.append(1.0)
                        s_low_central_agent.append(0.0)
                        s_high_central_agent.append(1.0)
            
            '''The energy storage (tank) capacity indicates how many times bigger the tank is compared to the maximum hourly energy demand of the building (cooling or DHW respectively), which sets a lower bound for the action of 1/tank_capacity, as the energy storage device can't provide the building with more energy than it will ever need for a given hour. The heat pump is sized using approximately the maximum hourly energy demand of the building (after accounting for the COP, see function autosize). Therefore, we make the fair assumption that the action also has an upper bound equal to 1/tank_capacity. This boundaries should speed up the learning process of the agents and make them more stable rather than if we just set them to -1 and 1. I.e. if Chilled_Water_Tank.Capacity is 3 (3 times the max. hourly demand of the building in the entire year), its actions will be bounded between -1/3 and 1/3'''
            a_low, a_high = [], []    
            for action_name, value in zip(buildings_states_actions[uid]['actions'], buildings_states_actions[uid]['actions'].values()):
                if value == True:
                    if action_name =='cooling_storage':
                        
                        # Avoid division by 0
                        if attributes['Chilled_Water_Tank']['capacity'] > 0.000001:                            
                            a_low.append(max(-1.0/attributes['Chilled_Water_Tank']['capacity'], -1.0))
                            a_high.append(min(1.0/attributes['Chilled_Water_Tank']['capacity'], 1.0))
                            a_low_central_agent.append(max(-1.0/attributes['Chilled_Water_Tank']['capacity'], -1.0))
                            a_high_central_agent.append(min(1.0/attributes['Chilled_Water_Tank']['capacity'], 1.0))
                        else:
                            a_low.append(-1.0)
                            a_high.append(1.0)
                            a_low_central_agent.append(-1.0)
                            a_high_central_agent.append(1.0)
                    else:
                        if attributes['DHW_Tank']['capacity'] > 0.000001:
                            a_low.append(max(-1.0/attributes['DHW_Tank']['capacity'], -1.0))
                            a_high.append(min(1.0/attributes['DHW_Tank']['capacity'], 1.0))
                            a_low_central_agent.append(max(-1.0/attributes['DHW_Tank']['capacity'], -1.0))
                            a_high_central_agent.append(min(1.0/attributes['DHW_Tank']['capacity'], 1.0))
                        else:
                            a_low.append(-1.0)
                            a_high.append(1.0)
                            a_low_central_agent.append(-1.0)
                            a_high_central_agent.append(1.0)
                        
            building.set_state_space(np.array(s_high), np.array(s_low))
            building.set_action_space(np.array(a_high), np.array(a_low))
            
            observation_spaces.append(building.observation_space)
            action_spaces.append(building.action_space)
            
            buildings[uid] = building
    
    observation_space_central_agent = spaces.Box(low=np.float32(np.array(s_low_central_agent)), high=np.float32(np.array(s_high_central_agent)), dtype=np.float32)
    action_space_central_agent = spaces.Box(low=np.float32(np.array(a_low_central_agent)), high=np.float32(np.array(a_high_central_agent)), dtype=np.float32)
        
    for building in buildings.values():

        # If the DHW device is a HeatPump
        if isinstance(building.dhw_heating_device, HeatPump):
                
            # Calculating COPs of the heat pumps for every hour
            building.dhw_heating_device.cop_heating = building.dhw_heating_device.eta_tech*(building.dhw_heating_device.t_target_heating + 273.15)/(building.dhw_heating_device.t_target_heating - weather_data['Outdoor Drybulb Temperature [C]'])
            building.dhw_heating_device.cop_heating[building.dhw_heating_device.cop_heating < 0] = 20.0
            building.dhw_heating_device.cop_heating[building.dhw_heating_device.cop_heating > 20] = 20.0
            building.dhw_heating_device.cop_heating = building.dhw_heating_device.cop_heating.to_numpy()

        building.cooling_device.cop_cooling = building.cooling_device.eta_tech*(building.cooling_device.t_target_cooling + 273.15)/(weather_data['Outdoor Drybulb Temperature [C]'] - building.cooling_device.t_target_cooling)
        building.cooling_device.cop_cooling[building.cooling_device.cop_cooling < 0] = 20.0
        building.cooling_device.cop_cooling[building.cooling_device.cop_cooling > 20] = 20.0
        building.cooling_device.cop_cooling = building.cooling_device.cop_cooling.to_numpy()
        
        building.reset()
        
    auto_size(buildings)

    return buildings, observation_spaces, action_spaces, observation_space_central_agent, action_space_central_agent


### 1. Input attributes to the CityLearn class

In [98]:
# Load environment
climate_zone = 1
data_path = Path("data/Climate_Zone_"+str(climate_zone))
building_attributes = data_path / 'building_attributes.json'
weather_file = data_path / 'weather_data.csv'
solar_profile = data_path / 'solar_generation_1kW.csv'
buildings_states_actions_json = 'buildings_state_action_space.json'
building_ids = ["Building_1","Building_2","Building_3","Building_4","Building_5","Building_6","Building_7","Building_8","Building_9"]
cost_function = ['ramping','1-load_factor','average_daily_peak','peak_demand','net_electricity_consumption','quadratic']
simulation_period = (0,8759)
central_agent = False # Decide whether or not there is a central single agent or there are decentralized, multi agents
verbose = 0

### 2. Init function
Fill all the internal variables with their corresponding values from the input variables and instantiate some new variables as well which will be needed later on

In [99]:
with open(buildings_states_actions_json) as json_file:
    buildings_states_actions = json.load(json_file)
        
buildings_states_actions_filename = buildings_states_actions_json
buildings_net_electricity_demand = []
building_attributes = building_attributes
solar_profile = solar_profile
building_ids = building_ids
cost_function = cost_function
cost_rbc = None
data_path = data_path
weather_file = weather_file
central_agent = central_agent
loss = []
verbose = verbose

buildings, observation_spaces, action_spaces, observation_space, action_space = building_loader(data_path, building_attributes, weather_file, solar_profile, building_ids, buildings_states_actions)

simulation_period = simulation_period
uid = None
n_buildings = len([i for i in buildings])
#reset() #this resets variables and makes that the variable hour is initialized

Skip this function get_building_information() at the moment, but this one is used in the reset() function below.
Actually this function also does not need any input variables, but here I had to give it the buildings variable because we're not able to just pass the self parameter as were not working with classes.

In [100]:
def get_building_information(buildings):
    """
    Returns a dictionary with a dictionary per building with info regarding its annual electricity demand, annual
    DHW demand, annual Cooling demand and correlations between the buildings.
    """
    np.seterr(divide='ignore', invalid='ignore')
    
    # Calculates the annual DHW demand, Annual Cooling Demand, Annual Electricity Demand
    building_info = {}
    for uid, building in buildings.items():
        building_info[uid] = {}
        building_info[uid]['building_type'] = building.building_type
        building_info[uid]['climate_zone'] = building.climate_zone
        building_info[uid]['solar_power_capacity (kW)'] = round(building.solar_power_capacity, 3)
        building_info[uid]['Annual_DHW_demand (kWh)'] = round(sum(building.sim_results['dhw_demand']), 3)
        building_info[uid]['Annual_cooling_demand (kWh)'] = round(sum(building.sim_results['cooling_demand']), 3)
        building_info[uid]['Annual_nonshiftable_electrical_demand (kWh)'] = round(sum(building.sim_results['non_shiftable_load']), 3)
        
        building_info[uid]['Correlations_DHW'] = {}
        building_info[uid]['Correlations_cooling_demand'] = {}
        building_info[uid]['Correlations_non_shiftable_load'] = {}
        
        # Calculates the correlation coefficient between the DHW_demand, cooling_demand and non_shiftable_load
        # Gives an indication on how similar buildings are
        for uid_corr, building_corr in buildings.items():
            if uid_corr != uid:
                building_info[uid]['Correlations_DHW'][uid_corr] = round((np.corrcoef(np.array(building.sim_results['dhw_demand']), np.array(building_corr.sim_results['dhw_demand'])))[0][1], 3)
                building_info[uid]['Correlations_cooling_demand'][uid_corr] = round((np.corrcoef(np.array(building.sim_results['cooling_demand']), np.array(building_corr.sim_results['cooling_demand'])))[0][1], 3)
                building_info[uid]['Correlations_non_shiftable_load'][uid_corr] = round((np.corrcoef(np.array(building.sim_results['non_shiftable_load']), np.array(building_corr.sim_results['non_shiftable_load'])))[0][1], 3)

    # returns a dictionary with a dictionary per building with info regarding its annual consumption, generation and correlations
    return building_info

### 3. reset()
As we saw in the initialization function, at the end, the reset() function is called. Now what does this reset function exactly do? Let's have a look at it below:
1. Initialization of the hour variable
2. Function next_hour() is called which sets the internal variable time_step to the hour
3. Instantiate all the variables which relate to the electric consumption and generation
4. Calculate the inital state for each building. 
    * When working with a central single agent (central_agent = True), this means that we just get one array which consists of all the states of the different buildings, but appended to one another. So when having 3 buildings, this would look as one array(states_buildings).
    * When working with multi agents (central_agent = False) the output is an array of states which has a state for each building separately so when having 3 buildings, this would look as array(states_building_1, states_building_2, states_building_3)
5. Return the corresponding initial state

In [101]:
#1) Initialization of the hour variable
#hour is an iterator object which can be iterated through by just calling next(hour) and this returns the next hour
#so the first time we call this function, it'll return 0, the next time 1 etc.
hour = iter(np.array(range(simulation_period[0], simulation_period[1] + 1)))  

#2) The function next_hour() is called which sets the internal variable time_step to the hour
# as you work with self. in classes, we have to write the function in full because otherwise the variable is not instantiated
# next_hour()
time_step = next(hour)
for building in buildings.values():
    time_step = time_step

#3) instantiate the following variables which will be used to store all the electric consumptions
net_electric_consumption = []
net_electric_consumption_no_storage = []
net_electric_consumption_no_pv_no_storage = []
electric_consumption_dhw_storage = []
electric_consumption_cooling_storage = []
electric_consumption_dhw = []
electric_consumption_cooling = []
electric_consumption_appliances = []
electric_generation = []

cumulated_reward_episode = 0

#4) Calculate the inital state for each building
# Use a central, single agent
if central_agent:
    s, s_appended = [], [] #two lists which keep track of the current initial state s and a list with the common state variables
    # which are the same for all the buildings such as month, day, hour, solar radiation..
    
    # Loop over the different buildings
    for uid, building in buildings.items():
        building.reset() # resets the building attributes, but also calculates the net_electricity_consumption at the start
        
        # Loop over the different state names and their corresponding values True/False (whether or not the building has it)
        for state_name, value in buildings_states_actions[uid]['states'].items():
            if state_name not in s_appended:
                if value == True:
                    #Building specific variables (t_in, avg_unmet_setpoint, rh_in, non_shiftable_load, solar_gen)
                    if state_name in ['t_in', 'avg_unmet_setpoint', 'rh_in', 'non_shiftable_load', 'solar_gen']:
                        s.append(building.sim_results[state_name][time_step])
                    #Building specific variable net_electricity_consumption
                    elif state_name == 'net_electricity_consumption':
                        s.append(building.current_net_electricity_demand)
                    #Most of the variables that are left (except cooling storage soc and dhw storage soc) are the 
                    #'general variables' and these will only be added once to the variable s (state) 
                    elif state_name != 'cooling_storage_soc' and state_name != 'dhw_storage_soc':
                        s.append(building.sim_results[state_name][time_step])
                        s_appended.append(state_name)
                        
                    # The initial cooling and dhw storage soc is set to 0
                    elif state_name == 'cooling_storage_soc':
                        s.append(0.0)
                    elif state_name == 'dhw_storage_soc':
                        s.append(0.0)
    # This is one array of 81 values (the general weather data which is the same for all buildings and then all the building 
    # specific data such as solar_gen, non_shiftable_load, t_in, ..)
    state = np.array(s, dtype="object")

# Use decentralized, multi agents
else:
    # reward function is an object of the class reward_function_ma (see reward_function.py file) and takes as input
    # the number of agents (number of buildings) and the building information (slight adaption of the function)
    reward_function = reward_function_ma(len(building_ids), get_building_information(buildings))

    state = []
    for uid, building in buildings.items():
        building.reset()
        s = []
        # This zip function is just the same as above in which we call the .items() method
        for state_name, value in zip(buildings_states_actions[uid]['states'], buildings_states_actions[uid]['states'].values()):
            if value == True:
                if state_name == 'net_electricity_consumption':
                    s.append(building.current_net_electricity_demand)
                elif state_name != 'cooling_storage_soc' and state_name != 'dhw_storage_soc':
                    s.append(building.sim_results[state_name][time_step])
                elif state_name == 'cooling_storage_soc':
                    s.append(0.0)
                elif state_name == 'dhw_storage_soc':
                    s.append(0.0)

        state.append(np.array(s, dtype=np.float32))

    state = np.array(state, dtype="object")

# the _get_obj() function is called which basically just returns the state variable
# _get_obj()

Do some print statements to check how everything looks like

In [102]:
print(len(state))

9


In [103]:
print(state[0])

[  1.     8.     1.    17.81  16.19  25.29  18.31  68.12  76.47  44.1
  67.32   0.     0.   171.45   0.     0.     0.   577.41   0.    23.77
  42.87   9.89   0.     0.     0.     9.89]


In [104]:
print(buildings['Building_1'].action_space.low)
print(buildings['Building_1'].action_space.high)
print(buildings['Building_1'].observation_space.low)
print(buildings['Building_1'].observation_space.high)

[-0.33333334 -0.33333334]
[0.33333334 0.33333334]
[ 1.    1.    1.   -1.37 -1.55 -1.77 -2.2  29.75 29.57 29.39 28.26  0.
  0.    0.    0.    0.    0.    0.    0.   17.97 21.13  7.44  0.    0.
  0.    0.  ]
[1.2000000e+01 8.0000000e+00 2.4000000e+01 3.5540001e+01 3.5650002e+01
 3.5770000e+01 3.6110001e+01 1.0000000e+02 1.0000000e+02 1.0000000e+02
 1.0000000e+02 4.9356000e+02 4.9714001e+02 5.0534000e+02 5.2245001e+02
 1.0111200e+03 1.0165400e+03 1.0305100e+03 1.0584700e+03 2.6340000e+01
 6.7459999e+01 7.0910004e+01 9.9675598e+01 1.0000000e+00 1.0000000e+00
 3.2535483e+02]


### 4. get_baseline_cost() method
This method computes the costs for the Rule-based controller, which are used to normalized the actual costs.
Costs which are calculated relate to the following ones:
* Ramping: sum(|e(t)-e(t-1)|), where e is the net non-negative electricity consumption every time-step
* 1-load factor: load factor is the average net electricity load divided by the maximum electricity load.
* Average daily peak: Average of all the daily peaks of the 365 day of the year. 
* Peak demand: the max peak demand over all days (the whole year)
* Net electricity consumption: total electricity consumption (without negative values)
* Quadratic: quadratic version of total electricity consumption (not used by CityLearn challenge)

In [105]:
def get_baseline_cost():
        
    # Computes the costs for the Rule-based controller, which are used to normalized the actual costs.
    cost = {}
    # sum(|e(t)-e(t-1)|), where e is the net non-negative electricity consumption every time-step
    # the first element of this list should be dropped as it doesn't represent anything?
    if 'ramping' in cost_function:
        cost['ramping'] = np.abs((net_electric_consumption - np.roll(net_electric_consumption,1))[1:]).sum()
    
    # load factor is the average net electricity load divided by the maximum electricity load.
    # Finds the load factor for every month (average monthly demand divided by its maximum peak), 
    # and averages all the load factors across the 12 months. The metric is one minus the load factor.
    if '1-load_factor' in cost_function:
        cost['1-load_factor'] = np.mean([1 - np.mean(net_electric_consumption[i:i+int(8760/12)]) \
                                         / np.max(net_electric_consumption[i:i+int(8760/12)]) \
                                         for i in range(0, len(net_electric_consumption), int(8760/12))])
    
    # Average of all the daily peaks of the 365 day of the year. 
    # The peaks are calculated using the net energy demand of the whole district of buildings.
    if 'average_daily_peak' in cost_function:
        cost['average_daily_peak'] = np.mean([net_electric_consumption[i:i+24].max() for i in range(0, len(net_electric_consumption), 24)])
    
    # the max peak demand over all days (the whole year)
    if 'peak_demand' in cost_function:
        cost['peak_demand'] = net_electric_consumption.max()
    
    # total electricity consumption (without negative values)
    # Positive net electricity consumption for the whole district. It is clipped at a min. 
    # value of 0 because the objective is to minimize the energy consumed in the district, 
    # not to profit from the excess generation. (Island operation is therefore incentivized)
    if 'net_electricity_consumption' in cost_function:
        cost['net_electricity_consumption'] = net_electric_consumption.clip(min=0).sum()
    
    # quadratic version of total electricity consumption (without negative values) (not used for the challenge)
    if 'quadratic' in cost_function:
        cost['quadratic'] = (net_electric_consumption.clip(min=0)**2).sum()

    return cost

### 5. cost() method
First part of the cost() method runs an episode with the rule based controller to find the baseline cost (is shown below). The second part, uses the net_electric_consumption which is collected by the reinforcement learning agent and calculates the same costs as for the RBC_agent, but divides them by the RBC_agent costs to get a normalized cost (>0 is worse than RBC, <0 is better than RBC)

In [106]:
# Running the reference rule-based controller to find the baseline cost
if cost_rbc is None:
    env_rbc = CityLearn(data_path, building_attributes, weather_file, solar_profile, building_ids, 
                        buildings_states_actions = buildings_states_actions_filename, 
                        simulation_period = simulation_period, cost_function = cost_function, central_agent = False)
    _, actions_spaces = env_rbc.get_state_action_spaces()

    #Instantiatiing the control agent(s)
    agent_rbc = RBC_Agent(actions_spaces)

    state = env_rbc.reset()
    done = False
    while not done:
        # the attribute which is passed in the select_action, is the time_step of the environment in brackets ex. [1]
        actions = agent_rbc.select_action([list(env_rbc.buildings.values())[0].sim_results['hour'][env_rbc.time_step]])
        next_state, rewards, done, _ = env_rbc.step(actions)
        state = next_state
    cost_rbc = env_rbc.get_baseline_cost()

In [107]:
cost_rbc

{'ramping': 277558.34,
 '1-load_factor': 0.5675647407770157,
 'average_daily_peak': 297.39902,
 'peak_demand': 496.474,
 'net_electricity_consumption': 1497238.6,
 'quadratic': 321472800.0}

In [108]:
len(env_rbc.net_electric_consumption)

8759

In [109]:
actions

array([list([0.091, 0.091]), list([0.091, 0.091]), list([0.091]),
       list([0.091]), list([0.091, 0.091]), list([0.091, 0.091]),
       list([0.091, 0.091]), list([0.091, 0.091]), list([0.091, 0.091])],
      dtype=object)

### 6. step() method 
This method takes as input an array with actions => check above to see an example of the RBC agent.
1. Instantiate several variables which are used here in this method
2. Loop over the actions provided by the agent and the buildings. (seperate loop for central_agent vs multi agent)
    * Check if the building has cooling storage and get the electric demand for the cooling
    * Check if the building has heating storage and get the electric demand for the heating
    * Calculate total electricity consumption, electricity consumption by appliances, for cooling and dhw
3. Create the next state (state which is the result of taking action in the previous state). Calculate the corresponding reward and append to the cumulated rewards list.
4. Calculate if the next state is terminal or not
5. Return the state, reward and terminal

In [None]:
central_agent = False

In [117]:
#1) instantiate several variables

buildings_net_electricity_demand = [] # list with net electricity demand per building: -(cooling + dhw + appliances - generation)
electric_demand = 0 # total electricity consumption of all buildings (summed)
elec_consumption_dhw_storage = 0 # total electricity consumption by the dhw storage devices
elec_consumption_cooling_storage = 0 # total electricity consumption by the cooling storage devices
elec_consumption_dhw_total = 0 # total electricity consumption by the dhw devices (can be supplied from grid or from storage)
elec_consumption_cooling_total = 0 # total electricity consumption by the cooling devices
elec_consumption_appliances = 0 # total electricity consumption by the appliances
elec_generation = 0 # total electricity generation 

#2) Loop over the actions provided by the agent and the buildings. 
if central_agent:
    # If the agent is centralized, all the actions for all the buildings are provided as an ordered list of numbers. 
    # The order corresponds to the order of the buildings as they appear on the file building_attributes.json, 
    # and only considering the buildings selected for the simulation by the user (building_ids).
    for a, (uid, building) in zip(actions, buildings.items()):
        
        # Check if the building has cooling storage and if so, get the electric demand for cooling at this time step
        if buildings_states_actions[uid]['actions']['cooling_storage']:
            # Cooling (returns electric demand for the cooling)
            _electric_demand_cooling = building.set_storage_cooling(a[0])
            
            # discard the cooling action, such that now we only have the heating action left 
            actions = actions[1:]
            elec_consumption_cooling_storage += building._electric_consumption_cooling_storage
            
        # If there is no cooling storage, we just set the electric demand for cooling equal to 0
        else:
            _electric_demand_cooling = 0

        if buildings_states_actions[uid]['actions']['dhw_storage']:
            # DHW (returns electric demand for the heating)
            _electric_demand_dhw = building.set_storage_heating(a[0])
            
            # discard the heating action, ideally the list which is left now, should be empty (will be checked
            # later on in the code by an assert statement)
            actions = actions[1:]
            elec_consumption_dhw_storage += building._electric_consumption_dhw_storage
        else:
            _electric_demand_dhw = 0

        # Total heating and cooling electrical loads
        elec_consumption_cooling_total += _electric_demand_cooling
        elec_consumption_dhw_total += _electric_demand_dhw

        # Electrical appliances
        _non_shiftable_load = building.get_non_shiftable_load()
        elec_consumption_appliances += _non_shiftable_load

        # Solar generation
        _solar_generation = building.get_solar_power()
        elec_generation += _solar_generation

        # Adding loads from appliances and subtracting solar generation to the net electrical load of each building
        building_electric_demand = round(_electric_demand_cooling + _electric_demand_dhw 
                                         + _non_shiftable_load - _solar_generation, 4)

        # Electricity consumed by every building
        building.current_net_electricity_demand = building_electric_demand
        buildings_net_electricity_demand.append(-building_electric_demand) # >0 if solar generation > electricity consumption

        # Total electricity consumption
        electric_demand += building_electric_demand
    
    # Check if we didn't skip any actions
    assert len(actions) == 0, 'Some of the actions provided were not used'

# When central_agent = False, we still do the same?
else:
    
    # Check if the length of the actions and buildings are both as long as one another
    assert len(actions) == n_buildings, "The length of the list of actions should match the length of the list of buildings."

    for a, (uid, building) in zip(actions, buildings.items()):
        
        # Check if the length of the actions provided in the input file = number of actions provided by the agent
        assert sum(buildings_states_actions[uid]['actions'].values()) == len(a), "The number of input actions for building "+str(uid)+" must match the number of actions defined in the list of building attributes."

        if buildings_states_actions[uid]['actions']['cooling_storage']:
            # Cooling
            _electric_demand_cooling = building.set_storage_cooling(a[0])
            elec_consumption_cooling_storage += building._electric_consumption_cooling_storage

            if buildings_states_actions[uid]['actions']['dhw_storage']:
                # DHW
                _electric_demand_dhw = building.set_storage_heating(a[1])
                elec_consumption_dhw_storage += building._electric_consumption_dhw_storage

            else:
                _electric_demand_dhw = 0

        else:
            _electric_demand_cooling = 0
            # DHW
            _electric_demand_dhw = building.set_storage_heating(a[0])
            elec_consumption_dhw_storage += building._electric_consumption_dhw_storage

        # Total heating and cooling electrical loads
        elec_consumption_cooling_total += _electric_demand_cooling
        elec_consumption_dhw_total += _electric_demand_dhw

        # Electrical appliances
        _non_shiftable_load = building.get_non_shiftable_load()
        elec_consumption_appliances += _non_shiftable_load

        # Solar generation
        _solar_generation = building.get_solar_power()
        elec_generation += _solar_generation

        # Adding loads from appliances and subtracting solar generation to the net electrical load of each building
        building_electric_demand = round(_electric_demand_cooling + _electric_demand_dhw 
                                         + _non_shiftable_load - _solar_generation, 4)

        # Electricity consumed by every building
        building.current_net_electricity_demand = building_electric_demand
        buildings_net_electricity_demand.append(-building_electric_demand)    

        # Total electricity consumption
        electric_demand += building_electric_demand

# Advance to the next hour (time_step of each building is being updated)
# next_hour()
time_step = next(hour)
for building in buildings.values():
    time_step = time_step

if central_agent:
    s, s_appended = [], []
    for uid, building in buildings.items():
        # Create the next state
        # If the agent is centralized, we append the states avoiding repetition. 
        # I.e. if multiple buildings share the outdoor temperature as a state, we only append it once to 
        # the states of the central agent. The variable s_appended is used for this purpose.
        for state_name, value in buildings_states_actions[uid]['states'].items():
            if value == True:
                if state_name not in s_appended:
                    if state_name in ['t_in', 'avg_unmet_setpoint', 'rh_in', 'non_shiftable_load', 'solar_gen']:
                        s.append(building.sim_results[state_name][time_step])
                    elif state_name == 'net_electricity_consumption':
                        s.append(building.current_net_electricity_demand)
                    elif state_name != 'cooling_storage_soc' and state_name != 'dhw_storage_soc':
                        s.append(building.sim_results[state_name][time_step])
                        s_appended.append(state_name)
                    elif state_name == 'cooling_storage_soc':
                        s.append(building.cooling_storage._soc/building.cooling_storage.capacity)
                    elif state_name == 'dhw_storage_soc':
                        s.append(building.dhw_storage._soc/building.dhw_storage.capacity)
    # Get the next state
    state = np.array(s)
    # Get the reward for taking the previous action
    rewards = reward_function_sa(buildings_net_electricity_demand)
    # Add the reward to the cumulated_reward_episode
    cumulated_reward_episode += rewards

else:
    # If the controllers are decentralized, we append all the states to each associated agent's list of states.
    state = []
    for uid, building in buildings.items():
        s = []
        for state_name, value in buildings_states_actions[uid]['states'].items():
            if value == True:
                if state_name == 'net_electricity_consumption':
                    s.append(building.current_net_electricity_demand)
                elif state_name != 'cooling_storage_soc' and state_name != 'dhw_storage_soc':
                    s.append(building.sim_results[state_name][time_step])
                elif state_name == 'cooling_storage_soc':
                    s.append(building.cooling_storage._soc/building.cooling_storage.capacity)
                elif state_name == 'dhw_storage_soc':
                    s.append(building.dhw_storage._soc/building.dhw_storage.capacity)

        state.append(np.array(s))
    state = np.array(state)
    rewards = reward_function.get_rewards(buildings_net_electricity_demand)
    cumulated_reward_episode += sum(rewards)

# Control variables which are used to display the results and the behavior of the buildings at the district level.
net_electric_consumption.append(np.float32(electric_demand))
electric_consumption_dhw_storage.append(np.float32(elec_consumption_dhw_storage))
electric_consumption_cooling_storage.append(np.float32(elec_consumption_cooling_storage))
electric_consumption_dhw.append(np.float32(elec_consumption_dhw_total))
electric_consumption_cooling.append(np.float32(elec_consumption_cooling_total))
electric_consumption_appliances.append(np.float32(elec_consumption_appliances))
electric_generation.append(np.float32(elec_generation))
net_electric_consumption_no_storage.append(np.float32(electric_demand-elec_consumption_cooling_storage-elec_consumption_dhw_storage))
net_electric_consumption_no_pv_no_storage.append(np.float32(electric_demand + elec_generation - elec_consumption_cooling_storage - elec_consumption_dhw_storage))

# Calculate whether or not the next state is a terminal one
# terminal = _terminal()

# Returns the following variables (_get_ob(), rewards, terminal, {}) in which _get_ob() returns the state variable

In [119]:
print(rewards)

[-1034.1323, -224.52672, -148.30537, -141.03072, -867.30084, -1508.8646, -1430.623, -1134.186, -1402.536]


In [118]:
print(state)

[array([1.00000000e+00, 8.00000000e+00, 3.00000000e+00, 1.61000000e+01,
       1.87600000e+01, 2.59000000e+01, 1.50700000e+01, 7.58800000e+01,
       6.75400000e+01, 4.19600000e+01, 8.25400000e+01, 0.00000000e+00,
       1.01650000e+02, 1.18760000e+02, 0.00000000e+00, 0.00000000e+00,
       3.29890000e+02, 5.95700000e+02, 0.00000000e+00, 2.35000000e+01,
       4.38200000e+01, 8.22000000e+00, 0.00000000e+00, 4.49572662e-01,
       4.47778007e-01, 2.37436000e+01])
 array([1.00000000e+00, 8.00000000e+00, 3.00000000e+00, 1.61000000e+01,
       1.87600000e+01, 2.59000000e+01, 1.50700000e+01, 7.58800000e+01,
       6.75400000e+01, 4.19600000e+01, 8.25400000e+01, 0.00000000e+00,
       1.01650000e+02, 1.18760000e+02, 0.00000000e+00, 0.00000000e+00,
       3.29890000e+02, 5.95700000e+02, 0.00000000e+00, 2.50800000e+01,
       3.84200000e+01, 2.47000000e+00, 0.00000000e+00, 4.49572662e-01,
       4.47778007e-01, 1.10635000e+01])
 array([1.00000000e+00, 8.00000000e+00, 3.00000000e+00, 1.61000000