In [1]:
import datetime
import sys

import networkx as nx
import numpy as np
import os
import pandas as pd
import plotly.offline as py
import pytz

sys.path.insert(0, '../')
# import utils
# from utils import plotly_figure

import itertools
import xbos_services_getter as xsg

import time

import datetime
import pytz
import calendar
import itertools
import pandas as pd

# Check that correct data is given

In [2]:
def check_data(data, start, end, window):
    """Validate that a time series covers [start, end) at a fixed step without gaps.

    The checks run in this order and the first failure is reported:
    container type, presence of the first and last expected timestamps in the
    index, absence of NaN values, and a constant spacing of exactly one window
    between consecutive index entries.

    :param data: pd.DataFrame or pd.Series indexed by timestamps.
    :param start: datetime (timezone aware); must be present in the index.
    :param end: datetime (timezone aware); exclusive, so the last expected
        index entry is ``end - window``.
    :param window: (string) interval specification, converted to seconds with
        ``xsg.get_window_in_sec``.
    :return: None if every check passes, otherwise a str describing the first
        failed check."""
    window = xsg.get_window_in_sec(window)
    if not isinstance(data, (pd.DataFrame, pd.Series)):
        return "Is not a pd.DataFrame/pd.Series"

    # The end is exclusive, so the final row sits one window before it.
    last_expected = end - datetime.timedelta(seconds=window)
    if (start not in data.index) or (last_expected not in data.index):
        return "Does not have valid start or/and end time."

    if data.isna().values.any():
        return "Nan values in data."

    # to_series() preserves the timezone by itself; the former ``keep_tz``
    # keyword was removed from pandas and passing it raises a TypeError.
    time_diffs = data.index.to_series().diff()
    if time_diffs.shape[0] > 1:
        # The first diff is NaT and is skipped by min()/max().
        smallest_step = time_diffs.min()
        largest_step = time_diffs.max()
        # total_seconds() is required here: ``.seconds`` only holds the
        # sub-day remainder, so windows of one day or more would never match.
        if (smallest_step != largest_step) or (smallest_step.total_seconds() != window):
            return "Missing rows or/and bad time frequency."
    return None

def check_data_zones(zones, data_dict, start, end, window):
    """Run ``check_data`` for every zone and report the first problem found.

    :param zones: iterable of zone names that must all be present.
    :param data_dict: mapping from zone name to that zone's data.
    :param start: forwarded to ``check_data``.
    :param end: forwarded to ``check_data``.
    :param window: forwarded to ``check_data``.
    :return: None if every zone is present and passes ``check_data``,
        otherwise a str describing the first missing zone or failed check."""
    for zone_name in zones:
        if zone_name in data_dict:
            problem = check_data(data_dict[zone_name], start, end, window)
        else:
            problem = "Is missing zone " + zone_name
        # Stop at the first zone that is missing or fails validation.
        if problem is not None:
            return problem
    return None


# Define OptimizerParent
The optimizer parent can be used across optimizers. It makes it easy to get the relevant data for training and testing purposes. 

Assumes that a new Optimizer instance is created every time the optimization is called.

In [8]:
class OptimizerParent:
    """Base class that gathers the non-controllable inputs an optimizer needs.

    For one building, a set of zones and a time range it collects comfortband,
    do-not-exceed, occupancy, outdoor temperature and HVAC consumption data.
    Time series are taken from ``non_controllable_data`` when supplied and
    fetched through ``xsg`` otherwise; either way they are validated with
    ``check_data`` / ``check_data_zones``.

    All time series start exactly at ``start``, every step corresponds to one
    interval, and ``end`` is not inclusive.
    """

    def __init__(self, building, zones, start, end, window, non_controllable_data=None):
        """Load and validate the data for the given building, zones and time range.

        :param building: building identifier forwarded to the xsg getters.
        :param zones: zone identifiers; iterated several times, so pass a
            re-iterable collection such as a list.
        :param start: datetime (timezone aware), first interval.
        :param end: datetime (timezone aware), exclusive end of the range.
        :param window: interval specification, forwarded unchanged to the xsg
            getters and to the data checks.
        :param non_controllable_data: optional dict with pre-fetched data.
            Recognised keys are "comfortband", "do_not_exceed", "occupancy"
            (each a dict zone -> data) and "outdoor_temperature". Missing keys
            are fetched through xsg. None is treated as an empty dict.
        :raises Exception: if any supplied or fetched time series fails validation.
        """
        self.start = start
        # Epoch time scaled from seconds to nanoseconds (kept as float).
        self.unix_start = start.timestamp() * 1e9
        self.end = end
        self.unix_end = end.timestamp() * 1e9
        # NOTE(review): check_data documents window as a string that
        # xsg.get_window_in_sec understands, not a timedelta — confirm with callers.
        self.window = window

        self.building = building
        self.zones = zones

        if non_controllable_data is None:
            non_controllable_data = {}
        # TODO add error checking. check that the right zones are given in
        # non_controllable_data and that the start/end/window are right.

        # temperature band
        temperature_band_stub = xsg.get_temperature_band_stub()

        self.comfortband = self._resolve_zone_data(
            non_controllable_data, "comfortband",
            lambda zone: xsg.get_comfortband(
                temperature_band_stub, self.building, zone, self.start, self.end, self.window),
            "Bad comfortband given. ")

        self.do_not_exceed = self._resolve_zone_data(
            non_controllable_data, "do_not_exceed",
            lambda zone: xsg.get_do_not_exceed(
                temperature_band_stub, self.building, zone, self.start, self.end, self.window),
            "Bad DoNotExceed given. ")

        # occupancy; the stub is only created when the data has to be fetched
        if "occupancy" not in non_controllable_data:
            occupancy_stub = xsg.get_occupancy_stub()
        else:
            occupancy_stub = None
        self.occupancy = self._resolve_zone_data(
            non_controllable_data, "occupancy",
            lambda zone: xsg.get_occupancy(
                occupancy_stub, self.building, zone, self.start, self.end, self.window),
            "Bad occupancy given. ")

        # outdoor temperatures
        if "outdoor_temperature" not in non_controllable_data:
            outdoor_historic_stub = xsg.get_outdoor_historic_stub()
            self.outdoor_temperature = xsg.get_outdoor_temperature_historic(
                outdoor_historic_stub, self.building, self.start, self.end, self.window)
        else:
            # Use the caller-supplied series; without this branch the attribute
            # would be missing and the validation below would fail with
            # an AttributeError.
            self.outdoor_temperature = non_controllable_data["outdoor_temperature"]
        err = check_data(self.outdoor_temperature, start, end, window)
        if err is not None:
            raise Exception("Bad outdoor temperature given. " + err)
        # TODO: a combined historic + prediction outdoor temperature source
        # (separate prediction stub) was sketched here but is not wired in.

        # discomfort channel
        self.discomfort_stub = xsg.get_discomfort_stub()

        # HVAC Consumption TODO ERROR CHECK?
        hvac_consumption_stub = xsg.get_hvac_consumption_stub()
        self.hvac_consumption = {
            iter_zone: xsg.get_hvac_consumption(hvac_consumption_stub, self.building, iter_zone)
            for iter_zone in self.zones}

        # TODO Prices

    def _resolve_zone_data(self, provided, key, fetch_zone, error_prefix):
        """Return validated per-zone data for ``key``, fetching it zone by zone if not provided.

        :param provided: dict of caller-supplied data.
        :param key: name of the data set inside ``provided``.
        :param fetch_zone: callable taking a zone and returning that zone's data.
        :param error_prefix: text prepended to the validation error message.
        :raises Exception: if ``check_data_zones`` reports a problem.
        """
        if key in provided:
            zone_data = provided[key]
        else:
            zone_data = {iter_zone: fetch_zone(iter_zone) for iter_zone in self.zones}
        err = check_data_zones(self.zones, zone_data, self.start, self.end, self.window)
        if err is not None:
            raise Exception(error_prefix + err)
        return zone_data
        


# DRL 

DRL is a good fit for the demand charge problem, since we don't need to incorporate the demand charge cost into every reward. We will incorporate it only into the final states. 

State:
- Last k indoor temperatures of all zones (For now just use current and last)
- Last k outdoor temperatures (For now just use current)
- Last k actions  (For now just use current)
- Time of Month (For demand charge)
- Max Consumption so far
- Comfortband for t steps into the future
- Do not exceed for t steps into the future
- occupancy for t steps into the future
- price t steps into future

Actions: 
[0,1,2] x num_zones

We limit our observation space to one month, disregarding seasonality. 

- Add random Gaussian noise to all temperatures. The noise should be distributed according to our uncertainty (e.g. the historic uncertainty of the outdoor temperature over the last years). 
- Comfortband/DoNotExceed should be set for one month? 
- Occupancy should probably have random noise added as well. For now, just assume the schedule. 

For the outdoor temperature we want to find the distribution
$$P(T_{t+1} \mid T_{t})$$ so that we can sample from it. 
For now we could assume
$$P(T_{t+1} \mid T_{t}) = P(\Delta T_{t+1}), \qquad \Delta T_{t+1} = T_{t+1} - T_{t},$$
where $\Delta T_{t+1}$ follows a Gaussian distribution with the same variance as observed in our data. 

## How is this adding to MPC
- Easier to account for demand charges. They do not need to be incorporated into the objective function at every step; the agent is rewarded at the end of the month instead. 
- Will learn a much longer predictive horizon. 
- Can use more complex models for predicting indoor temperature. MPC would lose the DP possibility if using more complex and higher-order models. 
- Could learn underlying effects of occupancy/comfortband which MPC could not catch. 

In [7]:
import numpy as np
import gym
from ray.rllib.models import FullyConnectedNetwork, Model, ModelCatalog
from gym.spaces import Discrete, Box

import ray
from ray import tune
from ray.tune import grid_search

In [None]:
class ThermalControlDRL(gym.Env, OptimizerParent):
    """Gym environment skeleton for the thermal-control DRL agent.

    The current body is a one-dimensional corridor walk: the agent starts at
    position 0 and the episode ends once it reaches ``config["corridor_length"]``.
    ``start``, ``end`` and ``window`` are accepted but not used yet, and the
    ``OptimizerParent`` initializer is not invoked.
    """

    def __init__(self, start, end, window, config):
        """Set up the corridor state and the action/observation spaces.

        :param start: currently unused.
        :param end: currently unused.
        :param window: currently unused.
        :param config: dict; only "corridor_length" is read here. Intended to
            later carry ["t_in_past_steps, t_out_past_steps, action_past_steps"].
        """
        self.cur_pos = 0
        self.end_pos = config["corridor_length"]
        # Single scalar observation: the position, bounded by the corridor end.
        self.observation_space = Box(
            low=0.0, high=self.end_pos, shape=(1, ), dtype=np.float32)
        # Two actions: 0 moves back, 1 moves forward.
        self.action_space = Discrete(2)

    def reset(self):
        """Move the agent back to position 0 and return the initial observation."""
        self.cur_pos = 0
        observation = [self.cur_pos]
        return observation

    def step(self, action):
        """Apply one move and return ``(observation, reward, done, info)``.

        Reward is 1 on the step that reaches the end of the corridor, else 0.
        """
        assert action in [0, 1], action
        if action == 1:
            self.cur_pos += 1
        elif action == 0 and self.cur_pos > 0:
            # Moving back is a no-op at position 0.
            self.cur_pos -= 1
        done = self.cur_pos >= self.end_pos
        reward = 1 if done else 0
        return [self.cur_pos], reward, done, {}

In [None]:
# Launches a PPO training run through ray.tune with a grid search over the
# learning rate; training stops after 10000 total timesteps.
#
# NOTE(review): `CustomModel` and `SimpleCorridor` are not defined in the cells
# visible in this notebook, so this cell raises NameError on a fresh kernel
# unless they come from somewhere else — confirm. If the intent is to train
# ThermalControlDRL, note that its __init__ takes (start, end, window, config)
# while only "env_config" is configured here — verify how the env is constructed.
#
# Can also register the env creator function explicitly with:
# register_env("corridor", lambda config: SimpleCorridor(config))
ray.init()
ModelCatalog.register_custom_model("my_model", CustomModel)
tune.run(
    "PPO",
    stop={
        "timesteps_total": 10000,
    },
    config={
        "env": SimpleCorridor,  # or "corridor" if registered above
        "model": {
            "custom_model": "my_model",
        },
        "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
        "num_workers": 1,  # parallelism
        "env_config": {
            "corridor_length": 5,
        },
    },
)