In [1]:
import datetime
import sys

import networkx as nx
import numpy as np
import os
import pandas as pd
import plotly.offline as py
import pytz

sys.path.insert(0, '../')
# import utils
# from utils import plotly_figure

import itertools
import xbos_services_getter as xsg

import time

import datetime
import pytz
import calendar
import itertools
import pandas as pd

from DataManager.DataManager import DataManager
from Thermostat import Tstat

# DRL 

DRL is good for demand charge problem since we don't need to incorporate the demand charge cost into every reward. We will incorporate it only into the final states. 

State:
- Last k indoor temperatures of all zones (For now just use current and last)
- Last k outdoor temperatures (For now just use current)
- Last k actions  (For now just use current)
- Time of Month (For demand charge)
- Max Consumption so far
- Comfortband for t steps into the future
- Do not exceed for t steps into the future
- occupancy for t steps into the future
- price t steps into future

Actions: 
[0,1,2] x num_zones

We limit our observation space to one month. disregarding sesonality. 

- Add random gaussian noise to all temperatures. Gaussian noise should be distributed according to our uncertainty (historic uncertainty for outdoor temperature for last years etc). 
- Comfortband/DoNotExceed should be set for one month? 
- Occupancy should have random noise added i guess. for now just assume schedule. 

Outdoor temperature we want to find distribution:
$$P(T_{t+1} | T_{t})$$ so that we can sample from it. 
For now we could assume:
$$P(T_{t+1} | T_{t}) = P(\delta t_{t+1}) $$
which is distributed according to gaussian distribution which has the same variance as our data. 

In [2]:
class ThermalControlDRL(gym.Env, DataManager):

    def __init__(self, start, end, window, config):
        
        super()
        
        # config has ["t_in_past_steps, t_out_past_steps, action_past_steps"]
        self.end_pos = config["corridor_length"]
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, self.end_pos, shape=(1, ), dtype=np.float32)

    def reset(self):
        self.cur_pos = 0
        return [self.cur_pos]

    def step(self, action):
        # takes thermostat step with the given action. 

SyntaxError: unexpected EOF while parsing (<ipython-input-2-9535512fd595>, line 19)

## How is this adding to MPC
- Easier to make demand charges happen. Do not need to incorporate into objective function at every step. Will be rewarded at the end of month. 
- Will learn a much longer predictive horizon. 
- Can use more complex models for predicting indoor temperature. MPC would loose DP possibility if using mmore complex and higher order models. 
- Could learn underlying effects of occupancy/comfortband which MPC could not catch. 

In [None]:
def time_dimension(start, end, window):
    pass

def time_to_one_hot(start, end, curr_time, window):
    pass


        
            

In [3]:
import gym, ray
from ray.rllib.agents import ppo
from gym.spaces import Discrete, Box


class MyEnv(gym.Env, DataManager): 
    def __init__(self, env_config):
        
        super().__init__(env_config["building"], env_config["zones"], 
                         env_config["start"], env_config["end"], env_config["window"])

        # assert self.zones == all zones in building. this is because of thermal model stuff. 
        self.curr_timestep = 0

        
        self.action_space = Discrete(3)
        
        self.indoor_starting_temperatures = env_config["indoor_starting_temperatures"] # to get starting temperatures [last, current]
        self.outdoor_starting_temperature = env_config["outdoor_starting_temperature"]
        
        self.tstats = {}
        for iter_zone in self.zones:
            self.tstats[iter_zone] = Tstat(self.building, iter_zone, 
                                           self.indoor_starting_temperatures[iter_zone]["current"], 
                                          last_temperature=self.indoor_starting_temperatures[iter_zone]["last"])
        
        
        assert 60*60 % xsg.get_window_in_sec(self.window) == 0 # window divides an hour
        assert (self.end - self.start).total_seconds() % xsg.get_window_in_sec(self.window) == 0 # window divides the timeframe
        
        self.num_timesteps = int((self.end - self.start).total_seconds() / xsg.get_window_in_sec(self.window))
        
        self.unit = env_config["unit"]
        assert self.unit == "F"
                
        # all zones current and last temperature = 2*num_zones
        # building outside temperature -> make a class for how this behaves = 1
        # timestep -> do one hot encoding of week, day, hour, window  \approx 4 + 7 + 24 + 60*60 / window
        low_bound = [32] * (2*len(self.zones) + 1) # we could use parametric temperature bounds... for now we will give negative inft reward
        high_bound = [100] * (2*len(self.zones) + 1) # plus one for building
        
        low_bound += [0] * self.num_timesteps
        high_bound += [1] * self.num_timesteps
        
        self.observation_space = Box(
            low=np.array(low_bound), high=np.array(high_bound), dtype=np.float32)
        
        
    def reset(self):
        self.curr_timestep = 0
        
        for iter_zone in self.zones:
            self.tstats[iter_zone].reset(self.indoor_starting_temperatures[iter_zone]["current"], 
                                          last_temperature=self.indoor_starting_temperatures[iter_zone]["last"])            
        self.outdoor_temperature = self.outdoor_starting_temperature
        
        return self._create_obs(self.tstats, self.outdoor_temperature. self.curr_timestep)


    def step(self, action):
        
        # find what new temperature would be. use thermal model with uncertainty. use reset if exceeding 
        # do_not_exceed. can't force it to take a different action anymore. 
        
        
#         return <obs>, <reward: float>, <done: bool>, <info: dict>
        pass

    def _create_obs(tstats, outdoor_temperature, curr_time_step):
        obs = np.zeros(self.observation_space.low.shape)
        idx = 0
        for iter_zone in zones:
            obs[idx] = tstats[iter_zone].last_temperature 
            idx += 1
            obs[idx] = tstats[iter_zone].temperature
            idx += 1
        
        obs[idx] = self.outdoor_temperature
        idx += 1
        obs[idx + curr_timestep] = 1
        
        return obs
        
# ray.init()
# trainer = ppo.PPOTrainer(env=MyEnv, config={
#     "env_config": {},  # config to pass to env class
# })

# while True:
#     print(trainer.train())

In [7]:
start = datetime.datetime(year=2019, month=1, day=1).replace(tzinfo=pytz.utc)
end = start + datetime.timedelta(days=1)
window = "15m"
building = "avenal-animal-shelter"
zones = ["hvac_zone_shelter_corridor"]
indoor_starting_temperatures = {iter_zone: {"last":70, "current":71} for iter_zone in zones}
outdoor_starting_temperature = 60
unit = "F"

In [8]:
config = {
    "start": start,
    "end": end,
    "window": window,
    "building": building,
    "zones": zones,
    "indoor_starting_temperatures": indoor_starting_temperatures,
    "outdoor_starting_temperature": outdoor_starting_temperature,
    "unit": unit
}

In [9]:
e = MyEnv(config)

_Rendezvous: <_Rendezvous of RPC that terminated with:
	status = StatusCode.UNAVAILABLE
	details = "Connect Failed"
	debug_error_string = "{"created":"@1556762144.392878000","description":"Failed to create subchannel","file":"src/core/ext/filters/client_channel/client_channel.cc","file_line":2721,"referenced_errors":[{"created":"@1556762144.392871000","description":"Pick Cancelled","file":"src/core/ext/filters/client_channel/lb_policy/pick_first/pick_first.cc","file_line":241,"referenced_errors":[{"created":"@1556762144.392863000","description":"Connect Failed","file":"src/core/ext/filters/client_channel/subchannel.cc","file_line":689,"grpc_status":14,"referenced_errors":[{"created":"@1556762144.392809000","description":"Handshake failed","file":"src/core/lib/security/transport/security_handshaker.cc","file_line":248,"tsi_code":10,"tsi_error":"TSI_PROTOCOL_FAILURE"}]}]}]}"
>

In [None]:
e.reset()

# Ray Example

In [None]:
import numpy as np
import gym
from ray.rllib.models import FullyConnectedNetwork, Model, ModelCatalog
from gym.spaces import Discrete, Box

import ray
from ray import tune
from ray.tune import grid_search

In [None]:
ray.init()

In [None]:
class SimpleCorridor(gym.Env):

    def __init__(self, start, end, window, config):
        
        # config has ["t_in_past_steps, t_out_past_steps, action_past_steps"]
        self.end_pos = config["corridor_length"]
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, self.end_pos, shape=(1, ), dtype=np.float32)

    def reset(self):
        self.cur_pos = 0
        return [self.cur_pos]

    def step(self, action):
        assert action in [0, 1], action
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1
        done = self.cur_pos >= self.end_pos
        return [self.cur_pos], 1 if done else 0, done, {}
    
    
class CustomModel(Model):
    """Example of a custom model.
    This model just delegates to the built-in fcnet.
    """

    def _build_layers_v2(self, input_dict, num_outputs, options):
        self.obs_in = input_dict["obs"]
        self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
                                           self.action_space, num_outputs,
                                           options)
        return self.fcnet.outputs, self.fcnet.last_layer

In [None]:
# Can also register the env creator function explicitly with:
# register_env("corridor", lambda config: SimpleCorridor(config))
ModelCatalog.register_custom_model("my_model", CustomModel)
tune.run(
    "PPO",
    stop={
        "timesteps_total": 10000,
    },
    config={
        "env": SimpleCorridor,  # or "corridor" if registered above
        "model": {
            "custom_model": "my_model",
        },
        "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
        "num_workers": 1,  # parallelism
        "env_config": {
            "corridor_length": 5,
        },
    },
)