In [1]:
import datetime
import sys

import networkx as nx
import numpy as np
import os
import pandas as pd
import plotly.offline as py
import pytz

sys.path.insert(0, '../')
# import utils
# from utils import plotly_figure

import itertools
import xbos_services_getter as xsg

import time

import datetime
import pytz
import calendar
import itertools
import pandas as pd

from DataManager.DataManager import DataManager
from Thermostat import Tstat

In [10]:
import numpy as np
import gym
from ray.rllib.models import FullyConnectedNetwork, Model, ModelCatalog
from gym.spaces import Discrete, Box

import ray
from ray import tune
from ray.tune import grid_search

# DRL 

DRL is good for demand charge problem since we don't need to incorporate the demand charge cost into every reward. We will incorporate it only into the final states. 

State:
- Last k indoor temperatures of all zones (For now just use current and last)
- Last k outdoor temperatures (For now just use current)
- Last k actions  (For now just use current)
- Time of Month (For demand charge)
- Max Consumption so far
- Comfortband for t steps into the future
- Do not exceed for t steps into the future
- occupancy for t steps into the future
- price t steps into future

Actions: 
[0,1,2] x num_zones

We limit our observation space to one month. disregarding sesonality. 

- Add random gaussian noise to all temperatures. Gaussian noise should be distributed according to our uncertainty (historic uncertainty for outdoor temperature for last years etc). 
- Comfortband/DoNotExceed should be set for one month? 
- Occupancy should have random noise added i guess. for now just assume schedule. 

Outdoor temperature we want to find distribution:
$$P(T_{t+1} | T_{t})$$ so that we can sample from it. 
For now we could assume:
$$P(T_{t+1} | T_{t}) = P(\delta t_{t+1}) $$
which is distributed according to gaussian distribution which has the same variance as our data. 

## How is this adding to MPC
- Easier to make demand charges happen. Do not need to incorporate into objective function at every step. Will be rewarded at the end of month. 
- Will learn a much longer predictive horizon. 
- Can use more complex models for predicting indoor temperature. MPC would loose DP possibility if using mmore complex and higher order models. 
- Could learn underlying effects of occupancy/comfortband which MPC could not catch. 

In [7]:
import gym, ray
from ray.rllib.agents import ppo
from gym.spaces import Discrete, Box


class MyEnv(gym.Env, DataManager): 
    def __init__(self, env_config):
        
        super().__init__(env_config["building"], env_config["zones"], 
                         env_config["start"], env_config["end"], env_config["window"])
        self.lambda_val = env_config["lambda_val"]

        # assert self.zones == all zones in building. this is because of the thermal model needing other zone temperatures. 
        
        self.curr_timestep = 0
        
        self.action_space = Discrete(3)
        
        self.indoor_starting_temperatures = env_config["indoor_starting_temperatures"] # to get starting temperatures [last, current]
        self.outdoor_starting_temperature = env_config["outdoor_starting_temperature"]
        
        self.tstats = {}
        for iter_zone in self.zones:
            self.tstats[iter_zone] = Tstat(self.building, iter_zone, 
                                           self.indoor_starting_temperatures[iter_zone]["current"], 
                                          last_temperature=self.indoor_starting_temperatures[iter_zone]["last"])
        
        
        assert 60*60 % xsg.get_window_in_sec(self.window) == 0 # window divides an hour
        assert (self.end - self.start).total_seconds() % xsg.get_window_in_sec(self.window) == 0 # window divides the timeframe
        
        # the number of timesteps 
        self.num_timesteps = int((self.end - self.start).total_seconds() / xsg.get_window_in_sec(self.window))
        
        self.unit = env_config["unit"]
        assert self.unit == "F"
                
        # all zones current and last temperature = 2*num_zones
        # building outside temperature -> make a class for how this behaves = 1
        # timestep -> do one hot encoding of week, day, hour, window  \approx 4 + 7 + 24 + 60*60 / window
        low_bound = [32] * (2*len(self.zones) + 1) # we could use parametric temperature bounds... for now we will give negative inft reward
        high_bound = [100] * (2*len(self.zones) + 1) # plus one for building
        
        low_bound += [0] * (self.num_timesteps + 1) # total timesteps plus the final timestep
        high_bound += [1] * (self.num_timesteps + 1) # total timesteps plus the final timestep
        
        self.observation_space = Box(
            low=np.array(low_bound), high=np.array(high_bound), dtype=np.float32)
        
        
    def reset(self):
        self.curr_timestep = 0
        
        for iter_zone in self.zones:
            self.tstats[iter_zone].reset(self.indoor_starting_temperatures[iter_zone]["current"], 
                                          last_temperature=self.indoor_starting_temperatures[iter_zone]["last"])            
        self.outdoor_temperature = self.outdoor_starting_temperature
        
        return self.create_curr_obs()


    def step(self, action):
        
        # find what new temperature would be. use thermal model with uncertainty. use reset if exceeding 
        # do_not_exceed. can't force it to take a different action anymore. 
        
        # update temperatures 
        for iter_zone in self.zones:
            tstats[iter_zone].next_temperature(aciton[iter_zone])
            self.outdoor_temperature = np.random.normal() # TODO we should make a thermostat for the outdoor temperature.
        self.curr_timestep += 1
        
        # if we reach the end time. 
        if self.curr_timestep == self.num_timesteps + 1:
            return self.create_curr_obs(), 0, True, {}
        
        # check that in saftey temperature band
        for iter_zone in self.zones:
            curr_safety = self.do_not_exceed[iter_zone].iloc[node.timestep]
            if not (curr_safety["t_low"] <= tstats[iter_zone].temperature <= curr_safety["t_high"]):
                return self.create_curr_obs(), -float('inf'), True, {} # TODO do we want to add info? 
        
        # get reward by calling discomfort and consumption model ... 
        reward = self.get_reward(action)
        
        return self.create_curr_obs(), reward, False, {} # obs, reward, done, info

    def get_reward(self, action):
        """Get the reward for the given action with the current observation parameters."""
        # get discomfort across edge
        discomfort = {}
        for iter_zone in self.zones:
            curr_comfortband = self.comfortband[iter_zone].iloc[self.curr_timestep]
            curr_occupancy = self.occupancy[iter_zone].iloc[self.curr_timestep]
            curr_tstat = self.tstats[iter_zone]
            average_edge_temperature = (curr_tstat.temperature + curr_tstat.last_temperature)/2.

            discomfort[iter_zone] = xsg.get_discomfort(
                self.discomfort_stub, self.building, average_edge_temperature,
                curr_comfortband["t_low"], curr_comfortband["t_high"], 
                curr_occupancy)

        # Get consumption across edge
        price = 1  # self.prices.iloc[root.timestep] TODO also add right unit conversion, and duration
        consumption_cost = {self.zones[i]: price * self.hvac_consumption[self.zones[i]][action[i]] 
                           for i in range(len(self.zones))}
        
        cost = ((1 - self.lambda_val) * (sum(consumption_cost.values()))) + (
                self.lambda_val * (sum(discomfort.values())))
        return -cost
        

    def create_curr_obs(self):
        return _create_obs(self.tstats, self.outdoor_temperature, self.curr_timestep)

    def _create_obs(self, tstats, outdoor_temperature, curr_time_step):
        obs = np.zeros(self.observation_space.low.shape)
        idx = 0
        for iter_zone in zones:
            obs[idx] = tstats[iter_zone].last_temperature 
            idx += 1
            obs[idx] = tstats[iter_zone].temperature
            idx += 1
        
        obs[idx] = self.outdoor_temperature
        idx += 1
        obs[idx + curr_timestep] = 1
        
        return obs
        
# ray.init()
# trainer = ppo.PPOTrainer(env=MyEnv, config={
#     "env_config": {},  # config to pass to env class
# })

# while True:
#     print(trainer.train())

In [3]:
start = datetime.datetime(year=2019, month=1, day=1).replace(tzinfo=pytz.utc)
end = start + datetime.timedelta(days=1)
window = "15m"
building = "avenal-animal-shelter"
zones = ["hvac_zone_shelter_corridor"]
indoor_starting_temperatures = {iter_zone: {"last":70, "current":71} for iter_zone in zones}
outdoor_starting_temperature = 60
unit = "F"
lambda_val = 0.999

In [4]:
config = {
    "start": start,
    "end": end,
    "window": window,
    "building": building,
    "zones": zones,
    "indoor_starting_temperatures": indoor_starting_temperatures,
    "outdoor_starting_temperature": outdoor_starting_temperature,
    "unit": unit, 
    "lambda_val": lambda_val
}

In [6]:
e = MyEnv(config)

In [8]:
ray.init()

2019-05-07 20:15:10,630	INFO node.py:423 -- Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-05-07_20-15-10_18115/logs.
2019-05-07 20:15:10,748	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:38862 to respond...
2019-05-07 20:15:10,867	INFO services.py:363 -- Waiting for redis server at 127.0.0.1:26349 to respond...
2019-05-07 20:15:10,872	INFO services.py:760 -- Starting Redis shard with 6.87 GB max memory.
2019-05-07 20:15:10,897	INFO services.py:1384 -- Starting the Plasma object store with 10.31 GB memory using /tmp.


{'node_ip_address': None,
 'redis_address': '10.142.36.180:38862',
 'object_store_address': '/tmp/ray/session_2019-05-07_20-15-10_18115/sockets/plasma_store',
 'webui_url': None,
 'raylet_socket_name': '/tmp/ray/session_2019-05-07_20-15-10_18115/sockets/raylet'}

In [12]:
# Can also register the env creator function explicitly with:
# register_env("corridor", lambda config: SimpleCorridor(config))
# ModelCatalog.register_custom_model("my_model", CustomModel)
tune.run(
    "PPO",
    stop={
        "timesteps_total": 10000,
    },
    config={
        "env": MyEnv,  # or "corridor" if registered above
        "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
        "num_workers": 1,  # parallelism
        "env_config": config,
    },
)

2019-05-07 20:16:49,274	INFO tune.py:60 -- Tip: to resume incomplete experiments, pass resume='prompt' or resume=True to run()
2019-05-07 20:16:49,274	INFO tune.py:211 -- Starting a new experiment.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/12 CPUs, 0/0 GPUs
Memory usage on this node: 15.6/34.4 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/12 CPUs, 0/0 GPUs
Memory usage on this node: 15.6/34.4 GB
Result logdir: /Users/daniellengyel/ray_results/PPO
Number of trials: 3 ({'RUNNING': 1, 'PENDING': 2})
PENDING trials:
 - PPO_MyEnv_1_lr=0.0001:	PENDING
 - PPO_MyEnv_2_lr=1e-06:	PENDING
RUNNING trials:
 - PPO_MyEnv_0_lr=0.01:	RUNNING



2019-05-07 20:16:51,384	ERROR trial_runner.py:460 -- Error processing event.
Traceback (most recent call last):
  File "/Users/daniellengyel/miniconda3/envs/venv-dr3/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 409, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/Users/daniellengyel/miniconda3/envs/venv-dr3/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 314, in fetch_result
    result = ray.get(trial_future[0])
  File "/Users/daniellengyel/miniconda3/envs/venv-dr3/lib/python3.6/site-packages/ray/worker.py", line 2316, in get
    raise value
ray.exceptions.RayTaskError: [36mray_worker[39m (pid=18135, host=Daniels-MacBook-Pro-4.local)
  File "pyarrow/serialization.pxi", line 448, in pyarrow.lib.deserialize
  File "pyarrow/serialization.pxi", line 411, in pyarrow.lib.deserialize_from
  File "pyarrow/serialization.pxi", line 262, in pyarrow.lib.SerializedPyObject.deserialize
  File "pyarrow/serialization.pxi", line 171, in

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/12 CPUs, 0/0 GPUs
Memory usage on this node: 15.8/34.4 GB
Result logdir: /Users/daniellengyel/ray_results/PPO
Number of trials: 3 ({'ERROR': 3})
ERROR trials:
 - PPO_MyEnv_0_lr=0.01:	ERROR, 1 failures: /Users/daniellengyel/ray_results/PPO/PPO_MyEnv_0_lr=0.01_2019-05-07_20-16-4925ijjntn/error_2019-05-07_20-16-51.txt
 - PPO_MyEnv_1_lr=0.0001:	ERROR, 1 failures: /Users/daniellengyel/ray_results/PPO/PPO_MyEnv_1_lr=0.0001_2019-05-07_20-16-49y8ajrqyt/error_2019-05-07_20-16-51.txt
 - PPO_MyEnv_2_lr=1e-06:	ERROR, 1 failures: /Users/daniellengyel/ray_results/PPO/PPO_MyEnv_2_lr=1e-06_2019-05-07_20-16-49s2ahrlk_/error_2019-05-07_20-16-51.txt



TuneError: ('Trials did not complete', [PPO_MyEnv_0_lr=0.01, PPO_MyEnv_1_lr=0.0001, PPO_MyEnv_2_lr=1e-06])

# Ray Example

In [13]:
import numpy as np
import gym
from ray.rllib.models import FullyConnectedNetwork, Model, ModelCatalog
from gym.spaces import Discrete, Box

import ray
from ray import tune
from ray.tune import grid_search

In [14]:
ray.init()



Exception: Perhaps you called ray.init twice by accident? This error can be suppressed by passing in 'ignore_reinit_error=True' or by calling 'ray.shutdown()' prior to 'ray.init()'.

In [15]:
class SimpleCorridor(gym.Env):

    def __init__(self, start, end, window, config):
        
        # config has ["t_in_past_steps, t_out_past_steps, action_past_steps"]
        self.end_pos = config["corridor_length"]
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, self.end_pos, shape=(1, ), dtype=np.float32)

    def reset(self):
        self.cur_pos = 0
        return [self.cur_pos]

    def step(self, action):
        assert action in [0, 1], action
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1
        done = self.cur_pos >= self.end_pos
        return [self.cur_pos], 1 if done else 0, done, {}
    
    
class CustomModel(Model):
    """Example of a custom model.
    This model just delegates to the built-in fcnet.
    """

    def _build_layers_v2(self, input_dict, num_outputs, options):
        self.obs_in = input_dict["obs"]
        self.fcnet = FullyConnectedNetwork(input_dict, self.obs_space,
                                           self.action_space, num_outputs,
                                           options)
        return self.fcnet.outputs, self.fcnet.last_layer

In [17]:
# Can also register the env creator function explicitly with:
# register_env("corridor", lambda config: SimpleCorridor(config))
ModelCatalog.register_custom_model("my_model", CustomModel)
tune.run(
    "PPO",
    stop={
        "timesteps_total": 10000,
    },
    config={
        "env": SimpleCorridor,  # or "corridor" if registered above
        "model": {
            "custom_model": "my_model",
        },
        "lr": grid_search([1e-2, 1e-4, 1e-6]),  # try different lrs
        "num_workers": 1,  # parallelism
        "env_config": {
            "corridor_length": 5,
        },
    },
    reuse_actors=True
)

2019-05-07 20:28:07,884	INFO tune.py:60 -- Tip: to resume incomplete experiments, pass resume='prompt' or resume=True to run()
2019-05-07 20:28:07,885	INFO tune.py:211 -- Starting a new experiment.


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/12 CPUs, 0/0 GPUs
Memory usage on this node: 15.8/34.4 GB

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/12 CPUs, 0/0 GPUs
Memory usage on this node: 15.8/34.4 GB
Result logdir: /Users/daniellengyel/ray_results/PPO
Number of trials: 3 ({'RUNNING': 1, 'PENDING': 2})
PENDING trials:
 - PPO_SimpleCorridor_1_lr=0.0001:	PENDING
 - PPO_SimpleCorridor_2_lr=1e-06:	PENDING
RUNNING trials:
 - PPO_SimpleCorridor_0_lr=0.01:	RUNNING



2019-05-07 20:28:10,046	ERROR trial_runner.py:460 -- Error processing event.
Traceback (most recent call last):
  File "/Users/daniellengyel/miniconda3/envs/venv-dr3/lib/python3.6/site-packages/ray/tune/trial_runner.py", line 409, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/Users/daniellengyel/miniconda3/envs/venv-dr3/lib/python3.6/site-packages/ray/tune/ray_trial_executor.py", line 314, in fetch_result
    result = ray.get(trial_future[0])
  File "/Users/daniellengyel/miniconda3/envs/venv-dr3/lib/python3.6/site-packages/ray/worker.py", line 2316, in get
    raise value
ray.exceptions.RayTaskError: [36mray_worker[39m (pid=18138, host=Daniels-MacBook-Pro-4.local)
  File "/Users/daniellengyel/miniconda3/envs/venv-dr3/lib/python3.6/site-packages/ray/rllib/agents/agent.py", line 276, in __init__
    Trainable.__init__(self, config, logger_creator)
  File "/Users/daniellengyel/miniconda3/envs/venv-dr3/lib/python3.6/site-packages/ray/tune/trainable.py", 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/12 CPUs, 0/0 GPUs
Memory usage on this node: 16.0/34.4 GB
Result logdir: /Users/daniellengyel/ray_results/PPO
Number of trials: 3 ({'ERROR': 3})
ERROR trials:
 - PPO_SimpleCorridor_0_lr=0.01:	ERROR, 1 failures: /Users/daniellengyel/ray_results/PPO/PPO_SimpleCorridor_0_lr=0.01_2019-05-07_20-28-07juldidjd/error_2019-05-07_20-28-10.txt
 - PPO_SimpleCorridor_1_lr=0.0001:	ERROR, 1 failures: /Users/daniellengyel/ray_results/PPO/PPO_SimpleCorridor_1_lr=0.0001_2019-05-07_20-28-08xpf725ru/error_2019-05-07_20-28-10.txt
 - PPO_SimpleCorridor_2_lr=1e-06:	ERROR, 1 failures: /Users/daniellengyel/ray_results/PPO/PPO_SimpleCorridor_2_lr=1e-06_2019-05-07_20-28-08jlehu0ik/error_2019-05-07_20-28-10.txt



TuneError: ('Trials did not complete', [PPO_SimpleCorridor_0_lr=0.01, PPO_SimpleCorridor_1_lr=0.0001, PPO_SimpleCorridor_2_lr=1e-06])



In [None]:
3**16

In [13]:
np.random.normal()

-1.3982164924625597

In [9]:
import xbos_services_getter
XBOS_MICROSERVICES_HOST_ADDRESS="ms.xbos.io:9001"
import time
discomfort_stub = xbos_services_getter.get_discomfort_stub()
a = time.time()
for i in range(0,1000):
    discomfort = xbos_services_getter.get_discomfort(discomfort_stub,'bldg',95,90,94,1.0)
print("time", time.time() - a)

time 3.584411859512329


In [5]:
import os

In [6]:
os.environ["DISCOMFORT_HOST_ADDRESS"]

'ms.xbos.io:9001'

In [None]:
!source /Users/daniellengyel/.bash_profile

In [19]:
isinstance(np.int64, int) 

False