In [1]:
import sys

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
%load_ext autoreload

In [2]:
%autoreload 2
from lbc.analysis import run_analysis, plot_costs

In [3]:
%autoreload 2
from lbc.experiments import (
    DPCRunner, MPCOneShotRunner, MPCRunner, RLCRunner, CPLRunner, RBCRunner)

In [4]:
%autoreload 2
from lbc.experiments.config import get_config

In [5]:
# Settings shared by every policy in this notebook; each get_config(...)
# call below receives them as keyword arguments via **common.
common = dict(
    dr_program="PC",
    batch_size=31,
    results_dir="./_scratch",
    dry_run=0,
)

# MPCOneShot

In [6]:
# Build the MPCOneShot run configuration from the shared settings.
config = get_config("MPCOneShot", **common)
# Bare last expression: the notebook displays the resulting config.
config

{'policy_type': 'MPCOneShot',
 'policy_config': {'tee': False},
 'batch_size': 31,
 'dr_program': 'PC',
 'scenario_config': {'start_time': '00:05:00',
  'end_time': '23:55:00',
  'zone_temp_init_mean': 26.0},
 'dry_run': 0,
 'results_dir': './_scratch'}

In [7]:
# Run the one-shot MPC policy. run() returns a (loss, rollout, meta) triple;
# rollout0 and meta0 are reused by the sanity check in the next cell.
runner0 = MPCOneShotRunner(**config)
loss0, rollout0, meta0 = runner0.run()

INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (1/31)
INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (2/31)
INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (3/31)
INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (4/31)
INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (5/31)
INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (6/31)
INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (7/31)
INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (8/31)
INFO:/Users/dbiagion/gitrepos/learning-building-control/lbc/policies/mpc.py:one-shot: batch sample (9/31)
INFO:/Users/dbiagion/gitrepos/learning-buildin

In [8]:
# Sanity check: the zone temperatures planned by MPC should nearly coincide
# with the ones realized in the rollout, because MPC has an idealized model
# of the dynamics and exogenous variables.

# Planned values: the "zone_temp*" columns of the first entry of meta0["df"].
df = meta0["df"][0]
zone_temp_cols = [col for col in df.columns if col.startswith("zone_temp")]
z1 = df[zone_temp_cols]

# Realized values: index 0 along the second axis of the rollout's zone temps
# (presumably the matching batch sample -- verify against the runner).
z2 = rollout0.data["zone_temp"][:, 0, :].squeeze()

# Norm of the mismatch relative to the norm of the plan, as a percentage.
mismatch_norm = np.linalg.norm(z1 - z2)
planned_norm = np.linalg.norm(z1)
relerr = mismatch_norm / planned_norm * 100.
print(f"relative error: {relerr:.1f}%")

relative error: 0.3%


In [9]:
# _, df, figs = run_analysis(rollout0, dr, secondary=True)

In [10]:
# _ = plot_costs(rollout0, secondary="pc_limit")

# MPC

In [11]:
# Build the MPC run configuration from the shared settings.
config = get_config("MPC", **common)
# Bare last expression: the notebook displays the resulting config.
config

{'policy_type': 'MPC',
 'policy_config': {'num_lookahead_steps': 4, 'tee': False},
 'batch_size': 31,
 'dr_program': 'PC',
 'scenario_config': {'start_time': '00:05:00',
  'end_time': '23:55:00',
  'zone_temp_init_mean': 26.0,
  'dr_program': <lbc.demand_response.DemandResponseProgram at 0x7fcf2c16b8e0>},
 'dry_run': 0,
 'results_dir': './_scratch'}

In [12]:
# Run the MPC policy with the config built above.
# Slow cell: the recorded run below took roughly 15 minutes.
runner1 = MPCRunner(**config)
loss1, rollout1, meta1 = runner1.run()

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 287/287 [15:10<00:00,  3.17s/it]
INFO:lbc.experiments.runner:[MPC-PC-4] bsz=31, loss=5665.970, time=910.7
INFO:lbc.experiments.runner:saved to ./_scratch/MPC-PC-4.p


In [13]:
# _, df, figs = run_analysis(rollout1, dr, secondary=True)

In [14]:
# _ = plot_costs(rollout1, secondary="pc_limit")

# CPL - No learning

In [15]:
# Build the CPL run configuration from the shared settings. The recorded
# output shows use_value_function = 0 by default, i.e. no learning yet.
config = get_config("CPL", **common)
# Bare last expression: the notebook displays the resulting config.
config

{'policy_type': 'CPL',
 'policy_config': {'lookahead': 4,
  'lr': 10,
  'num_epochs': 50,
  'use_value_function': 0,
  'num_time_windows': 24},
 'batch_size': 31,
 'dr_program': 'PC',
 'scenario_config': {'start_time': '00:05:00',
  'end_time': '23:55:00',
  'zone_temp_init_mean': 26.0,
  'dr_program': <lbc.demand_response.DemandResponseProgram at 0x7fcf2c16ba00>},
 'dry_run': 0,
 'results_dir': './_scratch'}

In [16]:
# Instantiate the CPL runner from the config built in the previous cell.
runner2 = CPLRunner(**config)

# This runs the policy without training (q and Q_sqrt are 0)
loss2, rollout2, meta2 = runner2.run()

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.
This code path has been hit 1 times so far.

INFO:lbc.experiments.runner:[CPL-PC-4-0] bsz=31, loss=5787.065, time=46.7
INFO:lbc.experiments.runner:saved to ./_scratch/CPL-PC-4-0.p


In [17]:
# _, df, figs = run_analysis(rollout2, dr, secondary=True)

In [18]:
# _ = plot_costs(rollout2, secondary="pc_limit")

# CPL - Learning

In [19]:
# Set config params for learning.
#
# Start from a fresh CPL config rather than editing the dict left over from
# the "no learning" section: that dict was already unpacked into runner2, so
# mutating its nested "policy_config" in place would make this cell depend on
# (and possibly alter) state created by earlier cells.
config = get_config("CPL", **common)
config["policy_config"].update(
    use_value_function=1,
    num_time_windows=24,
    lr=1,
    num_epochs=20,  # increase this if truly trying to train
)
# Bare last expression: the notebook displays the resulting config.
config

{'policy_type': 'CPL',
 'policy_config': {'lookahead': 4,
  'lr': 1,
  'num_epochs': 20,
  'use_value_function': 1,
  'num_time_windows': 24},
 'batch_size': 31,
 'dr_program': 'PC',
 'scenario_config': {'start_time': '00:05:00',
  'end_time': '23:55:00',
  'zone_temp_init_mean': 26.0,
  'dr_program': <lbc.demand_response.DemandResponseProgram at 0x7fcf2c6d0be0>},
 'dry_run': 0,
 'results_dir': './_scratch'}

In [20]:
# Instantiate a CPL runner from the learning-enabled config.
runner3 = CPLRunner(**config)

# Note the use of train_policy() rather than run()
# Slow cell: the recorded 20-epoch run below took about 1h06m.
loss3, rollout3, meta3 = runner3.train_policy()

This use of ``*`` has resulted in matrix multiplication.
Using ``*`` for matrix multiplication has been deprecated since CVXPY 1.1.
    Use ``*`` for matrix-scalar and vector-scalar multiplication.
    Use ``@`` for matrix-matrix and matrix-vector multiplication.
    Use ``multiply`` for elementwise multiplication.
This code path has been hit 2 times so far.

2923.674, 4155.171: 100%|███████████████████████████████████████████████████████████████████████████| 20/20 [1:06:28<00:00, 199.42s/it]


In [21]:
# Run the runner again after training. The returned values are discarded;
# the recorded output shows the run logging its loss and saving a results file.
_ = runner3.run()

INFO:lbc.experiments.runner:[CPL-PC-4-1] bsz=31, loss=4155.171, time=53.3
INFO:lbc.experiments.runner:saved to ./_scratch/CPL-PC-4-1.p


In [22]:
# _, df, figs = run_analysis(rollout3, dr, secondary=True)

In [23]:
# _ = plot_costs(rollout3, secondary="pc_limit")

# DPC

In [24]:
# Build the DPC run configuration, then override the model's number of time
# windows and the training hyperparameters (epochs, learning rate).
config = get_config("DPC", **common)
config["policy_config"]["model_config"].update(num_time_windows=96)
config["policy_config"].update(num_epochs=100, lr=1e-2)
# Bare last expression: the notebook displays the resulting config.
config

{'policy_type': 'DPC',
 'policy_config': {'model_config': {'hidden_dim': 128, 'num_time_windows': 96},
  'lr': 0.01,
  'num_epochs': 100},
 'batch_size': 31,
 'dr_program': 'PC',
 'scenario_config': {'start_time': '00:05:00',
  'end_time': '23:55:00',
  'zone_temp_init_mean': 26.0,
  'dr_program': <lbc.demand_response.DemandResponseProgram at 0x7fcf2d64d3d0>},
 'dry_run': 0,
 'results_dir': './_scratch'}

In [25]:
# Train the DPC policy with the config built above.
# The recorded 100-epoch run below took about two minutes.
runner4 = DPCRunner(**config)
loss4, rollout4, meta4 = runner4.train_policy()

2771.603, 4014.342,: 100%|███████████████████████████████████████████████████████████████████████████| 100/100 [02:08<00:00,  1.28s/it]


In [26]:
# Run the runner again after training. r4 and l4 are referenced by the
# (commented-out) MPCOneShot consistency check further down.
l4, r4, m4 = runner4.run()

INFO:lbc.experiments.runner:[DPC-PC] bsz=31, loss=4014.342, time=0.4
INFO:lbc.experiments.runner:saved to ./_scratch/DPC-PC.p


In [27]:
# fig, ax = plt.subplots()
# _ = pd.DataFrame(meta4["losses"], columns=["train loss"]).plot(ax=ax)
# _ = pd.DataFrame(meta4["test_losses"], columns=["test loss"]).plot(ax=ax)

In [27]:
# _, df, figs = run_analysis(rollout4, dr, secondary=True)

In [28]:
# _ = plot_costs(rollout4, secondary="pc_limit")

The code below can be used to validate that the MPC model reproduces the cost of another policy when it is constrained to take that policy's actions.

In [28]:
# Verify that running the DPC actions through MPCOneShot yields the same cost.
# This helps validate consistency between the models in terms of both dynamics
# and cost. We do this by adding hard constraints that MPC actions == DPC actions
# and running the MPC model again.
# config = get_config("MPCOneShot", **common)
# config["policy_config"]["action_sequence"] = r4.data["clipped_action"].copy()

# runner0 = MPCOneShotRunner(**config)
# l0, r0, m0 = runner0.run()

# Compare the loss computed in MPCOneShot using DPC actions

In [29]:
# print("DPC loss:      ", l4.mean().item())
# print("MPCOneShotLoss:", l0.mean().item())

# RLC

In [30]:
# Build the RLC run configuration from the shared settings.
config = get_config("RLC", **common)

# Shut down Ray in case an instance is still running from an earlier run.
# This is deliberately best-effort: a failure to import or shut down Ray must
# not stop the cell here. Catch Exception rather than using a bare `except:`
# so that KeyboardInterrupt and SystemExit still propagate.
try:
    import ray
    ray.shutdown()
except Exception:
    pass

# Run the RLC policy; run() returns a (loss, rollout, meta) triple.
runner5 = RLCRunner(**config)
loss5, rollout5, meta5 = runner5.run()

2022-02-13 16:17:42,305	INFO trainer.py:722 -- Your framework setting is 'tf', meaning you are using static-graph mode. Set framework='tf2' to enable eager execution with tf2.x. You may also want to then set `eager_tracing=True` in order to reach similar execution speed as with static-graph mode.
2022-02-13 16:17:42,306	INFO ppo.py:166 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2022-02-13 16:17:42,307	INFO trainer.py:743 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=37437)[0m   logger.warn(
2022-02-13 16:17:50,335	INFO trainable.py:467 -- Restored on 127.0.0.1 from checkpoint: /Users/dbiagion/gitrepos/learning-building-control/lbc/policies/rlc_checkpoints/power_constrained/checkpoint/checkpoint
2022-02-13 16:17:50,336	INFO trainable.py:475 -- Current state after restoring: 

In [31]:
#_, df, figs = run_analysis(rollout5, dr, secondary=True)

In [32]:
# _ = plot_costs(rollout5, secondary="pc_limit")

# RBC

In [33]:
# Build the RBC run configuration from the shared settings.
config = get_config("RBC", **common)
# Bare last expression: the notebook displays the resulting config.
config

{'policy_type': 'RBC',
 'policy_config': {'setpoints': [(0, 27),
   (83, 24),
   (131, 21),
   (143, 24),
   (215, 24),
   (215, 27)],
  'p_flow': 1.0,
  'p_temp': 1.0},
 'batch_size': 31,
 'dr_program': 'PC',
 'scenario_config': {'start_time': '00:05:00',
  'end_time': '23:55:00',
  'zone_temp_init_mean': 26.0,
  'dr_program': <lbc.demand_response.DemandResponseProgram at 0x7fcf1ac46af0>},
 'dry_run': 0,
 'results_dir': './_scratch'}

In [34]:
# Run the RBC policy with the config built above; run() returns a
# (loss, rollout, meta) triple.
runner6 = RBCRunner(**config)
loss6, rollout6, meta6 = runner6.run()

INFO:lbc.experiments.runner:[RBC-PC-1.000-1.000] bsz=31, loss=451170.344, time=0.3
INFO:lbc.experiments.runner:saved to ./_scratch/RBC-PC-1.000-1.000.p
