In [None]:
import sys

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline
%load_ext autoreload

In [None]:
%autoreload 2
from lbc.analysis import run_analysis, plot_costs

In [None]:
%autoreload 2
from lbc.experiments import (
    DPCRunner, MPCOneShotRunner, MPCRunner, RLCRunner, CPLRunner, RBCRunner)

In [None]:
%autoreload 2
from lbc.experiments.config import get_config, SCENARIO_TEST

In [None]:
# Shared keyword overrides that are forwarded to get_config() for all policies.
common = dict(
    dr_program="PC",
    batch_size=3,
    results_dir="./_scratch",
    # scenario_config=SCENARIO_TEST,  # this is how you would run the shorter test scenario
)

# MPCOneShot

In [None]:
# Build the MPCOneShot config, applying the shared `common` overrides.
config = get_config("MPCOneShot", **common)
config  # bare last expression so the notebook displays the resulting config

In [None]:
# Construct the MPCOneShot runner from the config and execute it.
# run() returns three values, unpacked here as (loss, rollout, meta).
runner0 = MPCOneShotRunner(**config)
loss0, rollout0, meta0 = runner0.run()

In [None]:
# Sanity check: the zone temperatures planned by MPC should nearly coincide
# with the zone temperatures realized in the rollout, because MPC works with
# an idealized model of the dynamics and of the exogenous variables.

# Planned side: the "zone_temp*" columns of the first dataframe in the MPC metadata.
df = meta0["df"][0]
zone_temp_cols = [name for name in df.columns if name.startswith("zone_temp")]
z1 = df[zone_temp_cols]

# Realized side: rollout zone temps at index 0 of the second axis
# (presumably the matching batch entry -- TODO confirm).
z2 = rollout0.data["zone_temp"][:, 0, :].squeeze()

# Relative error, in percent, of the rollout with respect to the plan.
planned_norm = np.linalg.norm(z1)
relerr = np.linalg.norm(z1 - z2) / planned_norm * 100.
print(f"relative error: {relerr:.1f}%")

In [None]:
# _, df, figs = run_analysis(rollout0, dr, secondary=True)

In [None]:
# _ = plot_costs(rollout0, secondary="pc_limit")

# MPC

In [None]:
# Build the MPC config, applying the shared `common` overrides.
config = get_config("MPC", **common)
config  # bare last expression so the notebook displays the resulting config

In [None]:
# Construct the MPC runner from the config and execute it.
# run() returns three values, unpacked here as (loss, rollout, meta).
runner1 = MPCRunner(**config)
loss1, rollout1, meta1 = runner1.run()

In [None]:
# _, df, figs = run_analysis(rollout1, dr, secondary=True)

In [None]:
# _ = plot_costs(rollout1, secondary="pc_limit")

# CPL - No learning

In [None]:
# Build the CPL config with only the shared `common` overrides
# (no learning-specific settings are changed in this section).
config = get_config("CPL", **common)
config  # bare last expression so the notebook displays the resulting config

In [None]:
# Construct the CPL runner from the unmodified CPL config.
runner2 = CPLRunner(**config)

# This runs the policy without training (q and Q_sqrt are 0)
loss2, rollout2, meta2 = runner2.run()

In [None]:
# _, df, figs = run_analysis(rollout2, dr, secondary=True)

In [None]:
# _ = plot_costs(rollout2, secondary="pc_limit")

# CPL - Learning

In [None]:
# Start again from a fresh CPL config; the next cell overrides its policy
# settings for learning.
config = get_config("CPL", **common)
config  # bare last expression so the notebook displays the resulting config

In [None]:
# Override the CPL policy settings used for learning.
policy_cfg = config["policy_config"]
policy_cfg["use_value_function"] = 1
policy_cfg["num_time_windows"] = 24
policy_cfg["lr"] = 1
policy_cfg["num_epochs"] = 1  # increase this if truly trying to train
config

In [None]:
# Construct the CPL runner from the learning-enabled config.
runner3 = CPLRunner(**config)

# Note the use of train_policy() rather than run()
# train_policy() returns three values, unpacked here as (loss, rollout, meta).
loss3, rollout3, meta3 = runner3.train_policy()

In [None]:
# Run the CPL runner after training; the returned values are discarded by
# binding them to `_` (which also suppresses the cell output).
_ = runner3.run()

In [None]:
# _, df, figs = run_analysis(rollout3, dr, secondary=True)

In [None]:
# _ = plot_costs(rollout3, secondary="pc_limit")

# DPC

In [None]:
# Build the DPC config from the shared overrides, then tune its policy settings.
config = get_config("DPC", **common)

policy_cfg = config["policy_config"]
policy_cfg["model_config"]["num_time_windows"] = 96
policy_cfg["num_epochs"] = 10
policy_cfg["lr"] = 1e-2

config

In [None]:
# Construct the DPC runner and train its policy.
# train_policy() returns three values, unpacked here as (loss, rollout, meta).
runner4 = DPCRunner(**config)
loss4, rollout4, meta4 = runner4.train_policy()

In [None]:
# Run the DPC runner after training; results are kept under separate names
# (l4, r4, m4) so the train_policy() outputs above are not overwritten.
l4, r4, m4 = runner4.run()

In [None]:
# fig, ax = plt.subplots()
# _ = pd.DataFrame(meta4["losses"], columns=["train loss"]).plot(ax=ax)
# _ = pd.DataFrame(meta4["test_losses"], columns=["test loss"]).plot(ax=ax)

In [None]:
# _, df, figs = run_analysis(rollout4, dr, secondary=True)

In [None]:
# _ = plot_costs(rollout4, secondary="pc_limit")

The code below can be used to validate that the cost computed by the MPC model matches the cost reported by another policy when MPC is constrained to take that policy's actions.

In [None]:
# Verify that running the DPC actions through MPCOneShot yields the same cost.
# This helps validate consistency between the models in terms of both dynamics
# and cost. We do this by adding hard constraints that MPC actions == DPC actions 
# and run the MPC model again.
# config = get_config("MPCOneShot", **common)
# config["policy_config"]["action_sequence"] = r4.data["clipped_action"].copy()

# runner0 = MPCOneShotRunner(**config)
# l0, r0, m0 = runner0.run()

# Compare the loss computed in MPCOneShot using DPC actions

In [None]:
# print("DPC loss:      ", l4.mean().item())
# print("MPCOneShotLoss:", l0.mean().item())

# RLC

In [None]:
# Build the RLC config, applying the shared `common` overrides.
config = get_config("RLC", **common)

# Shut down ray in case it's still running. This is best-effort cleanup:
# a missing ray install is ignored silently, and any other failure is
# reported but does not stop the notebook. Unlike a bare `except:`, this
# does not swallow KeyboardInterrupt or SystemExit, so the cell can still
# be interrupted.
try:
    import ray
    ray.shutdown()
except ImportError:
    # ray is not installed, so there is nothing to shut down.
    pass
except Exception as err:
    print(f"ray.shutdown() failed, continuing anyway: {err!r}")

# Construct the RLC runner and execute it.
# run() returns three values, unpacked here as (loss, rollout, meta).
runner5 = RLCRunner(**config)
loss5, rollout5, meta5 = runner5.run()

In [None]:
#_, df, figs = run_analysis(rollout5, dr, secondary=True)

In [None]:
# _ = plot_costs(rollout5, secondary="pc_limit")

# RBC

In [None]:
# Build the RBC config, applying the shared `common` overrides.
config = get_config("RBC", **common)
config  # bare last expression so the notebook displays the resulting config

In [None]:
# Construct the RBC runner from the config and execute it.
# run() returns three values, unpacked here as (loss, rollout, meta).
runner6 = RBCRunner(**config)
loss6, rollout6, meta6 = runner6.run()