In [1]:
import warnings
warnings.simplefilter("always")

# Run this again after editing submodules so Colab uses the updated versions
from citylearn import  CityLearn
from pathlib import Path
from TD3 import Agent as Agent
import numpy as np                                                                                                                                                                                      
import torch
import matplotlib.pyplot as plt
import time
from tqdm import tqdm
from utils import agent_checkpoint_cost
import seaborn as sns
import os

  from collections import Mapping


In [2]:
# Load environment
climate_zone = 1
end_time = 8759 # total number of hours to train for
cost_analysis = 24 # analyze costs every x hours
checkpoint = 24 * 7 # save parameters every x hours

params = {'data_path':Path("data/Climate_Zone_"+str(climate_zone)), 
        'building_attributes':'building_attributes.json', 
        'weather_file':'weather_data.csv', 
        'solar_profile':'solar_generation_1kW.csv', 
        'carbon_intensity':'carbon_intensity.csv',
        'building_ids':["Building_"+str(i) for i in [1,2,3,4,5,6,7,8,9]],
        'buildings_states_actions':'buildings_state_action_space.json', 
        'simulation_period': (0, end_time), 
        'cost_function': ['ramping','1-load_factor','average_daily_peak','peak_demand','net_electricity_consumption','carbon_emissions'], 
        'central_agent': False,
        'cost_analysis' : cost_analysis,
        'save_memory': False }

# Contain the lower and upper bounds of the states and actions, to be provided to the agent to normalize the variables between 0 and 1.
env = CityLearn(**params)
env.pretrain_baseline_model((0, end_time))

observations_spaces, actions_spaces = env.get_state_action_spaces()

# Provides information on Building type, Climate Zone, Annual DHW demand, Annual Cooling Demand, Annual Electricity Demand, Solar Capacity, and correllations among buildings
building_info = env.get_building_information()

Time taken to add RBC pretrained model (sec): 17.982


In [3]:
def get_mse_critic():
    if not agent.did_i_just_finish_training():
            return None, None
    try:
        data = {}
        NUM_DAYS = len(agent.critic_optim.debug['ramping_cost'])

        for key, _d in agent.critic_optim.debug.items():
            data[key] = []
            for day in _d:
                data[key].append(day.value)
            data[key] = np.reshape(data[key], (NUM_DAYS * 24))

        rc = data['ramping_cost']
        peak = data['peak_net_electricity_cost']
        ec = data['electricity_cost']

        A = np.vstack((-rc, -peak)).T
        y = np.reshape(agent.critic_optim.problem.param_dict['y_r'].value, (NUM_DAYS * 24))

        x, residuals, rank, s = np.linalg.lstsq(A, y, rcond=None)
        # print(f"Rank: {rank}\tCondition Number: {np.linalg.cond(A)}")
        # print(f"MSE across {NUM_DAYS} days: {round(np.linalg.norm(A@x - y), 5)}")
        return round(np.linalg.norm(A@x - y), 5), round(np.linalg.cond(A), 3)
    except:
        return None, None

In [4]:
env._get_ob().shape

(9, 30)

In [None]:
params_agent = {'building_ids':["Building_"+str(i) for i in [1,2,3,4,5,6,7,8,9]],
                 'buildings_states_actions':'buildings_state_action_space.json', 
                 'building_info':building_info,
                 'observation_spaces':observations_spaces,
                 'agent_checkpoint' : checkpoint,
                 'action_spaces':actions_spaces}

# Instantiating the control agent(s)
rewards = []
critic_mse = [] # only last building to be added MSE will be recorded

agent = Agent(**params_agent)

state = env.reset()
done = False

ZO_iRL_costs = np.load("ZOiRL_cost_curve.npy")

with tqdm(total=end_time) as pbar:
    while not done:
        
        # get agent
        # day_type = env.buildings['Building_1'].sim_results['day'][env.time_step]
        # agent = agents.get_agent(day_type)
        # get agent
        
        action, _ = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.add_to_buffer(state, action, reward, next_state, done)
        state = next_state

        rewards.append(reward)

        try:
            mse, condition = get_mse_critic()
            pim = round(100 * (ZO_iRL_costs[env.time_step] - env.costs_periodic[-1]['total']) / ZO_iRL_costs[env.time_step], 3)
            txt = f"iAC: {round(env.costs_periodic[-1]['total'], 3)} , ZOiRL: {round(ZO_iRL_costs[env.time_step], 3)} , %ipr: {pim}"
            if len(critic_mse) > 0:
                txt += f", MSE: {round(critic_mse[-1][0], 3)} , CN: {round(critic_mse[-1][1], 3)}"
                
            if mse is not None:
                critic_mse.append([mse, condition])
                
            pbar.set_description(txt)
        except:
            pass
        
        pbar.update(1)

rewards = np.array(rewards)

iAC: 1.413 , ZOiRL: 1.406 , %ipr: -0.503:   5%|▌         | 479/8759 [00:29<16:31,  8.35it/s] 

E2E	Building: 0, r: 22our: 23

iAC: 1.406 , ZOiRL: 1.403 , %ipr: -0.209:   5%|▌         | 480/8759 [02:33<85:38:57, 37.24s/it]

E2E	Building: 0, r: 23Time taken for training: 123.56
MODEL COSTS:Building 0: -27811.399

iAC: 1.38 , ZOiRL: 1.373 , %ipr: -0.53, MSE: 139919.75 , CN: 21.614:   7%|▋         | 574/8759 [02:51<17:09,  7.95it/s]     

E2E	Building: 0, r: 22our: 23

iAC: 1.372 , ZOiRL: 1.37 , %ipr: -0.123, MSE: 139919.75 , CN: 21.614:   7%|▋         | 576/8759 [04:57<66:39:53, 29.33s/it]

E2E	Building: 0, r: 23Time taken for training: 126.6
MODEL COSTS:Building 0: -17980.97

iAC: 1.343 , ZOiRL: 1.337 , %ipr: -0.407, MSE: 24355.751 , CN: 17.252:   8%|▊         | 671/8759 [05:15<26:28,  5.09it/s]   

E2E	Building: 0, r: 22our: 23

iAC: 1.338 , ZOiRL: 1.335 , %ipr: -0.218, MSE: 24355.751 , CN: 17.252:   8%|▊         | 672/8759 [07:17<82:38:12, 36.79s/it]

E2E	Building: 0, r: 23Time taken for training: 121.98
MODEL COSTS:Building 0: -5097.383

iAC: 1.279 , ZOiRL: 1.286 , %ipr: 0.57, MSE: 4062.97 , CN: 11.544:   9%|▉         | 767/8759 [07:33<17:12,  7.74it/s]       

E2E	Building: 0, r: 15our: 23

In [None]:
plt.plot(np.array(critic_mse)[:, 0])

In [None]:
np.array(critic_mse)[:, 0]

In [None]:
E_grid, E_grid_true, E_grid_prevhour = agent.critic_target[0].temp

In [None]:
agent.critic_optim.problem.param_dict['E_grid'].value

In [None]:
E_grid_prevhour.shape

In [None]:
agent.critic[0].normalize(E_grid_true[:, 0], _E_grid)

In [None]:
agent.memory.sample()[0][0]["E_grid_pkhist"].shape

In [None]:
x = agent.actor.debug[0](*agent.actor.debug[1].values())[0]

In [None]:
x.detach().numpy()

In [None]:
agent.critic_optim.test['E_grid'].min()

In [None]:
x.data = torch.tensor(agent.critic[0].normalize(x.detach().numpy(), x.detach().numpy()))

In [None]:
agent.critic_optim.problem.param_dict['E_grid'].value.min()

In [None]:
agent.critic[0].debug

In [None]:
i=23
agent.critic[0].prob[i].solve('SCS')

In [None]:
agent.critic[0].prob[i].solve('SCS', eps=5e-2)

In [None]:
x = agent.critic[0].prob[i]

In [None]:
-r * ramping_cost - electricity_cost

In [None]:
x.solve(solver='SCS', max_iters=10_000_000, eps=5e-2)

In [None]:
np.sum(e[t:] * E_grid)

In [None]:
_type = 0
Q_value, ramping_cost, peak_hist_cost, electricity_cost, E_grid, E_grid_true, E_grid_prevhour, (bid, t) = agent.critic_target[_type].debug
r, e = agent.critic[_type].alpha_ramp[bid], agent.critic[_type].alpha_elec[bid]

In [None]:
Q_value

In [None]:
np.mean(agent.actor._grads[0]['p_ele_grad'], 1)

In [None]:
plt.plot(np.array(critic_mse)[:, 0])
plt.xlabel("Meta-episode #")
plt.ylabel("Cost")

In [None]:
agent.actor.debug[1]["E_grid_prevhour"]

In [None]:
np.array(rewards)[23:, 0].sum()

In [None]:
np.shape(critic_mse)

In [None]:
for k, v in agent.actor._losses.items():
    print(k, v)

In [None]:
np.array(rewards).sum(0)

In [None]:
total = [x["total"] for x in env.costs_periodic]

In [None]:
np.mean(agent.actor._grads[0]['p_ele_grad'], 1)

In [None]:
env.cost_detailed_view()['Building_9']['cost']

In [None]:
env.cost((0, end_time))

In [None]:
plt.plot(total, label="iAC")
plt.plot(ZO_iRL_costs[::24][1:], label="ZOiRL")
plt.legend()
plt.ylim(0.96, 1.5)
plt.xlabel("Day")
plt.ylabel("Cost")
plt.show()

In [None]:
np.where(ZO_iRL_costs[::24][1:] < total)

In [None]:
a, b = agent.memory.sample()

In [None]:
len(b)

In [None]:
data = {}
NUM_DAYS = len(agent.critic_optim.debug['ramping_cost'])

for key, _d in agent.critic_optim.debug.items():
    data[key] = []
    for day in _d:
        data[key].append(day.value)
    data[key] = np.reshape(data[key], (NUM_DAYS * 24))

In [None]:
agent.critic[0].alpha_ramp

In [None]:
rc = data['ramping_cost']
peak = data['peak_net_electricity_cost']
ec = data['electricity_cost']

A = np.vstack((-rc, -peak)).T
y = np.reshape(agent.critic_optim.problem.param_dict['y_r'].value, (NUM_DAYS * 24))

x, residuals, rank, s = np.linalg.lstsq(A, y, rcond=None)
print(f"Rank: {rank}\tCondition Number: {np.linalg.cond(A)}")
print(f"MSE across {NUM_DAYS} days: {round(np.linalg.norm(A@x - y), 5)}")

plt.plot(A@x, label='y_hat')
plt.plot(y, label='y')
plt.legend()
plt.xlabel("Hours")
plt.ylabel("Q-value")
plt.show()

In [None]:
(A@x).max(), y.max()

In [None]:
p = agent.critic_optim.problem

In [None]:
p.solve()

In [None]:
import cvxpy
for i in range(len(p.constraints)):
    x = p.constraints[i]
    if isinstance(x.args[0], cvxpy.atoms.affine.add_expr.AddExpression):
        assert x.args[0].value <= 1, (i, x.args[0].value)

In [None]:
agent.critic[0].alpha_ramp, agent.critic[0].alpha_peak1

In [None]:
agent.critic_target[0].alpha_ramp, agent.critic_target[0].alpha_peak1

In [None]:
agent.memory.replay_memory[0]['E_grid']

In [None]:
agent.critic[0].alpha_ramp

In [None]:
_t = np.array(agent._actor_zetas['p_ele_local']).T

In [None]:
plt.scatter(range(24), agent.actor.zeta['p_ele'][:, 0], c='k', label="Iteration 3")
plt.bar(range(24), _t[0, :, 0], label="Initial")
plt.bar(range(24), _t[0, :, 1], label="Iteration 1")
plt.bar(range(24), _t[0, :, 2], label="Iteration 2")
plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.15), ncol=4)
plt.show()

In [None]:
#set seaborn plotting aesthetics as default
sns.set(rc={'figure.figsize':(5,5)})
df = np.array(agent._actor_zetas['p_ele_local'])
df = np.append(df, np.expand_dims(agent.actor.zeta['p_ele'], 0)).reshape((9, 24, 9)).T

fig, axn = plt.subplots(2, 2, sharex=True, sharey=True)
cbar_ax = fig.add_axes([.91, .3, .03, .4])

for i, ax in enumerate(axn.flat):
    sns.heatmap(df[i], ax=ax,
                cbar=i == 0,
                cmap='viridis',
                cbar_ax=None if i else cbar_ax)
    ax.set(title=f"Building {i + 1}")

axn.flat[0].set(ylabel='Hour of Day')
axn.flat[2].set(ylabel='Hour of Day')
axn.flat[2].set(xlabel='Month')
axn.flat[3].set(xlabel='Month')


fig.tight_layout(rect=[0, 0, .9, 1])

In [None]:
plt.bar(range(24), agent.actor.zeta['p_ele'][:, 0])

In [None]:
np.array(agent._actor_zetas["p_ele_local"])[:, :, 0].min(1)

In [None]:
np.array(agent._actor_zetas["p_ele_target"])[:, :, 0].min(1)

In [None]:
agent.actor.scs_cnt, agent.actor.fail_cnt, agent.critic_optim.fail_cnt

In [None]:
agent.actor.zeta["p_ele"][:, 0]

In [None]:
agent.critic[0].alpha_peak1

In [None]:
agent.critic_target[0].alpha_peak1

In [None]:
agent.actor.zeta

In [None]:
t = np.random.rand(101, 30)

In [None]:
v = np.expand_dims(t, axis=(2, 3))

In [None]:
np.linalg.matrix_rank(t)

In [None]:
total[100]