In [1]:
# Run this again after editing submodules so Colab uses the updated versions
from citylearn import  CityLearn
from pathlib import Path
from TD3 import Agent as Agent
import numpy as np                                                                                                                                                                                      
import torch
import matplotlib.pyplot as plt
import time
from tqdm import tqdm
from utils import agent_checkpoint_cost
import seaborn as sns

In [2]:
# Environment configuration and construction
climate_zone = 1
end_time = 8759  # total number of hours to train for
cost_analysis = 24  # analyze costs every x hours
checkpoint = 24 * 7  # save parameters every x hours

# Keyword arguments forwarded verbatim to the CityLearn constructor.
params = {
    'data_path': Path(f"data/Climate_Zone_{climate_zone}"),
    'building_attributes': 'building_attributes.json',
    'weather_file': 'weather_data.csv',
    'solar_profile': 'solar_generation_1kW.csv',
    'carbon_intensity': 'carbon_intensity.csv',
    'building_ids': [f"Building_{i}" for i in range(1, 10)],
    'buildings_states_actions': 'buildings_state_action_space.json',
    'simulation_period': (0, end_time),
    'cost_function': [
        'ramping',
        '1-load_factor',
        'average_daily_peak',
        'peak_demand',
        'net_electricity_consumption',
        'carbon_emissions',
    ],
    'central_agent': False,
    'cost_analysis': cost_analysis,
    'save_memory': False,
}

env = CityLearn(**params)
# Pretrain the baseline model over the same period as the simulation.
env.pretrain_baseline_model((0, end_time))

# The spaces contain the lower and upper bounds of the states and actions; they are
# provided to the agent so it can normalize the variables between 0 and 1.
observations_spaces, actions_spaces = env.get_state_action_spaces()

# Provides information on building type, climate zone, annual DHW demand, annual cooling
# demand, annual electricity demand, solar capacity, and correlations among buildings.
building_info = env.get_building_information()

Time taken to add RBC pretrained model (sec): 18.773


In [3]:
def get_mse_critic():
    """Report how well a linear least-squares model fits the critic's targets.

    Reads the notebook-level ``agent``. Right after the agent reports that it has
    finished a training step, the per-day debug values stored in
    ``agent.critic_optim.debug`` are flattened into hourly vectors, the negated
    ``ramping_cost``, ``peak_net_electricity_cost`` and ``electricity_cost`` vectors
    are used as the columns of a design matrix ``A``, and ``A @ x = y`` is solved in
    the least-squares sense against the ``y_r`` parameter of the critic problem.

    Returns:
        tuple: ``(fit_error, condition_number)`` where ``fit_error`` is the L2 norm of
        the residual ``A @ x - y`` rounded to 5 decimals (note: a norm, not a mean
        squared error, despite the function name) and ``condition_number`` is
        ``np.linalg.cond(A)`` rounded to 3 decimals. ``(None, None)`` is returned when
        the agent did not just finish training or when the fit cannot be computed.
    """
    if not agent.did_i_just_finish_training():
        return None, None
    try:
        debug = agent.critic_optim.debug
        NUM_DAYS = len(debug['ramping_cost'])
        # Assumes each stored day holds 24 hourly values; the reshape raises (and the
        # function falls back to (None, None)) when that does not hold.
        num_hours = NUM_DAYS * 24

        data = {
            key: np.reshape([day.value for day in days], num_hours)
            for key, days in debug.items()
        }

        rc = data['ramping_cost']
        peak = data['peak_net_electricity_cost']
        ec = data['electricity_cost']

        # One negated cost term per column.
        A = np.vstack((-rc, -peak, -ec)).T
        y = np.reshape(agent.critic_optim.problem.param_dict['y_r'].value, num_hours)

        x, _residuals, _rank, _singular_values = np.linalg.lstsq(A, y, rcond=None)
        return round(np.linalg.norm(A @ x - y), 5), round(np.linalg.cond(A), 3)
    except Exception:
        # Best-effort diagnostic: missing debug data or a failed fit must not stop
        # training. `Exception` (rather than a bare `except`) keeps Ctrl-C /
        # KeyboardInterrupt and SystemExit working.
        return None, None

In [None]:
# Keyword arguments forwarded verbatim to the TD3 Agent constructor.
params_agent = {'building_ids':["Building_"+str(i) for i in [1,2,3,4,5,6,7,8,9]],
                 'buildings_states_actions':'buildings_state_action_space.json', 
                 'building_info':building_info,
                 'observation_spaces':observations_spaces,
                 'agent_checkpoint' : checkpoint,
                 'action_spaces':actions_spaces}

# Histories collected during the run.
rewards = []
# [fit error, condition number] pairs returned by get_mse_critic().
# Per the original note: only the MSE of the last building to be added is recorded.
critic_mse = []

# Instantiating the control agent(s)
agent = Agent(**params_agent)

state = env.reset()
done = False

with tqdm(total=end_time) as pbar:
    while not done:
        # One environment step: act, observe, store the transition.
        action, _ = agent.select_action(state)
        next_state, reward, done, _ = env.step(action)
        agent.add_to_buffer(state, action, reward, next_state, done)
        state = next_state

        rewards.append(reward)

        # Progress-bar status is best-effort: it must never interrupt training.
        try:
            mse, condition = get_mse_critic()
            # Record the new fit BEFORE touching env.costs_periodic, so a failing cost
            # lookup cannot drop the value, and so the description shows the latest fit.
            if mse is not None:
                critic_mse.append([mse, condition])

            txt = f"Cost: {round(env.costs_periodic[-1]['total'], 5)}"
            if len(critic_mse) > 0:
                txt += f"\tCritic MSE: {critic_mse[-1][0]}\tCondition number: {critic_mse[-1][1]}"

            pbar.set_description(txt)
        except Exception:
            # e.g. no periodic cost is available yet. `Exception` (not a bare `except`)
            # lets KeyboardInterrupt stop this long-running loop.
            pass

        pbar.update(1)

Cost: 1.40566:   6%|▌         | 502/8759 [00:34<16:00,  8.59it/s] 

L2 Optim	Building: 2	Hour: 23
L2 Optim	Building: 2	Hour: 01S at t = 23 for building 2

In [None]:
# Inspect the mean of the first 'p_ele_grad' entry stored under agent.actor._grads[0].
# NOTE(review): what the two 0 indices select (building? update step?) is defined in TD3 — confirm.
agent.actor._grads[0]['p_ele_grad'][0].mean()

In [None]:
# Plot the critic fit error recorded during training (first column of critic_mse).
# reshape(-1, 2) keeps the array 2-D even when nothing has been recorded yet, so the
# column slice no longer raises IndexError on an empty history.
critic_history = np.array(critic_mse, dtype=float).reshape(-1, 2)
plt.plot(critic_history[:, 0])
plt.xlabel("Recorded critic fit")
plt.ylabel("Critic MSE (residual norm)")
plt.show()

In [None]:
# Dump every entry of the actor's loss record as "<name> <value>".
for loss_name, loss_value in agent.actor._losses.items():
    print(loss_name, loss_value)

In [None]:
# Collect the "total" entry of each periodic cost record kept by the environment.
total = [period_costs["total"] for period_costs in env.costs_periodic]

In [None]:
# Average the stored 'p_ele_grad' values of agent.actor._grads[0] along axis 1.
np.mean(agent.actor._grads[0]['p_ele_grad'], axis=1)

In [None]:
# Plot the total cost of every periodic cost record, in recording order.
plt.plot(total)
plt.xlabel("Cost analysis period")
plt.ylabel("Total cost")
plt.show()

In [None]:
# Flatten every per-day debug series of the critic optimizer into one vector of
# NUM_DAYS * 24 values (assumes 24 values per stored day — the reshape fails otherwise).
NUM_DAYS = len(agent.critic_optim.debug['ramping_cost'])

data = {
    series_name: np.reshape([day.value for day in days], (NUM_DAYS * 24))
    for series_name, days in agent.critic_optim.debug.items()
}

In [None]:
# Regressors: the three flattened cost series.
rc = data['ramping_cost']
peak = data['peak_net_electricity_cost']
ec = data['electricity_cost']

# Design matrix with one negated cost series per column; the target is the critic
# problem's 'y_r' parameter flattened to the same length.
A = np.vstack((-rc, -peak, -ec)).T
y = np.reshape(agent.critic_optim.problem.param_dict['y_r'].value, NUM_DAYS * 24)

# Least-squares fit A @ x ~ y; the fitted values are reused for the report and the plot.
x, residuals, rank, s = np.linalg.lstsq(A, y, rcond=None)
y_hat = A @ x
condition_number = np.linalg.cond(A)
# NOTE: printed under the label "MSE", but this is the L2 norm of the residual.
fit_error = round(np.linalg.norm(y_hat - y), 5)

print(f"Rank: {rank}\tCondition Number: {condition_number}")
print(f"MSE across {NUM_DAYS} days: {fit_error}")

# Fitted values against the targets.
for series, series_label in ((y_hat, 'y_hat'), (y, 'y')):
    plt.plot(series, label=series_label)
plt.legend()
plt.xlabel("Hours")
plt.ylabel("Q-value")
plt.show()

In [None]:
# Inspect the `alpha_peak1` attribute of the first critic.
# NOTE(review): presumably the critic's coefficient on a peak-cost term — confirm in TD3.
agent.critic[0].alpha_peak1

In [None]:
# Average the first critic's `alpha_elec` values along axis 1.
np.mean(agent.critic[0].alpha_elec, axis=1)

In [None]:
# Inspect the first entry of the first critic's `alpha_elec`.
agent.critic[0].alpha_elec[0]

In [None]:
# Inspect `alpha_peak1` on the first target critic (e.g. to compare with agent.critic[0].alpha_peak1).
agent.critic_target[0].alpha_peak1

In [None]:
# Stored 'p_ele_local' snapshots as an array with all axes reversed.
# NOTE(review): the plot that uses `_t` indexes it as [building, hour, snapshot] — confirm the stored layout.
_t = np.transpose(np.array(agent._actor_zetas['p_ele_local']))

In [None]:
# Overlay the first three stored snapshots of `_t[0]` as bars, one value per hour,
# and draw the actor's current 'p_ele' values (column 0) as a black line on top.
hours = range(24)
snapshot_labels = ("Initial", "Iteration 1", "Iteration 2")
for snapshot_idx, snapshot_label in enumerate(snapshot_labels):
    plt.bar(hours, _t[0, :, snapshot_idx], label=snapshot_label)

plt.plot(hours, agent.actor.zeta['p_ele'][:, 0], c='k', label="Iteration 3")
plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.15), ncol=4)
plt.show()

In [None]:
# Set seaborn plotting aesthetics as default
sns.set(rc={'figure.figsize': (5, 5)})

# Flatten the stored 'p_ele_local' snapshots together with the actor's current 'p_ele'
# values, then view the result as a (9, 24, 9) block and reverse its axes.
# NOTE(review): the hard-coded 9 / 24 / 9 must match the stored data — confirm what each axis means.
df = np.array(agent._actor_zetas['p_ele_local'])
df = np.append(df, np.expand_dims(agent.actor.zeta['p_ele'], 0))
df = df.reshape((9, 24, 9)).T

fig, axn = plt.subplots(2, 2, sharex=True, sharey=True)
# One shared colorbar, placed in its own axes to the right of the grid.
cbar_ax = fig.add_axes([.91, .3, .03, .4])

for i, ax in enumerate(axn.flat):
    # Only the first heatmap draws the (shared) colorbar.
    draws_colorbar = i == 0
    sns.heatmap(
        df[i],
        ax=ax,
        cmap='viridis',
        cbar=draws_colorbar,
        cbar_ax=cbar_ax if draws_colorbar else None,
    )
    ax.set(title=f"Building {i + 1}")

# Label only the outer axes: left column gets the y label, bottom row the x label.
for left_idx in (0, 2):
    axn.flat[left_idx].set(ylabel='Hour of Day')
for bottom_idx in (2, 3):
    axn.flat[bottom_idx].set(xlabel='Month')


# Leave the right-hand 10% of the figure free for the colorbar axes.
fig.tight_layout(rect=[0, 0, .9, 1])