In [1]:
from citylearn import  CityLearn
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
# Seed NumPy's global random number generator so anything drawing from it is repeatable across runs.
# NOTE(review): other RNGs the agent may use (e.g. a deep-learning framework's) are not seeded here — confirm.
np.random.seed(3)

In [2]:
# Load environment
# The building attributes and the solar profile are read from data_folder; the
# state/action-space file is given relative to the working directory instead.
data_folder = Path("data/")
building_attributes = data_folder.joinpath('building_attributes.json')
solar_profile = data_folder.joinpath('solar_generation_1kW.csv')
building_state_actions = 'buildings_state_action_space.json'

# Identifiers of the buildings handed to the environment.
building_ids = [
    "Building_1",
    "Building_2",
    "Building_3",
    "Building_4",
    "Building_5",
    "Building_6",
    "Building_7",
    "Building_8",
    "Building_9",
]

# cost_function lists the metrics requested from the environment.
env = CityLearn(
    building_attributes,
    solar_profile,
    building_ids,
    buildings_states_actions=building_state_actions,
    cost_function=[
        'ramping',
        '1-load_factor',
        'peak_to_valley_ratio',
        'peak_demand',
        'net_electricity_consumption',
    ],
)

# Observation and action spaces used to size the agents.
observations_spaces, actions_spaces = env.get_state_action_spaces()

In [4]:
# RL CONTROLLER
# Runs the TD3 agents against the environment for a fixed number of episodes,
# recording the cumulative reward and the environment cost of every episode.
from reward_function import reward_function
from agent import TD3_Agents
import time

#Instantiating the control agent(s)
agents = TD3_Agents(observations_spaces,actions_spaces)

# k counts environment steps across all episodes; c counts completed episodes.
k, c = 0, 0
# Both dictionaries are keyed by episode index.
cost, cum_reward = {}, {}

start = time.time()
episodes = 300
for e in range(episodes): #A stopping criterion can be added, which is based on whether the cost has reached some specific threshold or is no longer improving
    cum_reward[e] = 0
    rewards = []  # rewards of the current episode only; reset at every episode
    state = env.reset()
    done = False
    while not done:
        # Sparse progress report (every 160000 steps) so the cell output stays readable.
        # NOTE(review): the total assumes 8760 steps per episode — confirm against the environment's simulation period.
        if k%(40000*4)==0:
            print('hour: '+str(k)+' of '+str(8760*episodes))

        action = agents.select_action(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward_function(reward) #See comments in reward_function.py
        agents.add_to_buffer(state, action, reward, next_state, done)
        state = next_state
        # Only the first entry of the shaped reward is accumulated.
        # NOTE(review): presumably the first building's reward — confirm.
        cum_reward[e] += reward[0]
        rewards.append(reward)
        k+=1
    cost[e] = env.cost()
    # Show the cost of every 20th episode.
    if c%20==0:
        print(cost[e])
    c+=1

# Total wall-clock training time, in minutes.
end = time.time()
print((end-start)/60.0)

hour: 0 of 2628000


StopIteration: 

In [None]:
# Displays the cost recorded for every episode (a dict keyed by episode index). The lower the better.
# Ideally it will reach the same cost as the RBC, or lower: 156.88
cost

In [None]:
# Plots the critic and actor losses side by side, one panel per loss series.
# Only the first entry (index 0) of each loss list is drawn.
fig, (plot1, plot2, plot3) = plt.subplots(nrows=1, ncols=3)
fig.set_size_inches(12, 4)

# (axes, loss series, line colour) for each panel, left to right.
loss_panels = (
    (plot1, agents.critic1_loss_list[0], 'b'),
    (plot2, agents.critic2_loss_list[0], 'g'),
    (plot3, agents.actor_loss_list[0], 'y'),
)
for axis, losses, colour in loss_panels:
    axis.plot(losses, colour)
    axis.set_xlabel('hours*iterations')

plot1.set_ylabel('Critic 1 loss')
plot2.set_ylabel('Critic 2 loss')
plot3.set_ylabel('Actor loss')

In [None]:
# Plots for the last 100 hours of the simulation, for the first building.
# NOTE(review): the 2400 offset yields 100 points only if each series holds 2500 entries
# (as the [3500:6000] demand window does) — confirm for the cooling-device arrays.
first_building = env.buildings[0]
cooling_device = first_building.cooling_device
tail = slice(2400, None)

demand_window = first_building.sim_results['cooling_demand'][3500:6000].values

plt.plot(cooling_device.electrical_consumption_cooling[tail])
plt.plot(demand_window[tail])
plt.plot(cooling_device.cooling_supply[tail])
plt.legend([
    'Electrical consumption cooling',
    'Building cooling demand',
    'Heat pump cooling supply',
])

In [None]:
# Compares the environment's electricity demand under three scenarios over the
# selected time steps: no storage or generation, PV generation only, and the
# actual net consumption obtained with the controller trained in this notebook.
interval = range(0,8759)
plt.figure(figsize=(18,8))

# Demand with PV generation added back and storage consumption removed.
plt.plot(
    env.net_electric_consumption[interval]
    + env.electric_generation[interval]
    - env.electric_consumption_cooling_storage[interval]
    - env.electric_consumption_dhw_storage[interval]
)
# Demand with PV generation kept and storage consumption removed.
plt.plot(
    env.net_electric_consumption[interval]
    - env.electric_consumption_cooling_storage[interval]
    - env.electric_consumption_dhw_storage[interval]
)
# Net consumption as simulated, i.e. with the controller operating the storage.
plt.plot(env.net_electric_consumption[interval], '--')

# The storage in this notebook is driven by the RL (TD3) agents, not an RBC,
# so the third curve is labelled accordingly.
plt.legend([
    'Electricity demand without storage or generation (kW)',
    'Electricity demand with PV generation and without storage(kW)',
    'Electricity demand using the RL controller for storage(kW)',
])