In [1]:
from citylearn import  CityLearn
import matplotlib.pyplot as plt
from pathlib import Path
import numpy as np
np.random.seed(3)

In [2]:
# Load environment
data_folder = Path("data/")
building_attributes = data_folder / 'building_attributes.json'
solar_profile = data_folder / 'solar_generation_1kW.csv'
building_state_actions = 'buildings_state_action_space.json'
building_ids = ["Building_1","Building_2","Building_3","Building_4","Building_5","Building_6","Building_7","Building_8","Building_9"]
env = CityLearn(building_attributes, solar_profile, building_ids, buildings_states_actions = building_state_actions, cost_function = ['ramping','1-load_factor','peak_to_valley_ratio','peak_demand','net_electricity_consumption'])
observations_spaces,actions_spaces = env.get_state_action_spaces()

In [None]:
# RL CONTROLLER
from reward_function import reward_function
from agent import TD3_Agents
import time

#Instantiating the control agent(s)
agents = TD3_Agents(observations_spaces,actions_spaces)

k, c = 0, 0
cost, cum_reward = {}, {}

start = time.time()
episodes = 300
for e in range(episodes): #A stopping criterion can be added, which is based on whether the cost has reached some specific threshold or is no longer improving
    cum_reward[e] = 0
    rewards = []
    state = env.reset()
    done = False
    while not done:
        if k%(40000*4)==0:
            print('hour: '+str(k)+' of '+str(8760*episodes))
            
        action = agents.select_action(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward_function(reward) #See comments in reward_function.py
        agents.add_to_buffer(state, action, reward, next_state, done)
        state = next_state
        cum_reward[e] += reward[0]
        rewards.append(reward)
        k+=1
        cur_time = time.time()
        print("average time : {}s/iteration at iteration {}".format((cur_time - start) / (60.0*k), k))
    cost[e] = env.cost()
    if c%20==0:
        print(cost[e])
    c+=1
        
end = time.time()
print((end-start)/60.0)

TD3 Agents set to GPU!
Buffer set to GPU
Buffer set to GPU
Buffer set to GPU
Buffer set to GPU
Buffer set to GPU
Buffer set to GPU
Buffer set to GPU
Buffer set to GPU
Buffer set to GPU
hour: 0 of 26280
average time : 0.002737549940745036s/iteration at iteration 1
average time : 0.0015674054622650147s/iteration at iteration 2
average time : 0.0010833501815795898s/iteration at iteration 3
average time : 0.000841642419497172s/iteration at iteration 4
average time : 0.0006994891166687011s/iteration at iteration 5
average time : 0.0006057586934831408s/iteration at iteration 6
average time : 0.000537667955671038s/iteration at iteration 7
average time : 0.0004850536584854126s/iteration at iteration 8
average time : 0.00044275522232055663s/iteration at iteration 9
average time : 0.0004082997639973958s/iteration at iteration 10
average time : 0.00038017504142992423s/iteration at iteration 11
average time : 0.0003570771879620022s/iteration at iteration 12
average time : 0.0003374821100479517s/it

average time : 0.0001330890105320857s/iteration at iteration 130
average time : 0.00013293407042214584s/iteration at iteration 131
average time : 0.00013268906058687153s/iteration at iteration 132
average time : 0.00013274590771897394s/iteration at iteration 133
average time : 0.00013261719129571868s/iteration at iteration 134
average time : 0.00013255063398384754s/iteration at iteration 135
average time : 0.00013237066713033937s/iteration at iteration 136
average time : 0.0001322413883070006s/iteration at iteration 137
average time : 0.00013201950828810246s/iteration at iteration 138
average time : 0.00013198632416393545s/iteration at iteration 139
average time : 0.00013260373047419957s/iteration at iteration 140
average time : 0.00013263245282725523s/iteration at iteration 141
average time : 0.00013264213369485917s/iteration at iteration 142
average time : 0.0001326639335472267s/iteration at iteration 143
average time : 0.00013266713530929s/iteration at iteration 144
average time : 0

average time : 0.00012655943339946223s/iteration at iteration 272
average time : 0.00012649677611969328s/iteration at iteration 273
average time : 0.00012643894139867628s/iteration at iteration 274
average time : 0.00012645702651052765s/iteration at iteration 275
average time : 0.00012643252305938427s/iteration at iteration 276
average time : 0.00012635894917695817s/iteration at iteration 277
average time : 0.00012627704252156136s/iteration at iteration 278
average time : 0.0001262674479786498s/iteration at iteration 279
average time : 0.00012621016729445685s/iteration at iteration 280
average time : 0.00012615196504061192s/iteration at iteration 281
average time : 0.0001260942882961697s/iteration at iteration 282
average time : 0.0001260556096322966s/iteration at iteration 283
average time : 0.00012602121897146735s/iteration at iteration 284
average time : 0.00012598677685386257s/iteration at iteration 285
average time : 0.00012594561476807494s/iteration at iteration 286
average time 

average time : 0.00017499358874009435s/iteration at iteration 401
average time : 0.00022959754637027065s/iteration at iteration 402
average time : 0.0002935395938882757s/iteration at iteration 403
average time : 0.00036101900901731486s/iteration at iteration 404
average time : 0.00041332654992248786s/iteration at iteration 405
average time : 0.0004697150001776434s/iteration at iteration 406
average time : 0.0005332234352949113s/iteration at iteration 407
average time : 0.0005916167026251749s/iteration at iteration 408
average time : 0.0006484654344187195s/iteration at iteration 409
average time : 0.000702792018409667s/iteration at iteration 410
average time : 0.0007564335553895527s/iteration at iteration 411
average time : 0.0008091961874545199s/iteration at iteration 412
average time : 0.0008601314507731514s/iteration at iteration 413
average time : 0.0009107555264244141s/iteration at iteration 414
average time : 0.0009631400031737056s/iteration at iteration 415
average time : 0.00101

In [None]:
#Prints the cost of every episode. The lower the better. Ideally it will reach the same cost or lower than the RBC: 156.88
cost

In [None]:
#Prints the actor and critic losses
fig, (plot1, plot2, plot3) = plt.subplots(1,3)
fig.set_size_inches(12,4)
plot1.plot(agents.critic1_loss_list[0],'b')
plot2.plot(agents.critic2_loss_list[0],'g')
plot3.plot(agents.actor_loss_list[0],'y')
plot1.set_xlabel('hours*iterations')
plot1.set_ylabel('Critic 1 loss')
plot2.set_xlabel('hours*iterations')
plot2.set_ylabel('Critic 2 loss')
plot3.set_xlabel('hours*iterations')
plot3.set_ylabel('Actor loss')

In [None]:
#Plots for the last 100 hours of the simulation
plt.plot(env.buildings[0].cooling_device.electrical_consumption_cooling[2400:])
plt.plot(env.buildings[0].sim_results['cooling_demand'][3500:6000].values[2400:])
plt.plot(env.buildings[0].cooling_device.cooling_supply[2400:])
plt.legend(['Electrical consumption cooling','Building cooling demand','Heat pump cooling supply'])

In [None]:
interval = range(0,8759)
plt.figure(figsize=(18,8))
plt.plot(env.net_electric_consumption[interval]+env.electric_generation[interval]-env.electric_consumption_cooling_storage[interval]-env.electric_consumption_dhw_storage[interval])
plt.plot(env.net_electric_consumption[interval]-env.electric_consumption_cooling_storage[interval]-env.electric_consumption_dhw_storage[interval])
plt.plot(env.net_electric_consumption[interval], '--')
plt.legend(['Electricity demand without storage or generation (kW)', 'Electricity demand with PV generation and without storage(kW)', 'Electricity demand using RBC for storage(kW)'])