In [1]:
from buildingenv import *
from ppoagent import *
from create_env import *
from datapreprocess import *
from plotutils import *
import os
from modelretrain import *

Using TensorFlow backend.


In [2]:
# Specifying logging location & Parameters
# Week indices used to slice the weekly chunks: a 12 week training set
# followed by a 1 week test set.
weekstart = 0
slicepoint = 12
weekend = 13
# specify logging directory
logdir = './rl_results/'
# Create the directory (and any missing parents) if it does not exist yet;
# an already existing directory is fine.
os.makedirs(logdir, exist_ok=True)
# clear old files left behind by a previous run
for old_name in os.listdir(logdir):
    old_path = os.path.join(logdir, old_name)
    # os.remove() raises on directories, which would abort the whole cell,
    # so only regular files and symlinks are deleted; sub-directories are kept.
    if os.path.isfile(old_path) or os.path.islink(old_path):
        os.remove(old_path)

In [3]:
# File holding the training data that is handed to datachunks().
traindata = 'RL_relearn_dataV2.pkl'
# Chunk the data frame into weekly pieces (days=7). Besides the chunks, the call
# also returns spacelb / spaceub, which are later passed to Env().
dfchunks, spacelb, spaceub = datachunks(
    traindata,
    period=1,
    lag=-1,
    outputcolumn='TotalE',
    subsequence=True,
    smoothing=True,
    days=7,
    Wn=0.02,
)

In [4]:
# Display the two bound lists returned by datachunks() (presumably lower / upper bounds).
(spacelb, spaceub)

([15.8, 16.982016, 0.0, 54.25], [97.2, 96.0, 1033.0, 87.45])

In [5]:
# Number of steps handed to train_agent() for the initial training run.
# NOTE(review): the original remark described this as "12 weeks ... for 5 times",
# but 2040 is only slightly more than one episode of 2016 steps - confirm the intent.
num_steps = 2040
# 10080 // 5 == 2016 (presumably minutes per week at 5 minute sampling - TODO confirm)
episode_length = 10080 // 5
# select the train / test data for the current week indices
(train_X, train_y, test_X, test_y, train_df, test_df) = dflist2array(
    dfchunks, weekstart, slicepoint, weekend, scaling=True
)

In [6]:
# Instantiate the environment from the selected data and the saved model weights,
# and wrap it straight away so that information is logged into logdir.
env = wrap_env(
    Env(
        train_df,
        test_df,
        spacelb,
        spaceub,
        modelpath='weights.best.hdf5',
        episodelength=episode_length,
    ),
    logdir,
)
# Instantiate the agent that learns the control policy on the wrapped environment.
agent = get_agent(env)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use keras.layers.flatten instead.


In [7]:
# Initial training of the agent on the wrapped environment for num_steps steps.
trained_model = train_agent(
    agent,
    env,
    steps=num_steps,
)

---------------------------------------
| approxkl           | 1.8593579e-05  |
| clipfrac           | 0.0            |
| explained_variance | 0.0208         |
| fps                | 65             |
| n_updates          | 1              |
| policy_entropy     | 1.4198767      |
| policy_loss        | -0.00088913017 |
| serial_timesteps   | 128            |
| time_elapsed       | 0              |
| total_timesteps    | 128            |
| value_loss         | 941.88135      |
---------------------------------------
---------------------------------------
| approxkl           | 5.784424e-06   |
| clipfrac           | 0.0            |
| explained_variance | -0.0264        |
| fps                | 321            |
| n_updates          | 2              |
| policy_entropy     | 1.4213961      |
| policy_loss        | -0.00077724294 |
| serial_timesteps   | 256            |
| time_elapsed       | 1.95           |
| total_timesteps    | 256            |
| value_loss         | 932.174        |


In [8]:
# Store the freshly trained model twice: once as the "fixed" agent weights and
# once as the first "updating" agent weights for the current week.
for weights_name in ('fixedweights.pkl',
                     'updating_weights_week{}.pkl'.format(weekend)):
    trained_model.save(logdir + weights_name)

In [9]:
# Evaluate the updated controller first and the fixed controller second.
# Each pass performs the same four steps, so they are driven from one table of
# (weights file, suffix for the stored performance files).
controller_runs = (
    ('updating_weights_week{}.pkl'.format(weekend), 'updated_'),
    ('fixedweights.pkl', 'fixed_'),
)
for weights_name, result_suffix in controller_runs:
    # necessary steps to adjust the env for testing
    env.env_method("testenv")
    env.env_method("reset")

    # do testing with the stored weights
    test_perf_log = test_agent(logdir + weights_name, env)

    # store the performance of this controller
    rl_perf_save(test_perf_log, logdir + 'Week' + str(weekend) + result_suffix)

in callback
rl_energy
2016
old_energy
2016
oat
2016
dat
2016
episode
1
terminal_observation
1
in callback
rl_energy
2016
old_energy
2016
oat
2016
dat
2016
episode
1
terminal_observation
1


In [11]:
# Upper bound on `weekend` used by the retraining loop: 68% of the available weekly chunks.
int(0.68 * len(dfchunks))

15

In [12]:
# Retrain the agent week by week and compare it against the fixed agent.
weekstart = 8  # for 1 month retraining periods
common = True
while weekend < int(0.68 * len(dfchunks)):

    # advance every week index by one week
    weekstart, slicepoint, weekend = weekstart + 1, slicepoint + 1, weekend + 1

    # select the data for the shifted window
    (train_X, train_y, test_X, test_y, train_df, test_df) = dflist2array(
        dfchunks, weekstart, slicepoint, weekend, scaling=True
    )
    # Steps handed to train_agent() in each retraining round
    # (original remark: "4 weeks for 5 times").
    num_steps = 5000

    # continue LSTM model training from the stored weights
    lstm = load_model('weights.best.hdf5')
    retrain(lstm, train_X, train_y, test_X, test_y, epochs=5)

    # create the HVAC environment with the new data and wrap it for logging
    env = wrap_env(
        Env(train_df, test_df, spacelb, spaceub, modelpath='weights.best.hdf5'),
        logdir,
    )

    # do training and save the updating agent weights for this week
    trained_model = train_agent(agent, env, steps=num_steps)
    updated_weights_name = 'updating_weights_week{}.pkl'.format(weekend)
    trained_model.save(logdir + updated_weights_name)

    # Evaluate the updated controller first, then the fixed controller.
    # NOTE(review): unlike the first evaluation cell, no env_method("reset") is
    # issued after "testenv" here - confirm that test_agent resets the env itself.
    for weights_name, result_suffix in ((updated_weights_name, 'updated_'),
                                        ('fixedweights.pkl', 'fixed_')):
        # necessary steps to adjust the env for testing
        env.env_method("testenv")

        # do testing with the stored weights
        test_perf_log = test_agent(logdir + weights_name, env)

        # store the performance of this controller
        rl_perf_save(test_perf_log, logdir + 'Week' + str(weekend) + result_suffix)


Epoch 00001: val_loss improved from inf to 0.04375, saving model to weights.best.hdf5

Epoch 00002: val_loss did not improve from 0.04375

Epoch 00003: val_loss did not improve from 0.04375

Epoch 00004: val_loss did not improve from 0.04375

Epoch 00005: val_loss did not improve from 0.04375
--------------------------------------
| approxkl           | 5.877785e-06  |
| clipfrac           | 0.0           |
| explained_variance | -0.0716       |
| fps                | 98            |
| n_updates          | 1             |
| policy_entropy     | 1.4141568     |
| policy_loss        | -0.0004695783 |
| serial_timesteps   | 128           |
| time_elapsed       | 0             |
| total_timesteps    | 128           |
| value_loss         | 842.71106     |
--------------------------------------
---------------------------------------
| approxkl           | 8.9537876e-05  |
| clipfrac           | 0.0            |
| explained_variance | -0.00839       |
| fps                | 291            

2016 timesteps
Best mean reward: -inf - Last mean reward per episode: -5887.83
Saving new best model
---------------------------------------
| approxkl           | 1.8909126e-05  |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.02e+03       |
| ep_reward_mean     | -5.89e+03      |
| explained_variance | 0.00769        |
| fps                | 304            |
| n_updates          | 17             |
| policy_entropy     | 1.4027319      |
| policy_loss        | -0.00083641696 |
| serial_timesteps   | 2176           |
| time_elapsed       | 7.62           |
| total_timesteps    | 2176           |
| value_loss         | 833.0967       |
---------------------------------------
---------------------------------------
| approxkl           | 5.022195e-06   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.02e+03       |
| ep_reward_mean     | -5.89e+03      |
| explained_variance | -0.00268       |
| fps                | 290            |
| n_updates        

--------------------------------------
| approxkl           | 3.4271965e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.02e+03      |
| ep_reward_mean     | -5.89e+03     |
| explained_variance | 5.35e-05      |
| fps                | 318           |
| n_updates          | 31            |
| policy_entropy     | 1.3886049     |
| policy_loss        | -0.0009397131 |
| serial_timesteps   | 3968          |
| time_elapsed       | 13.4          |
| total_timesteps    | 3968          |
| value_loss         | 786.9265      |
--------------------------------------
--------------------------------------
| approxkl           | 2.8964338e-05 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.02e+03      |
| ep_reward_mean     | -5.7e+03      |
| explained_variance | -0.000454     |
| fps                | 318           |
| n_updates          | 32            |
| policy_entropy     | 1.3868461     |
| policy_loss        | -0.0011802244 |
| serial_timesteps   | 40

--------------------------------------
| approxkl           | 3.978364e-06  |
| clipfrac           | 0.0           |
| explained_variance | 0.0014        |
| fps                | 293           |
| n_updates          | 6             |
| policy_entropy     | 1.380697      |
| policy_loss        | -0.0003265285 |
| serial_timesteps   | 768           |
| time_elapsed       | 3.58          |
| total_timesteps    | 768           |
| value_loss         | 995.0216      |
--------------------------------------
---------------------------------------
| approxkl           | 3.169339e-05   |
| clipfrac           | 0.0            |
| explained_variance | -8.39e-05      |
| fps                | 296            |
| n_updates          | 7              |
| policy_entropy     | 1.3792865      |
| policy_loss        | -0.00084630493 |
| serial_timesteps   | 896            |
| time_elapsed       | 4.01           |
| total_timesteps    | 896            |
| value_loss         | 920.0006       |
-------------

--------------------------------------
| approxkl           | 4.355894e-05  |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.02e+03      |
| ep_reward_mean     | -5.71e+03     |
| explained_variance | -0.000234     |
| fps                | 311           |
| n_updates          | 22            |
| policy_entropy     | 1.3542426     |
| policy_loss        | -0.0009809879 |
| serial_timesteps   | 2816          |
| time_elapsed       | 10.6          |
| total_timesteps    | 2816          |
| value_loss         | 740.77        |
--------------------------------------
---------------------------------------
| approxkl           | 7.371032e-05   |
| clipfrac           | 0.0            |
| ep_len_mean        | 2.02e+03       |
| ep_reward_mean     | -5.71e+03      |
| explained_variance | 0.0007         |
| fps                | 311            |
| n_updates          | 23             |
| policy_entropy     | 1.3544396      |
| policy_loss        | -0.00057303545 |
| serial_timest

-------------------------------------
| approxkl           | 2.720878e-05 |
| clipfrac           | 0.0          |
| ep_len_mean        | 2.02e+03     |
| ep_reward_mean     | -5.56e+03    |
| explained_variance | 6.91e-06     |
| fps                | 288          |
| n_updates          | 36           |
| policy_entropy     | 1.3339227    |
| policy_loss        | -0.000294731 |
| serial_timesteps   | 4608         |
| time_elapsed       | 16.6         |
| total_timesteps    | 4608         |
| value_loss         | 848.7104     |
-------------------------------------
--------------------------------------
| approxkl           | 1.7029059e-06 |
| clipfrac           | 0.0           |
| ep_len_mean        | 2.02e+03      |
| ep_reward_mean     | -5.56e+03     |
| explained_variance | -7.41e-05     |
| fps                | 284           |
| n_updates          | 37            |
| policy_entropy     | 1.3340434     |
| policy_loss        | -0.0003143425 |
| serial_timesteps   | 4736          |
|

In [13]:
# Individual trial plots: the result files are read from, and the plots are
# written to, the logging directory.
readfolderloc = writefolderloc = logdir

In [15]:
# Per-week comparison plots of the updated and the fixed controller.
# NOTE(review): range(13, 15) covers weeks 13 and 14 only - confirm whether the
# last evaluated week should be plotted as well.
for i in range(13, 15):
    week_to_compare = i
    week_label = str(week_to_compare)
    # common prefix of every stored result file of this week
    week_prefix = readfolderloc + 'Week' + week_label

    # energy comparison plot built from the stored energy / oat files
    rl_energy_comparev2(
        week_prefix + 'updated_old_energy.txt',
        week_prefix + 'updated_oat.txt',
        week_prefix + 'updated_rl_energy.txt',
        week_prefix + 'fixed_rl_energy.txt',
        writefolderloc,
        week=week_label,
    )
    # control plot built from the stored dat / oat files
    oat_vs_controlv2(
        week_prefix + 'updated_dat.txt',
        week_prefix + 'fixed_dat.txt',
        week_prefix + 'updated_oat.txt',
        writefolderloc,
        week=week_label,
    )

## Run the cells below only after several trials have been completed

In [None]:
# Combined energy plot for weeks 13 to 15, written to './rl_perf_plots/'.
# NOTE(review): logdirlist is not defined in the visible part of this notebook;
# presumably it lists the log directory of every trial - define it before running.
combinedenergyplot(logdirlist, 13, 15, './rl_perf_plots/')

In [None]:
# Combined temperature plot for weeks 13 to 15, written to './rl_perf_plots/'.
# The end week was 51, which disagrees with the other combined plots (15) and
# lies beyond the last week evaluated by the retraining loop of this notebook
# (int(0.68*len(dfchunks)) == 15), so it is aligned to 15 here.
# NOTE(review): logdirlist is not defined in the visible part of this notebook;
# presumably it lists the log directory of every trial - define it before running.
combinedtempplot(logdirlist,
                        13,
                        15,
                        './rl_perf_plots/')

In [None]:
# Aggregate bar plot for weeks 13 to 15, written to './rl_perf_plots/'.
# NOTE(review): logdirlist is not defined in the visible part of this notebook;
# presumably it lists the log directory of every trial - define it before running.
aggregatebarplot(logdirlist, 13, 15, './rl_perf_plots/')