In [1]:
from buildingenv import *
from ppoagent import *
from create_env import *
from datapreprocess import *
from plotutils import *
import os
from modelretrain import *

Using TensorFlow backend.


In [2]:
# Specifying logging location & Parameters
# weekstart / slicepoint / weekend are the three week indices handed to
# dflist2array when the train and test sets are selected.
weekstart = 0  # create 12 week train and 1 week test data set
slicepoint = 12
weekend = 13
# specify logging directory
logdir = './rl_results/'
# Start from an empty log directory: create it when it is missing, otherwise
# delete the files left behind by a previous run.
os.makedirs(logdir, exist_ok=True)
for stale_name in os.listdir(logdir):
    stale_path = os.path.join(logdir, stale_name)
    # os.remove() raises on directories, so only regular files and symlinks are
    # deleted; any sub-directory is left in place instead of aborting the cell.
    if os.path.isfile(stale_path) or os.path.islink(stale_path):
        os.remove(stale_path)

In [3]:
# Training data file
traindata = 'RL_relearn_dataV2.pkl'
# Chunk the data frame into weeks. The call also returns the two bound lists
# (spacelb, spaceub) that are passed to Env when the environment is built.
dfchunks, spacelb, spaceub = datachunks(
    traindata,
    period=1,
    lag=-1,
    outputcolumn='TotalE',
    subsequence=True,
    smoothing=True,
    days=7,
    Wn=0.02,
)

In [4]:
# How many chunks did datachunks return?
len(dfchunks)

58

In [20]:
# Inspect the two bound lists returned by datachunks (presumably the lower and
# upper limits of the space handed to Env — TODO confirm).
# NOTE(review): this cell's execution count (20) is out of sequence with its
# neighbours, so it was run out of order; re-run the notebook top to bottom.
spacelb, spaceub

([15.8, 16.982016, 0.0, 54.25], [97.2, 96.0, 1033.0, 87.45])

In [None]:
# Step budget later handed to train_agent.
# Original note: training over 12 weeks (approximately 3 months), 5 times.
# NOTE(review): 2040 does not obviously equal 12 weeks x 5 passes — confirm.
num_steps = 2040
# 10080 is the number of minutes in one week; the division by 5 presumably
# reflects 5-minute samples — TODO confirm.
episode_length = 10080 // 5
# Select the train/test data for the window given by the three week indices.
(train_X, train_y, test_X, test_y,
 train_df, test_df) = dflist2array(dfchunks, weekstart, slicepoint, weekend,
                                   scaling=True)

In [None]:
# Instantiate the environment from the train/test frames, the space bounds and
# the saved model file, then wrap it so information is logged under logdir.
env = wrap_env(
    Env(train_df, test_df, spacelb, spaceub,
        modelpath='weights.best.hdf5',
        episodelength=episode_length),
    logdir,
)
# Agent that learns the control policy on the wrapped environment
agent = get_agent(env)

In [None]:
# Train the agent on the wrapped environment with a budget of num_steps; the
# returned model is what the following cell saves to disk.
trained_model = train_agent(agent, env, steps=num_steps)  # train agent

In [None]:
# The same trained model is written twice: once as the "fixed" weights, which
# no later cell in this notebook overwrites, and once as the first set of
# "updating" weights, tagged with the current test week.
for weights_name in ('fixedweights.pkl',
                     'updating_weights_week{}.pkl'.format(weekend)):
    trained_model.save(logdir + weights_name)

In [None]:
# Evaluate both controllers with the same sequence: the updated controller
# first, then the fixed one. Each result is stored under its own file prefix.
for weights_file, tag in (
        ('updating_weights_week{}.pkl'.format(weekend), 'updated_'),
        ('fixedweights.pkl', 'fixed_'),
):
    # necessary steps to adjust the env for testing
    env.env_method("testenv")
    env.env_method("reset")

    # run the test with the saved weights
    test_perf_log = test_agent(logdir + weights_file, env)

    # store the performance log for this controller and week
    rl_perf_save(test_perf_log, logdir + 'Week' + str(weekend) + tag)

In [9]:
# Chunk index at 75% of the available chunks.
# NOTE(review): the retraining loop in the next cell stops at
# int(0.57*len(dfchunks)), not 0.75 — confirm which fraction is intended.
int(0.75*len(dfchunks))

43

In [None]:
# Now we run a loop where we retrain the agent and compare it with fixed agent performance
weekstart = 8  # for 1 month retraining periods
common = True  # NOTE(review): never read in the visible notebook
# NOTE(review): slicepoint and weekend carry over from earlier cells and are
# advanced here, so re-running this cell resumes where it stopped rather than
# starting again from week 13.
while weekend < int(0.57 * len(dfchunks)):

    # slide the train/test window forward by one week
    weekstart, slicepoint, weekend = weekstart + 1, slicepoint + 1, weekend + 1

    # select the data for the new window
    (train_X, train_y, test_X, test_y,
     train_df, test_df) = dflist2array(dfchunks, weekstart, slicepoint, weekend,
                                       scaling=True)
    num_steps = 5000  # 4 weeks for 5 times

    # continue LSTM model training on the new window
    lstm = load_model('weights.best.hdf5')
    retrain(lstm, train_X, train_y, test_X, test_y, epochs=5)

    # create the HVAC environment with new data and wrap it for logging
    # NOTE(review): unlike the first environment, no episodelength is passed
    # here, so Env's own default applies — confirm this is intended.
    env = wrap_env(
        Env(train_df, test_df, spacelb, spaceub, modelpath='weights.best.hdf5'),
        logdir,
    )

    # train the existing agent on the new environment and save its weights
    updated_weights = logdir + 'updating_weights_week{}.pkl'.format(weekend)
    trained_model = train_agent(agent, env, steps=num_steps)
    trained_model.save(updated_weights)

    # test the updated controller first, then the fixed one
    # NOTE(review): the first test cell also calls env_method("reset") before
    # each test; only "testenv" is called here — confirm that is sufficient.
    for weights_path, tag in ((updated_weights, 'updated_'),
                              (logdir + 'fixedweights.pkl', 'fixed_')):
        # necessary steps to adjust the env for testing
        env.env_method("testenv")

        test_perf_log = test_agent(weights_path, env)  # do testing

        # store the performance log for this controller and week
        rl_perf_save(test_perf_log, logdir + 'Week' + str(weekend) + tag)

In [None]:
# individual trial plots: both folder variables point at the logging directory
readfolderloc = writefolderloc = logdir

In [None]:
# Per-week comparison plots for weeks 13 and 14, built from the text files
# whose names start with '<readfolderloc>Week<N>'.
for i in range(13, 15):
    week_to_compare = i
    week_label = str(week_to_compare)
    week_prefix = readfolderloc + 'Week' + week_label

    # energy comparison: inputs are the old-energy, outside-air-temperature,
    # updated-controller and fixed-controller files, in that order
    rl_energy_comparev2(week_prefix + 'updated_old_energy.txt',
                        week_prefix + 'updated_oat.txt',
                        week_prefix + 'updated_rl_energy.txt',
                        week_prefix + 'fixed_rl_energy.txt',
                        writefolderloc,
                        week=week_label)

    # control plot: updated and fixed 'dat' files plus the 'oat' file
    oat_vs_controlv2(week_prefix + 'updated_dat.txt',
                     week_prefix + 'fixed_dat.txt',
                     week_prefix + 'updated_oat.txt',
                     writefolderloc,
                     week=week_label)

In [None]:
# Weekly savings, called with the same 13 and 15 bounds as the individual-plot
# loop; whether 15 is inclusive inside weeklysavings is not visible here —
# TODO confirm.
weeklysavings(13,15, readfolderloc, writefolderloc)

## Run the cells below only after completing several trials

In [None]:
# Combined energy plot, called with week bounds 13 and 15 and written to
# './rl_perf_plots/'.
# NOTE(review): logdirlist is not assigned anywhere in the visible notebook —
# it must be defined (presumably as the list of per-trial log directories)
# before this cell can run.
combinedenergyplot(logdirlist,
                        13, 
                        15,
                        './rl_perf_plots/')

In [None]:
# Combined temperature plot, written to './rl_perf_plots/'.
# NOTE(review): the upper bound here is 51 while the other combined plots use
# 15 — confirm this is intentional and not a transposed digit.
# NOTE(review): logdirlist is not assigned anywhere in the visible notebook.
combinedtempplot(logdirlist,
                        13, 
                        51,
                        './rl_perf_plots/')

In [None]:
# Aggregate bar plot, called with week bounds 13 and 15 and written to
# './rl_perf_plots/'.
# NOTE(review): logdirlist is not assigned anywhere in the visible notebook.
aggregatebarplot(logdirlist,
                        13, 
                        15,
                        './rl_perf_plots/')