In [1]:
#from google.colab import drive
#drive.mount('/gdrive')
#%cd /gdrive

# This will prompt for authorization.
#drive.mount('/content/drive')

In [2]:
# Testing the AI
# Installing Keras
# conda install -c conda-forge keras

# Importing the libraries and the other python files
import os
import numpy as np
import random as rn
from keras.models import load_model 
import step1_env as environment
import pandas as pd
import step2_buildingbrain as brain
import step3_dqn_rl_algo as dqn
import pandas as pd

In [3]:
# set a seed value
seed_value = 314159

In [4]:
# Set random seeds to the same value 
# 1. Set `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)
# 2. Set `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)
# 3. Set `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)
# 4. Set `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)
# 5. Configure a new global `tensorflow` session
from keras import backend as K
session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
tf.compat.v1.keras.backend.set_session(sess)

In [5]:
# Comment out if you are not using a local GPU
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [6]:
# Comment out if you are not using a local GPU
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [7]:
## Create a list of hyper parameter values to be tested
h_epsilon = [0.1,0.3,0.6,0.9]
h_learning_rate = [0.000005,0.00001,0.00005]
h_early_stopping = [True,False]
h_epoch = [50,100,150]

In [8]:
#Create list of hyper parmeter values to try that will cover all combinations
grid_search = []
for e in h_epsilon:
    for lr in h_learning_rate:
        for es in h_early_stopping:
            for ep in h_epoch:
                grid_search.append({'epsilon':e,'learning_rate':lr,'number_epochs':ep,'early_stopping':es})

In [9]:
results = []

In [10]:
# Define the timesteps for each episode 2 days was used for hyper parameter tuning.
time_steps = 60*24*2
for parameter in grid_search:
	# Training the AI
	# Installing Keras
	# conda install -c conda-forge keras

	# Importing the libraries and the other python files


	# Setting seeds for reproducibility 

	os.environ['PYTHONHASHSEED'] = '0'
	np.random.seed(42)
	rn.seed(12345)


	# SETTING THE PARAMETERS
	run = 'model' ## name for the run. Used to save the model file.
	epsilon = parameter['epsilon']
	learning_rate = parameter['learning_rate']
	number_actions = 5
	direction_boundary = (number_actions - 1) / 2
	number_epochs = parameter['number_epochs']
	max_memory = 3000 
	batch_size = 512 
	temperature_step = 1.5
	early_stopping = parameter['early_stopping']

	# Creating a string that will be used to store the output of each run. 
	run = 'e_'+ str(epsilon) +'_lr_' + str(learning_rate) + '_ep_' + str(number_epochs) + '_es_' + str(early_stopping)

	# BUILDING THE ENVIRONMENT BY SIMPLY CREATING AN OBJECT OF THE ENVIRONMENT CLASS
	env = environment.Environment(optimal_temperature = (18.0, 24.0),
								initial_month = 0,
								initial_number_users = 20,
								initial_rate_data = 30)

	# BUILDING THE BRAIN BY SIMPLY CREATING AN OBJECT OF THE BRAIN CLASS
	import step2_buildingbrain as brain
	brain = brain.Brain(learning_rate = learning_rate, number_actions = number_actions)


	# BUILDING THE DQN MODEL BY SIMPLY CREATING AN OBJECT OF THE DQN CLASS
	import step3_dqn_rl_algo as dqn
	dqn = dqn.DQN(max_memory = max_memory, discount = 0.9)

	# CHOOSING THE MODE
	train = True

	# TRAINING THE AI
	env.train = train
	model = brain.model

	patience = 10
	best_total_reward = -np.inf
	patience_count = 0


	if (env.train):
		# STARTING THE LOOP OVER ALL THE EPOCHS (1 Epoch = 5 Months)
		for epoch in range(1, number_epochs):
			# INITIALIAZING ALL THE VARIABLES OF BOTH THE ENVIRONMENT AND THE TRAINING LOOP
			total_reward = 0
			loss = 0.
			new_month = np.random.randint(0, 12)
			env.reset(new_month = new_month)
			game_over = False
			current_state, _, _ = env.observe()
			timestep = 0

			# STARTING THE LOOP OVER ALL THE TIMESTEPS (1 Timestep = 1 Minute) IN ONE EPOCH
			while ((not game_over) and timestep <= time_steps):

				# PLAYING THE NEXT ACTION BY EXPLORATION
				if np.random.rand() <= epsilon:
					action = np.random.randint(0, number_actions)
					if (action - direction_boundary < 0): 
						direction = -1
					else:
						direction = 1
					energy_ai = abs(action - direction_boundary) * temperature_step 


				# PLAYING THE NEXT ACTION BY INFERENCE

				else:
					q_values = model.predict(current_state) 
					action = np.argmax(q_values[0])
					if (action - direction_boundary < 0): 
						direction = -1
					else:
						direction = 1
					energy_ai = abs(action - direction_boundary) * temperature_step 


				# UPDATING THE ENVIRONMENT AND REACHING THE NEXT STATE
				next_state, reward, game_over = env.update_env(direction, energy_ai, int(timestep / (30*24*60)))


				total_reward += reward
				# STORING THIS NEW TRANSITION INTO THE MEMORY

				dqn.remember([current_state, action, reward, next_state], game_over)



				# GATHERING IN TWO SEPARATE BATCHES THE INPUTS AND THE TARGETS

				inputs, targets = dqn.get_batch(model, batch_size = batch_size)

				# COMPUTING THE LOSS OVER THE TWO WHOLE BATCHES OF INPUTS AND TARGETS
				loss += model.train_on_batch(inputs, targets)
				timestep += 1
				current_state = next_state


			# PRINTING THE TRAINING RESULTS FOR EACH EPOCH
			#print("\n")
			print(run)
			print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
			print("Total Energy spent with an AI: {:.0f}".format(env.total_energy_ai))
			print("Total Energy spent with no AI: {:.0f}".format(env.total_energy_noai)) 

			# EARLY STOPPING
			if (early_stopping):
				if (total_reward <= best_total_reward):
					patience_count += 1
				elif (total_reward > best_total_reward):
					best_total_reward = total_reward
					patience_count = 0
			
			if (patience_count >= patience):
				print("Early Stopping")
				break
	## TESTING THE MODEL
	
	# Setting seeds for reproducibility
	os.environ['PYTHONHASHSEED'] = '0' 
	
	# Note from BREMNER: Should be different random seeds from above in 
	# order to test a different random draw during testing
	
	np.random.seed(42)
	rn.seed(12345)
	
	# SETTING THE PARAMETERS
	number_actions = 5
	direction_boundary = (number_actions - 1) / 2
	temperature_step = 1.5

	# BUILDING THE ENVIRONMENT BY SIMPLY CREATING AN OBJECT OF THE ENVIRONMENT CLASS
	env = environment.Environment(optimal_temperature = (18.0, 24.0), initial_month = 0,
									initial_number_users = 20, initial_rate_data = 30)

	# CHOOSING THE MODE
	train = False
	env.train = train
	current_state, _, _ = env.observe()
	run_results = []
	for timestep in range(0, time_steps):
		q_values = model.predict(current_state)
		action = np.argmax(q_values[0])
		
		if (action - direction_boundary < 0): 
			direction = -1
		else:
			direction = 1

		energy_ai = abs(action - direction_boundary) * temperature_step 
		next_state, reward, game_over = env.update_env(direction, \
														energy_ai, \
														int(timestep / (30*24*60)))

		current_state = next_state
		print("\n")
		print(timestep)
		print("Total Energy spent with an AI: {:.0f}".format(env.total_energy_ai)) 
		print("Total Energy spent with no AI: {:.0f}".format(env.total_energy_noai))
		print("ENERGY SAVED: {:.0f} %".format(100*(env.total_energy_noai - env.total_energy_ai)
											/ env.total_energy_noai))
		# Save the results to a list that will then be passed to a dataframe.
		run_results.append({
			'timestep':timestep,
			'total_energy_ai':env.total_energy_ai,
			'total_energy_noai':env.total_energy_noai,
			'% Savings': (env.total_energy_noai - env.total_energy_ai)/env.total_energy_noai,
			'intrinsic_temp' : env.intrinsic_temperature,
			'temperature_ai' : env.temperature_ai,
			'temperature_noai': env.temperature_noai,
			'direction' : direction,
			'energy_ai' : energy_ai})
	results.append(run_results[-1]['% Savings'])
	# Save the results and the model file for the run
	df = pd.DataFrame(run_results)
	df.to_csv(run +".csv")
	model.save(run+".h5")
	print("\n")
	print(run)
	print('Savings: ' + str(run_results[-1]['% Savings']))

t with no AI: 12724
ENERGY SAVED: 48 %


2679
Total Energy spent with an AI: 6608
Total Energy spent with no AI: 12726
ENERGY SAVED: 48 %


2680
Total Energy spent with an AI: 6609
Total Energy spent with no AI: 12732
ENERGY SAVED: 48 %


2681
Total Energy spent with an AI: 6611
Total Energy spent with no AI: 12733
ENERGY SAVED: 48 %


2682
Total Energy spent with an AI: 6612
Total Energy spent with no AI: 12734
ENERGY SAVED: 48 %


2683
Total Energy spent with an AI: 6614
Total Energy spent with no AI: 12736
ENERGY SAVED: 48 %


2684
Total Energy spent with an AI: 6615
Total Energy spent with no AI: 12740
ENERGY SAVED: 48 %


2685
Total Energy spent with an AI: 6617
Total Energy spent with no AI: 12740
ENERGY SAVED: 48 %


2686
Total Energy spent with an AI: 6618
Total Energy spent with no AI: 12741
ENERGY SAVED: 48 %


2687
Total Energy spent with an AI: 6621
Total Energy spent with no AI: 12746
ENERGY SAVED: 48 %


2688
Total Energy spent with an AI: 6623
Total Energy spent with no 

In [11]:
# Save the summary results of the entire grid search
df=pd.DataFrame(grid_search)
df['Results'] = results
df.to_csv('hyper_parameter_tuning_results.csv')