In [21]:
import pandas as pd
import pickle
import numpy as np
import tensorflow as tf
import keras.backend as K
import tqdm, math, re, os, random, time, glob
from tqdm.keras import TqdmCallback
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, LSTM, concatenate, GRU
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow import keras
from skopt.space import Categorical, Real
from skopt.utils import use_named_args
from skopt import gp_minimize
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import numpy.ma as ma

def remove_dupes(i, o):
    """Remove repeated entries of ``i`` (and the paired entries of ``o``) in place.

    Entries of ``i`` are compared with ``==``. Within each group of equal
    entries only the one whose paired value in ``o`` is smallest survives; on a
    tie the earliest entry is kept. Survivors keep their original relative
    order.

    The scan is a single iterative pass. The previous implementation restarted
    a full recursive scan after every removal, which was cubic in the list
    length and raised ``RecursionError`` once about a thousand duplicates had
    to be dropped.

    Args:
        i: list of inputs (any values comparable with ``==``).
        o: list of results, index-aligned with ``i``.

    Returns:
        The same two list objects ``(i, o)``, after in-place pruning.
    """
    # For every distinct value: index (into i / o) of the best entry seen so far.
    winners = []
    for idx in range(len(i)):
        for slot, kept in enumerate(winners):
            if i[kept] == i[idx]:
                # Strict '<' keeps the earliest entry when results are tied.
                if o[idx] < o[kept]:
                    winners[slot] = idx
                break
        else:
            winners.append(idx)

    # Delete the losers back-to-front so the remaining indices stay valid.
    survivors = set(winners)
    for idx in range(len(i) - 1, -1, -1):
        if idx not in survivors:
            del i[idx]
            del o[idx]
    return i, o

def average_params(params):
    """Return the element-wise mean of a collection of parameter vectors.

    Every row that is passed in contributes to the mean. The previous version
    always averaged exactly rows 0..3, so it ignored any extra rows and raised
    ``IndexError`` when fewer than four were supplied.

    Args:
        params: sequence of equally sized parameter vectors (one per row).

    Returns:
        A numpy masked array holding the per-position mean across rows.

    Raises:
        ValueError: if ``params`` is empty.
    """
    if len(params) == 0:
        raise ValueError("average_params needs at least one parameter vector")
    stacked = ma.array(np.array(params))
    return stacked.mean(axis=0)

# --- Mutable state shared by compile_model / eval_model ----------------------
best_error = 1                          # lowest test loss seen so far; a model is saved only when it is beaten
bayes_inputs, bayes_results = [], []    # points tried by the Bayesian search and their losses
bn = 0                                  # number of eval_model calls completed

# --- [low, high] limits used by descale() to map a [0, 1] value to a real one -
# Integer limits make descale() round its result to an int.
unitsL = [2, 200]
epochsL = [20, 200]                     # not referenced by the visible code
learningrateL = [0.001, 0.1]
dropoutL = [0.0, 0.5]

# Print numpy floats with five decimals.
float_formatter = "{:.5f}".format
np.set_printoptions(formatter={'float_kind': float_formatter})

def descale(scaled, limits, is_int=False):
  """Map a normalised value onto the range described by ``limits``.

  Args:
    scaled: value to map; 0 maps to ``limits[0]`` and 1 maps to ``limits[1]``.
    limits: two-element sequence ``[low, high]``.
    is_int: force the result to be rounded to an ``int``. The result is also
      rounded whenever ``limits[1]`` is itself an ``int``. (This flag used to
      be accepted but ignored.)

  Returns:
    ``low + scaled * (high - low)``, as an ``int`` when rounding applies.
  """
  lower, upper = limits[0], limits[1]
  descaled = lower + (scaled * (upper - lower))
  if is_int or isinstance(upper, int):
    descaled = int(round(descaled))
  return descaled

def getOptimizer(opt,lr):
  """Create the Keras optimizer named ``opt``.

  Args:
    opt: one of 'RMSProp', 'Adam', 'SGD', 'Adagrad', 'Adadelta', 'Adamax',
      'Nadam'.
    lr: normalised learning rate. It is mapped onto ``learningrateL`` with
      ``descale`` inside this function, so callers should pass the raw
      normalised value rather than an already descaled one.

  Returns:
    An instance of the matching ``tensorflow.keras.optimizers`` class.

  Raises:
    ValueError: if ``opt`` is not a supported name (previously this surfaced
      as an ``UnboundLocalError`` on the return statement).
  """
  # Search-space label -> attribute name on tensorflow.keras.optimizers.
  class_names = {
    'RMSProp': 'RMSprop',
    'Adam': 'Adam',
    'SGD': 'SGD',
    'Adagrad': 'Adagrad',
    'Adadelta': 'Adadelta',
    'Adamax': 'Adamax',
    'Nadam': 'Nadam',
  }
  if opt not in class_names:
    raise ValueError('Unknown optimizer %r; expected one of %s' % (opt, sorted(class_names)))
  optimizer_class = getattr(optimizers, class_names[opt])
  return optimizer_class(learning_rate=descale(lr,learningrateL))

# Candidate values, shared by the search space and by the random starting point.
defact = ['tanh','sigmoid','linear','relu', 'elu']
defopt =  ['RMSProp','Adam','SGD','Adagrad', 'Adadelta', 'Adamax', 'Nadam']

# One categorical activation dimension per tunable layer, in the order that
# compile_model's arguments expect, followed by the optimizer choice.
_activation_dims = ['Recurrent_Dense_activation',
                    'Feedforward_Dense_activation_1',
                    'Feedforward_Dense_activation_2',
                    'Feedforward_Dense_activation_3',
                    'Concatenated_Dense_activation',
                    'Output_activation']
dimensions = [Categorical(list(defact), name=dim_name) for dim_name in _activation_dims]
dimensions.append(Categorical(list(defopt), name='optimizer'))

# Random starting point for the first Bayesian search: one activation per
# activation dimension, then one optimizer.
default_parameters = [random.choice(defact) for _ in _activation_dims] + [random.choice(defopt)]
@use_named_args(dimensions=dimensions) #Combine Objective function with its search space
def compile_model(Recurrent_Dense_activation, Feedforward_Dense_activation_1, Feedforward_Dense_activation_2, Feedforward_Dense_activation_3, Concatenated_Dense_activation, Output_activation, optimizer):
  """Build, train and score one two-branch network; objective for ``gp_minimize``.

  The categorical arguments (activations and optimizer name) come from the
  Bayesian search. The numeric architecture comes from the global ``myparams``
  vector of normalised values, read as follows:

    [0]        > 0.5 adds an extra LSTM (return_sequences=True) before the main one
    [1], [2]   units / dropout of that extra LSTM
    [3], [4]   units / dropout of the main LSTM
    [5]        > 0.5 adds a Dense + Dropout after the recurrent branch
    [6], [7]   units / dropout rate of that Dense + Dropout
    [8], [9]   units / dropout rate of the first feed-forward Dense + Dropout
    [10]       > 0.5 adds a second feed-forward Dense + Dropout ([11], [12])
    [13]       > 0.5 adds a third feed-forward Dense + Dropout ([14], [15])
    [16]       > 0.5 adds a Dense + Dropout after concatenation ([17], [18])
    [19]       learning rate

  Also reads the globals ``trainX``, ``trainY``, ``testX``, ``testY`` and
  ``home_folder``. ``trainX[0]`` feeds the recurrent branch and ``trainX[1]``
  the feed-forward branch.

  Side effects: when the test loss beats the global ``best_error``, every file
  in ``home_folder + 'models/'`` is deleted, the new model is saved there and
  ``best_error`` is updated.

  Returns:
    The loss (mean absolute error) reported by ``model.evaluate`` on the test set.
  """
  global best_error
  rnn_input_shape = (trainX[0].shape[1], trainX[0].shape[2])

  # Recurrent branch.
  Recurrent = Sequential()
  if myparams[0]>0.5:
    Recurrent.add(LSTM(descale(myparams[1], unitsL), activation='tanh', dropout=descale(myparams[2], dropoutL), recurrent_activation='sigmoid', input_shape=rnn_input_shape, return_sequences=True))
  Recurrent.add(LSTM(descale(myparams[3], unitsL), activation='tanh', dropout=descale(myparams[4], dropoutL), recurrent_activation='sigmoid', input_shape=rnn_input_shape))
  Recurrent.add(Flatten())
  if myparams[5]>0.5:
    Recurrent.add(Dense(descale(myparams[6], unitsL), activation=Recurrent_Dense_activation))
    Recurrent.add(Dropout(descale(myparams[7], dropoutL)))

  # Feed-forward branch.
  FeedForward = Sequential()
  FeedForward.add(Dense(descale(myparams[8], unitsL), input_shape=(trainX[1].shape[1],), activation=Feedforward_Dense_activation_1))
  FeedForward.add(Dropout(descale(myparams[9], dropoutL)))
  if myparams[10]>0.5:
    FeedForward.add(Dense(descale(myparams[11], unitsL), activation=Feedforward_Dense_activation_2))
    FeedForward.add(Dropout(descale(myparams[12], dropoutL)))
  if myparams[13]>0.5:
    FeedForward.add(Dense(descale(myparams[14], unitsL), activation=Feedforward_Dense_activation_3))
    FeedForward.add(Dropout(descale(myparams[15], dropoutL)))

  # Merge both branches and attach the single-output head.
  conc = concatenate([Recurrent.output, FeedForward.output])
  flat = Flatten()(conc)
  if myparams[16]>0.5:
    dense = Dense(descale(myparams[17], unitsL), activation=Concatenated_Dense_activation)(flat)
    dropout = Dropout(descale(myparams[18], dropoutL))(dense)
    prediction = Dense(1, activation=Output_activation)(dropout)
  else:
    prediction = Dense(1, activation=Output_activation)(flat)
  model = Model([Recurrent.input, FeedForward.input], prediction)

  callback=tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, min_delta=0.0001, restore_best_weights=True)

  # getOptimizer() applies descale(lr, learningrateL) itself, so it must get the
  # raw normalised value. Descaling here as well squeezed the learning rate
  # into roughly [0.0011, 0.011] instead of the configured limits.
  model.compile(loss='mean_absolute_error', metrics=['mean_squared_error'], optimizer=getOptimizer(optimizer, myparams[19]))
  model.fit(trainX, trainY, epochs=100, batch_size=320, verbose=0, validation_split=0.2, shuffle=False, callbacks=[callback])
  loss, _mse = model.evaluate(testX, testY, verbose=0)

  if (loss<best_error):
    models_dir = home_folder+'models/'
    # Saving fails if the target directory is missing, so create it on demand.
    os.makedirs(models_dir, exist_ok=True)
    for f in glob.glob(models_dir+'*'):
      os.remove(f)
    best_error = loss
    model.save(models_dir+'Model_'+str(loss)+'.h5')
  return loss


def eval_model(params):
    """Score one architecture vector by running a short Bayesian search on it.

    ``params`` is published as the global ``myparams`` (read by
    ``compile_model``), then ``gp_minimize`` spends ``calls`` evaluations
    searching activations and optimizer. The first call starts from
    ``default_parameters``; later calls are warm-started with the shared
    history in ``bayes_inputs`` / ``bayes_results``.

    After the search the shared history is extended, trimmed to the most recent
    ``calls * bn`` entries and deduplicated with ``remove_dupes``.

    Args:
        params: normalised architecture vector indexed by ``compile_model``.

    Returns:
        The lowest loss among the evaluations performed during this call.
    """
    global myparams, bn, bayes_inputs, bayes_results
    calls=3
    myparams=params
    if not bayes_inputs:
        bayes = gp_minimize(func=compile_model, dimensions=dimensions, acq_func='EI', n_calls=calls, n_random_starts=1, x0=default_parameters, model_queue_size=1)
    else:
        bayes = gp_minimize(func=compile_model, dimensions=dimensions, acq_func='EI', n_calls=calls, n_random_starts=1, x0=bayes_inputs, y0=bayes_results, model_queue_size=1)
    bn=bn+1

    # Take the fitness from this run's own evaluations: skopt lists any supplied
    # y0 first, so the last `calls` entries of func_vals are the new ones.
    # Reading the tail of the shared history *after* deduplication could pick up
    # a loss measured for a different `params` whenever remove_dupes dropped one
    # of the new points.
    error = min(bayes.func_vals[-calls:])

    bayes_inputs.extend(bayes.x_iters)
    bayes_results.extend(bayes.func_vals)
    keep=calls*bn
    bayes_inputs=bayes_inputs[-keep:]
    bayes_results=bayes_results[-keep:]
    bayes_inputs, bayes_results=remove_dupes(bayes_inputs, bayes_results)

    print(round(error,4))
    return error

def Evolution_Strategy(top_n = 5, n_pop = 20, n_iter = 10, sigma_error = 0.15, error_weight = 1, decay_rate = 0.95, min_error_weight = 0.01 ):
    """Evolve the 20-value normalised architecture vector used by ``eval_model``.

    Each generation samples ``n_pop`` candidates around the current best vector
    with Gaussian noise (scale ``error_weight * sigma_error``), clips them to
    [0, 1], scores each with ``eval_model`` (lower is better) and replaces the
    best vector with ``average_params`` of the ``top_n`` best candidates. The
    noise weight is then multiplied by ``decay_rate`` but never drops below
    ``min_error_weight``.

    After every generation the progress is checkpointed to
    ``home_folder + 'state.pkl'`` and the scores so far are written to
    ``home_folder + 'EvolutionStrategyConvergence.csv'`` (one comma-separated
    line per generation). If a checkpoint exists on entry the run resumes from
    it; the checkpoint is deleted when the run completes.

    Reads the globals ``home_folder`` and ``timezero``. ``timezero`` is only
    used for elapsed-time logging and is presumably a ``time.time()`` stamp
    set by the calling cell.

    Args:
        top_n: number of best candidates averaged into the next best vector.
        n_pop: candidates sampled per generation.
        n_iter: total number of generations.
        sigma_error: standard deviation of the sampling noise.
        error_weight: initial multiplier applied to the noise.
        decay_rate: per-generation multiplier for ``error_weight``.
        min_error_weight: lower bound for ``error_weight``.

    Returns:
        ``(best_params, best_score, average)``: the final averaged vector, and
        the best and mean score of the last generation that ran. The two
        scores are ``None`` when no generation ran in this call.
    """
    # Every hyper-parameter starts in the middle of its normalised range.
    params = [0.5] * 20
    best_params = params
    model_evaluation_values = ""
    best_score = None
    average = None
    i = 0

    state_path = home_folder+'state.pkl'
    state_dir = os.path.dirname(state_path)
    if state_dir:
        # open(..., 'wb') does not create missing directories.
        os.makedirs(state_dir, exist_ok=True)

    if os.path.exists(state_path):
        print('Found a previously saved state')
        # NOTE: unpickling can execute arbitrary code; only resume from a state
        # file written by this notebook.
        with open(state_path, 'rb') as handle:
            state = pickle.load(handle)
        model_evaluation_values=state['values']
        i=state['iteration']
        best_params=state['best_params']
        # Checkpoints written before the weight was stored fall back to the argument.
        error_weight=state.get('error_weight', error_weight)
        print('resuming from iteration', i)

    while i < n_iter:
        print(time.time() - timezero, 'gen ', i)
        # Generating the population of parameters
        pop_params = [best_params + error_weight*sigma_error*np.random.randn(*np.shape(params)) for _ in range(n_pop)]
        pop_params = [x.clip(0, 1) for x in pop_params]
        # Evaluating the population of parameters
        evaluation_values = [eval_model(parameters) for parameters in pop_params]
        average = np.average(evaluation_values)
        # One CSV line per generation (no trailing comma or backspace character).
        model_evaluation_values += ",".join(str(value) for value in evaluation_values) + "\n"
        # Sorting based on evaluation score (ascending: lowest loss first)
        ranked = sorted(zip(evaluation_values, pop_params), key=lambda pair: pair[0])
        evaluation_values, pop_params = zip(*ranked)
        best_score = ranked[0][0]
        # Taking the mean of the elite parameters
        best_params = average_params(pop_params[:top_n])
        # Decaying the weight
        error_weight = max(error_weight*decay_rate, min_error_weight)
        params = best_params
        i += 1

        # Checkpoint so an interrupted run can resume from the next generation.
        state = {
            'values': model_evaluation_values,
            'iteration': i,
            'best_params': best_params,
            'error_weight': error_weight,
        }
        with open(state_path, "wb") as state_f:
            pickle.dump(state, state_f)
        with open(home_folder+"EvolutionStrategyConvergence.csv", "w") as f:
            f.write(model_evaluation_values)

    print(model_evaluation_values)
    if os.path.exists(state_path):
        os.remove(state_path)
    return best_params, best_score, average

In [23]:
# Inputs expected from earlier cells:
#   trainX / testX - lists [rnn_input, feedforward_input]: the numpy RNN input shaped
#                    (samples, timesteps, features) and the numpy feed-forward input
#                    shaped (samples, features)
#   trainY / testY - a single output per sample, shaped (samples, output)
# home_folder receives the best model (under models/), the per-generation results CSV
# and the checkpoint written after each generation so an interrupted run can resume.
home_folder='./results/'
# Create the output folders up front: writing the checkpoint, the CSV or the model
# fails if the target directory does not exist.
os.makedirs(home_folder+'models/', exist_ok=True)
# Evolution_Strategy logs elapsed time relative to `timezero`; start the clock here
# unless an earlier cell already did.
timezero = globals().get('timezero', time.time())
best_params, best_periteration, average_periteration= Evolution_Strategy(top_n = 4, n_pop = 10, n_iter = 10)
print(time.time() - timezero, 'algorithm finished')