In [1]:
#!pip install deap

In [2]:
import os

import tensorflow as tf
import numpy as np
import random as rn

sd = 7  # Seed value shared by every generator that reset_random_seeds() touches.


def reset_random_seeds():
    """Re-seed the random number generators used in this notebook with ``sd``.

    Seeds TensorFlow, Python's ``random`` module (imported as ``rn``) and
    NumPy's global generator, and writes the seed into the
    ``PYTHONHASHSEED`` environment variable.
    """
    tf.random.set_seed(sd)
    rn.seed(sd)
    np.random.seed(sd)
    os.environ['PYTHONHASHSEED'] = str(sd)


In [3]:
from deap import base, creator, tools, algorithms
from scipy.stats import randint
import math
import time
import pickle
import random
import operator
import pandas as pd

import tensorflow.keras.backend as K
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

ModuleNotFoundError: No module named 'deap'

In [None]:
# Number of units in the final Dense layer that evaluationLSTM builds.
pred_size = 24

def load_dataset(name=''):
    """Read a CSV file into a DataFrame indexed by its ``date`` column.

    Prints the remaining column names and the first and last index labels.

    Args:
        name: Path of the CSV file, handed directly to ``pandas.read_csv``.

    Returns:
        The loaded ``pandas.DataFrame`` with ``date`` as its index.
    """
    frame = pd.read_csv(name).set_index('date')
    print('Features:', list(frame.columns))
    print('Range: ', frame.index[0], " ~ ", frame.index[-1])
    return frame

def sel_scal_dataset(dataset, features, num_pred=1):
    """Select feature columns, scale them and split them chronologically.

    The rows are split, in order, into train / validation / test partitions.
    The test partition takes roughly 10% of the rows, and the training
    partition roughly 75% of what is left; both sizes are rounded down to a
    whole multiple of ``num_pred``. Validation receives the remaining rows.

    The ``MinMaxScaler`` is fitted on the training rows only and then applied
    to all three partitions, so no statistics from the validation or test
    rows leak into the scaling. As a consequence, validation and test values
    may fall outside the [0, 1] range.

    Args:
        dataset: Object supporting ``dataset[features].to_numpy()`` (a pandas
            DataFrame in this notebook).
        features: List of column names to keep, in the order they should
            appear in the output arrays.
        num_pred: Block size that the train and test sizes are rounded to.

    Returns:
        Tuple ``(train, valid, test, scaler, n)`` where the first three are
        float32 arrays of scaled rows, ``scaler`` is the fitted
        ``MinMaxScaler`` and ``n`` is ``len(features)``.

    Raises:
        ValueError: If the computed training partition is empty.
    """
    n = len(features)
    print("Longitud del dataset:", len(dataset))
    values = dataset[features].to_numpy().astype('float32')

    total_rows = len(values)
    # Same size arithmetic as before: round each partition down to whole
    # blocks of num_pred rows.
    test_size = int((total_rows / num_pred) * 0.1) * num_pred
    train_size = int(((total_rows - test_size) / num_pred) * 0.75) * num_pred
    valid_size = (total_rows - test_size) - train_size

    if train_size == 0:
        raise ValueError(
            "Training partition is empty: {} rows cannot be split with "
            "num_pred={}".format(total_rows, num_pred))

    # Fit the normaliser on the training rows only, then apply it everywhere.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(values[:train_size])
    scaled = scaler.transform(values)

    valid_end = train_size + valid_size
    train = scaled[:train_size, :]
    valid = scaled[train_size:valid_end, :]
    test = scaled[valid_end:, :]

    print("Elementos del conjunto de entrenamiento: {}".format(train_size))
    print("Elementos del conjunto de validacion: {}".format(valid_size))
    print("Elementos del conjunto de prueba: {}".format(test_size))

    return train, valid, test, scaler, n

def prepare_dataset(dataset, n, window_size=1, window_pred=1, step=1, pred_24=False):
    """Cut a series into (input window, target window) training pairs.

    Each sample pairs ``window_size`` consecutive rows with the
    ``window_pred`` rows that immediately follow them. Every start index
    whose target window still fits inside ``dataset`` is used, including the
    very last one.

    Args:
        dataset: NumPy array of rows ordered in time.
        n: Number of feature columns. When ``n > 1`` only column 0 of the
            rows that follow the input window is kept as the target;
            otherwise the following rows are used as they are.
        window_size: Number of rows in each input window.
        window_pred: Number of rows in each target window.
        step: Stride between consecutive start indices. Only honoured when
            ``pred_24`` is true; otherwise the stride is forced to 1.
        pred_24: When true, the first start index is moved forward so that
            ``start + window_size`` is a multiple of 24.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays built from the input windows and the
        target windows respectively. With ``n > 1`` each target has a
        trailing axis of length 1.
    """
    if pred_24:
        # Smallest non-negative offset with (offset + window_size) % 24 == 0.
        offset = (-window_size) % 24
    else:
        offset = 0
        step = 1  # TODO (from the original author): verify this override.

    # Last start index whose target window still lies inside the data.
    # The range below is inclusive of it (hence the + 1).
    last_start = len(dataset) - window_size - window_pred

    dataX, dataY = [], []
    for start in range(offset, last_start + 1, step):
        split = start + window_size
        dataX.append(dataset[start:split])
        if n > 1:
            # Keep only column 0, but retain it as an explicit axis.
            dataY.append(dataset[split:split + window_pred, 0:1])
        else:
            dataY.append(dataset[split:split + window_pred])

    return np.asarray(dataX), np.asarray(dataY)


def evaluationLSTM(chromosome):
    """Train the LSTM encoded by ``chromosome`` and return its RMSE as fitness.

    Reads the notebook globals ``train``, ``valid``, ``test``, ``n``,
    ``pred`` and ``pred_size``.

    Args:
        chromosome: Indexable holding, in order, the look-back window length,
            the number of LSTM units and the number of neurons of the hidden
            Dense layer.

    Returns:
        A one-element tuple ``(rmse,)``.
    """
    eval_look_back = chromosome[0]
    eval_num_units = chromosome[1]
    eval_neurons_1 = chromosome[2]

    trainX, trainY = prepare_dataset(train, n, eval_look_back, pred, step=24, pred_24=True)
    validX, validY = prepare_dataset(valid, n, eval_look_back, pred, step=24, pred_24=True)
    testX, testY = prepare_dataset(test, n, eval_look_back, pred, step=24, pred_24=True)

    # reset_random_seeds() re-seeds Python's `random` module, the same
    # generator whose state the genetic algorithm checkpoints and that DEAP's
    # operators draw from. Save that state and restore it afterwards so that
    # evaluating an individual does not rewind the search's random stream.
    ga_random_state = rn.getstate()
    try:
        # Drop the Keras global state left behind by previously built models;
        # otherwise it accumulates over the many evaluations of a GA run.
        K.clear_session()
        reset_random_seeds()

        # NOTE(review): the output width comes from `pred_size` while the
        # target windows use `pred`; the two must be equal for fit() to work.
        model = Sequential([CuDNNLSTM(eval_num_units, input_shape=(eval_look_back, n)),
                            Dense(eval_neurons_1, activation='relu'),
                            Dense(pred_size, activation='linear')])
        model.compile(loss="mean_squared_error", optimizer="Adam")

        start = time.time()
        model.fit(trainX, trainY, epochs=100, shuffle=True, batch_size=50,
                  validation_data=(validX, validY),
                  callbacks=[EarlyStopping(monitor='val_loss', patience=30)], verbose=0)
        end = time.time()
        print('Time training:', end - start)

        model.summary()
        testPredict = model.predict(testX)
        # Calculate the RMSE score as fitness score for GA.
        # NOTE(review): this score is computed on the *test* split although
        # the log line below says "Validation" - confirm which split the
        # search is meant to optimise.
        rmse = np.sqrt(mean_squared_error(testY[:, :, 0], testPredict))
        print('Validation RMSE: ', rmse, '\n')
    finally:
        rn.setstate(ga_random_state)

    return rmse,

def initIndividual(min_max_list):
    """Draw one random chromosome, one integer gene per ``(min, max)`` pair.

    Both ends of every range are inclusive, matching the inclusive bounds
    that the ``mutUniformInt`` mutation operator is registered with in this
    notebook. ``scipy.stats.randint`` excludes its upper argument, hence the
    ``+ 1`` below.

    Args:
        min_max_list: Iterable of indexables whose first two items are the
            lowest and highest admissible value of a gene.

    Returns:
        List of plain Python ``int`` genes, in the order of ``min_max_list``.
    """
    return [int(randint.rvs(bounds[0], bounds[1] + 1)) for bounds in min_max_list]

In [None]:
# Enable on-demand GPU memory allocation (memory growth) for TensorFlow.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
# Apply the setting to every GPU that was found. Iterating also makes this
# cell a no-op, rather than an IndexError, when the list is empty.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [None]:
# Row offset used for the shifted columns below; also passed to
# sel_scal_dataset as its block size.
pred = 24
df = load_dataset('dataset_09-17NEW.csv')

# 1 where "Weekday Name" is greater than 5, otherwise 0.
df["Weekday"] = df["Weekday Name"].gt(5).astype(int)
# "SIN" as it was 7 * pred rows earlier.
df["SIN_past"] = df["SIN"].shift(pred * 7)
# Values taken from pred rows further down the frame.
df["temp_t1"] = df["Temperatura.Asuncion"].shift(-pred)
for column in ("Year", "Hour", "Month", "Weekday"):
    df[column + "_t1"] = df[column].shift(-pred)
# Drop every row that contains a NaN (the shifts above create some at both
# ends of the frame).
df = df.dropna()

feature_columns = [
    "SIN", "Temperatura.Asuncion", "Year", "Weekday", "Month",
    "SIN_past", "temp_t1", "Year_t1", "Weekday_t1", "Month_t1",
]
train, valid, test, scaler, n = sel_scal_dataset(df["2009":"2014"], feature_columns, pred)

In [None]:
# Presumably the number of genes per chromosome; not read anywhere in this
# part of the notebook.
gen_length = 3

# (low, high) search range of each hyper-parameter, in chromosome order.
looback_size = (1, 168)
num_units = (1, 100)
neurons_1 = (10, 200)
optimization_params = [looback_size, num_units, neurons_1]

# Per-gene lower and upper bounds, handed to the mutation operator below.
lower_bound, upper_bound = (list(bounds) for bounds in zip(*optimization_params))

population_size = 50
num_generations = 100

CXPB = 0.3   # passed to varAnd as cxpb
MUTPB = 0.2  # passed to varAnd as mutpb
FREQ = 1     # a checkpoint is written whenever gen % FREQ == 0

# Fitness class "LSTMOptimization": a single objective with weight -1.0.
creator.create("LSTMOptimization", base.Fitness, weights=(-1.0, ))

# "Individual" class: a list that carries an LSTMOptimization fitness.
creator.create("Individual", list, fitness = creator.LSTMOptimization)

# Toolbox holding the operators used by the genetic algorithm.
toolbox = base.Toolbox()
toolbox.register("initialization", initIndividual, optimization_params)
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.initialization)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

toolbox.register("mate", tools.cxUniform, indpb=0.5)  # crossover
toolbox.register("mutate", tools.mutUniformInt, low=lower_bound, up=upper_bound, indpb=0.2)  # mutation
toolbox.register("select", tools.selTournament, tournsize=3)  # selection
toolbox.register("evaluate", evaluationLSTM)  # fitness evaluation

In [None]:
def geneticAlgorithm(checkpoint=None):
    """Run the genetic search, optionally resuming from a pickled checkpoint.

    Each generation applies ``varAnd`` to the population, evaluates the
    individuals whose fitness is invalid, updates the hall of fame and the
    logbook, and selects the next population. Whenever ``gen % FREQ == 0``
    the current state is pickled to ``model_1HL_FINAL.pkl``.

    Args:
        checkpoint: Path of a checkpoint file to resume from. When falsy, a
            fresh population, hall of fame and logbook are created.

    Returns:
        None. The results are only available through the checkpoint file.
    """
    if not checkpoint:
        population = toolbox.population(n=population_size)
        start_gen = 0
        halloffame = tools.HallOfFame(maxsize=10)
        logbook = tools.Logbook()
    else:
        with open(checkpoint, "rb") as cp_file:
            saved = pickle.load(cp_file)
        population = saved["population"]
        start_gen = saved["generation"]
        halloffame = saved["halloffame"]
        logbook = saved["logbook"]
        random.setstate(saved["rndstate"])

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    for label, reducer in (("avg", np.mean), ("std", np.std),
                           ("min", np.min), ("max", np.max)):
        stats.register(label, reducer)

    for gen in range(start_gen, num_generations):
        population = algorithms.varAnd(population, toolbox, cxpb=CXPB, mutpb=MUTPB)

        # Only individuals without a valid fitness are (re-)evaluated.
        pending = [ind for ind in population if not ind.fitness.valid]
        for ind, fit in zip(pending, toolbox.map(toolbox.evaluate, pending)):
            ind.fitness.values = fit

        halloffame.update(population)
        record = stats.compile(population)

        # Look for an individual of this generation that beats the current
        # hall-of-fame leader; once one is found, later individuals replace
        # it when their fitness is lower or equal.
        best_ind = best_fit = None
        for ind in population:
            if best_ind is None:
                beats = operator.lt(ind.fitness.values, halloffame[0].fitness.values)
            else:
                beats = operator.le(ind.fitness.values[0], best_fit)
            if beats:
                best_ind = ind
                best_fit = ind.fitness.values[0]

        # Nobody beat the leader: report the hall-of-fame leader itself.
        if best_ind is None and best_fit is None:
            best_ind = halloffame[0]
            best_fit = halloffame[0].fitness.values[0]

        logbook.record(gen=gen, evals=len(pending), best_ind=best_ind, best_fit=best_fit, **record)
        population = toolbox.select(population, k=len(population))

        if gen % FREQ == 0:
            snapshot = dict(population=population, generation=gen, halloffame=halloffame,
                            logbook=logbook, rndstate=random.getstate())
            with open("model_1HL_FINAL.pkl", "wb") as cp_file:
                pickle.dump(snapshot, cp_file)

In [None]:
# Start the search from a fresh population (no checkpoint path is passed).
geneticAlgorithm()

In [None]:
# Load a previously pickled results object from disk.
# NOTE(review): this reads 'model_2HLV1.pkl', whereas geneticAlgorithm()
# writes 'model_1HL_FINAL.pkl' - confirm that this is the intended file.
# pickle can run arbitrary code while loading, so only open trusted files.
# The `with` block closes the file even if unpickling fails.
with open('model_2HLV1.pkl', 'rb') as file:
    data = pickle.load(file)

In [None]:
# Show the `items` attribute of the object stored under "halloffame".
# Presumably a deap.tools.HallOfFame, whose `items` lists the retained
# individuals - confirm against the installed DEAP version.
data['halloffame'].items

In [None]:
# Show the `keys` attribute of the object stored under "halloffame".
# Presumably a deap.tools.HallOfFame, whose `keys` holds the fitnesses of the
# retained individuals - confirm against the installed DEAP version.
data['halloffame'].keys