## Model generator main file

In [20]:
import numpy as np
import re
import unidecode
import os
import pickle
import sys
import csv

import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection     import GridSearchCV

import keras.backend as K
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints           import maxnorm
from keras                       import regularizers
from keras                       import Input, Model, Sequential
from keras.layers                import Flatten, TimeDistributed, CuDNNGRU, CuDNNLSTM, Bidirectional, Activation, TimeDistributed, Dense, RepeatVector, Embedding, Dropout, BatchNormalization
from keras.layers.recurrent      import LSTM, GRU, SimpleRNN
from keras.utils                 import np_utils
from keras.callbacks             import EarlyStopping, TensorBoard, ModelCheckpoint

Import data and make folders

In [2]:
data_path = 'data'
temp_path = data_path + '/temp'
backup_path = data_path + '/backup'
weights_path = data_path + '/weights'
train_history = data_path + '/train_history'

if not os.path.isdir(data_path):
    os.mkdir(data_path)
if not os.path.isdir(temp_path):
    os.mkdir(temp_path)
if not os.path.isdir(backup_path):
    os.mkdir(backup_path)
if not os.path.isdir(weights_path):
    os.mkdir(weights_path)
if not os.path.isdir(train_history):
    os.mkdir(train_history)

#joint_angle_data.pickle
with open(data_path + '/augmented_input.pickle', 'rb') as file:
    joint_angle_data = pickle.load(file)
    file.close()

#power_data.pickle
with open(data_path + '/power_data.pickle', 'rb') as file:
    power_data = pickle.load(file)
    file.close()
    
print('===== Check sizes =====')
print('joint_angle_data shape is: ')
print(joint_angle_data.shape)
print('power_data shape is: ')
print(power_data.shape)
print('First value needs to be equal!')

===== Check sizes =====
joint_angle_data shape is: 
(32501, 19)
power_data shape is: 
(32501, 1)
First value needs to be equal!


## Recurrent neural networks

Different model functions

In [3]:
def mean_square_error(y_true, y_pred):
    return K.mean(K.square(y_true-y_pred))

In [72]:
def setModelToBasicLSTM(self, multiple=False):
    n_timesteps, n_features, n_outputs = self.X_train.shape[1], self.X_train.shape[2], self.Y_train.shape[-1]
    # define model
    self.model = Sequential()
    self.model.add(Bidirectional(CuDNNLSTM(128, return_sequences = True, kernel_initializer = 'random_uniform', bias_initializer = 'zero', input_shape = (n_timesteps, n_features))))
    self.model.add(Dropout(0.2))
    self.model.add(Bidirectional(CuDNNLSTM(128, return_sequences = True, kernel_initializer = 'random_uniform', bias_initializer = 'zero')))
    self.model.add(Dropout(0.2))
    self.model.add(Bidirectional(CuDNNLSTM(128, return_sequences = True, kernel_initializer = 'random_uniform', bias_initializer = 'zero')))
    self.model.add(Dropout(0.2))
    self.model.add(Bidirectional(CuDNNLSTM(128, return_sequences = multiple, kernel_initializer = 'random_uniform', bias_initializer = 'zero')))
    self.model.add(Dropout(0.2))
    if multiple:
        self.model.add(TimeDistributed(Dense(128, activation='relu', kernel_initializer = 'random_uniform', bias_initializer = 'zero')))
        self.model.add(TimeDistributed(Dense(n_outputs)))
    else:
        self.model.add(Dense(128, activation='relu', kernel_initializer = 'random_uniform', bias_initializer = 'zero'))
        self.model.add(Dense(n_outputs))
    self.model.compile(loss='mean_absolute_percentage_error', optimizer='adam', metrics=[mean_square_error])


In [73]:
def setModelToGridSearch(self, multiple=False, layer_type=CuDNNLSTM, hidden_units_RNN=128, hidden_units_dense=64, num_mid_layers = 3, optimizer='adam', init_mode='random_uniform', activation='relu', dropout_rate=0.2, num_dense_layers=1):
    n_timesteps, n_features, n_outputs = self.X_train.shape[1], self.X_train.shape[2], self.Y_train.shape[-1]
    # define model
    self.model = Sequential()
    self.model.add(Bidirectional(layer_type(hidden_units_RNN, return_sequences = True, kernel_initializer = init_mode, bias_initializer = 'zero', input_shape = (n_timesteps, n_features))))
    for n in range(num_mid_layers):
        self.model.add(Bidirectional(layer_type(hidden_units_RNN, return_sequences = True, kernel_initializer = init_mode, bias_initializer = 'zero')))
        self.model.add(Dropout(dropout_rate))
        
    self.model.add(Bidirectional(layer_type(hidden_units_RNN, return_sequences = multiple, kernel_initializer = init_mode, bias_initializer = 'zero')))
    self.model.add(Dropout(dropout_rate))
    
    if multiple:
        for n in range(num_dense_layers):
            self.model.add(TimeDistributed(Dense(hidden_units_dense, activation=activation, kernel_initializer = 'random_uniform', bias_initializer = 'zero'))) #, kernel_constraint=maxnorm(weight_constraint)
    
        self.model.add(TimeDistributed(Dense(n_outputs)))
    else:
        for n in range(num_dense_layers):
            self.model.add(Dense(hidden_units_dense, activation=activation, kernel_initializer = 'random_uniform', bias_initializer = 'zero')) #, kernel_constraint=maxnorm(weight_constraint)
    
        self.model.add(Dense(n_outputs))
        
    self.model.compile(loss='mean_absolute_percentage_error', optimizer=optimizer, metrics=["accuracy"])
    return self.model

In [74]:
def setModelToFFNN(self):
    n_timesteps, n_features, n_outputs = self.X_train.shape[1], self.X_train.shape[2], self.Y_train.shape[1]

    inputs = Input(shape=(n_timesteps,n_features))
    x = Dense(128, activation='relu')(inputs)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(32, activation='relu')(x)
    prediction = Dense(n_outputs, activation='relu')(x)
    
    self.model = Model(inputs, prediction)
    self.model.compile(loss='mean_absolute_percentage_error', optimizer='adam', metrics=["mean_square_error"])


In [75]:
def setModelToFinalModel(self, multiple=False):
    n_timesteps, n_features, n_outputs = self.X_train.shape[1], self.X_train.shape[2], self.Y_train.shape[-1]
    # define model
    self.model = Sequential()
    self.model.add(Bidirectional(CuDNNLSTM(128, return_sequences = True, kernel_initializer = 'random_uniform', bias_initializer = 'zero', input_shape = (n_timesteps, n_features))))
    self.model.add(Dropout(0.2))
    self.model.add(Bidirectional(CuDNNLSTM(128, return_sequences = True, kernel_initializer = 'random_uniform', bias_initializer = 'zero')))
    self.model.add(Dropout(0.2))
    self.model.add(Bidirectional(CuDNNLSTM(128, return_sequences = True, kernel_initializer = 'random_uniform', bias_initializer = 'zero')))
    self.model.add(Dropout(0.2))
    self.model.add(Bidirectional(CuDNNLSTM(128, return_sequences = multiple, kernel_initializer = 'random_uniform', bias_initializer = 'zero')))
    self.model.add(Dropout(0.2))
    if multiple:
        self.model.add(TimeDistributed(Dense(128, activation='relu', kernel_initializer = 'random_uniform', bias_initializer = 'zero')))
        self.model.add(TimeDistributed(Dense(n_outputs)))
    else:
        self.model.add(Dense(128, activation='relu', kernel_initializer = 'random_uniform', bias_initializer = 'zero'))
        self.model.add(Dense(n_outputs))
    self.model.compile(loss='mean_absolute_percentage_error', optimizer='adam', metrics=[mean_square_error])   

Other functions

In [76]:
def series_to_supervised(dataX, dataY, n_input, n_output, multiple=False):
    #Output: [batchs, n_timesteps, n_features]
    df_X = pd.DataFrame(dataX)
    df_Y = pd.DataFrame(dataY)
    X = np.ndarray((df_X.shape[0]-n_input, n_input, df_X.shape[1]) )
    if multiple:
        Y = np.ndarray((df_Y.shape[0]-n_input, n_input, n_output))
    else:
        Y = np.ndarray((df_Y.shape[0]-n_input, n_output))

    for n in range(0,df_X.shape[0]-n_input-1,1):
        for m in range(n_input-1,-1, -1):
            X[n,m,:] = df_X.loc[n+m:n+m,:]
    if multiple:
        for n in range(0,df_Y.shape[0]-n_input-1,1):
            for m in range(n_input-1,-1, -1):
                Y[n,m,:] = df_Y.loc[n+m:n+m,:]
    else:
        for n in range(0,df_Y.shape[0]-n_input-1,1):
            for m in range(n_output-1,-1, -1):
                Y[n,:] = df_Y.loc[n+m:n+m,:]
            
    return X, Y

In [77]:
def setUpData(self, seq_length, dataX, dataY, n_test_ratio, multiple=False):

    self.X_train = []
    self.Y_train = []
    self.X_test = []
    self.Y_test = []
    self.seq_length = seq_length
    
    X, Y = series_to_supervised(dataX, dataY, multiple=multiple, n_input=seq_length, n_output=dataY.shape[1])
    
    self.X_train = X[:int(len(X)*(1-n_test_ratio)), :, :]
    self.Y_train = Y[:int(len(Y)*(1-n_test_ratio)), :]
    self.X_test = X[int(len(X)*(1-n_test_ratio)):, :, :]
    self.Y_test = Y[int(len(Y)*(1-n_test_ratio)):, :]
    

In [78]:
def trainModel(self, name):
    # File path for model
    filepath = weights_path + "/weights-" + name + "-{epoch:02d}-{loss:.4f}.hdf5"
    # Callbacks functions
    es = EarlyStopping(monitor='val_loss', patience=5, verbose=0)
    tb = TensorBoard(log_dir='./logs', histogram_freq=0, batch_size=32)
    mc = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True, mode='auto')
    
    # Train the model
    epochs = 100           #Maximum number of epochs to run
    batch_size = 32    #Size of training data batch
    Val_split = 0.1       #Procentage of training data to use as validation data
    history = self.model.fit(self.X_train, self.Y_train, epochs=epochs, batch_size=batch_size, validation_split=Val_split, callbacks=[es, tb, mc])
    
    # Save the model
    filename = "model_" + name + ".hdf5"
    self.model.save_weights(weights_path + '/' + filename)
    # Save the history
    filename = "history_" + name + ".pickle"
    with open(train_history + '/' + filename, 'wb') as f:
        pickle.dump(history.history, f)
        f.close()

Model generator

In [79]:
class Generator:

    set_up_data = setUpData
    train_model = trainModel
    
    set_model_to_Basic_LSTM = setModelToBasicLSTM
    set_model_to_Grid_Search = setModelToGridSearch
    set_model_to_FFNN = setModelToFFNN
    set_model_to_final_model = setModelToFinalModel
    

## Training the network

Test training FFNN

In [61]:
gen = Generator()
seqLleng = 2 #12ms/leng unit
n_test_ratio = 0.02
print((power_data.shape))
gen.set_up_data(seqLleng, joint_angle_data, power_data, n_test_ratio, multiple=False)
print(gen.X_train.shape)
print(gen.Y_train.shape)
print(gen.X_test.shape)
print(gen.Y_test.shape)
gen.set_model_to_FFNN()
name = 'test_run_FFNN'
gen.train_model(name = name)

(32501, 1)


KeyboardInterrupt: 

Test training Basic LSTM

In [80]:
gen = Generator()
seqLleng = 25 #12ms/leng unit
n_test_ratio = 0.02
multiple = True
print((power_data.shape))
gen.set_up_data(seqLleng, joint_angle_data, power_data, n_test_ratio, multiple=multiple)
print(gen.X_train.shape)
print(gen.Y_train.shape)
print(gen.X_test.shape)
print(gen.Y_test.shape)
gen.set_model_to_Basic_LSTM(multiple=multiple)
name = 'test_run_Basic_LSTM'
gen.train_model(name = name)

(32501, 1)
(31826, 25, 19)
(31826, 25, 1)
(650, 25, 19)
(650, 25, 1)
Train on 28643 samples, validate on 3183 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


Hyperparamter Grid Search

In [39]:
gen_gs = Generator()
# Variables
seqLength = 2 #12ms/leng unit
n_test_ratio = 0.02
multiple = False
# Set up the data
print('Setting up data')
gen_gs.set_up_data(seqLength, joint_angle_data, power_data, n_test_ratio, multiple=multiple)
# create model
print('Creating keras Classifier')
epochs = 1
batch_size = 32
model = KerasClassifier(build_fn=gen_gs.set_model_to_Grid_Search, epochs=epochs, batch_size=batch_size, verbose=1)
# define the grid search parameters
multiple = [multiple]
layer_type = [CuDNNLSTM, CuDNNGRU]
hidden_units_RNN = [64, 128]
hidden_units_dense = [64, 128]
num_dense_layers = [1, 2]
num_mid_layers = [0, 2, 4]
#optimizer = ['adam']
#learn_rate = [0.01]
#momentum = [0]
#init_mode = ['random_uniform']
#activation = ['relu']
dropout_rate = [0, 0.2, 0,4]
#weight_constraint = [0]
# Make dictionary
param_grid = dict(multiple=multiple,
                  layer_type=layer_type,
                  hidden_units_dense=hidden_units_dense,
                  hidden_units_RNN=hidden_units_RNN,
                  num_mid_layers=num_mid_layers,
                  dropout_rate=dropout_rate,
                  num_dense_layers=num_dense_layers)
# Grid Search
print('Grid Search Starting')
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
print('X: ' + str(gen_gs.X_train.shape))
print('Y: ' + str(gen_gs.Y_train.shape))
grid_result = grid.fit(gen_gs.X_train, gen_gs.Y_train)

Setting up data
Creating keras Classifier
Grid Search Starting
X: (31849, 2, 19)
Y: (31849, 1)
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


## Prediction Results

Results from Grid Search

In [40]:
# Summarize the results from the Grid Search
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.000314 using {'dropout_rate': 0.2, 'hidden_units_RNN': 128, 'hidden_units_dense': 128, 'layer_type': <class 'keras.layers.cudnn_recurrent.CuDNNLSTM'>, 'multiple': False, 'num_dense_layers': 1, 'num_mid_layers': 2}
0.000157 (0.000222) with: {'dropout_rate': 0.2, 'hidden_units_RNN': 128, 'hidden_units_dense': 128, 'layer_type': <class 'keras.layers.cudnn_recurrent.CuDNNLSTM'>, 'multiple': False, 'num_dense_layers': 1, 'num_mid_layers': 1}
0.000314 (0.000222) with: {'dropout_rate': 0.2, 'hidden_units_RNN': 128, 'hidden_units_dense': 128, 'layer_type': <class 'keras.layers.cudnn_recurrent.CuDNNLSTM'>, 'multiple': False, 'num_dense_layers': 1, 'num_mid_layers': 2}
0.000157 (0.000222) with: {'dropout_rate': 0.2, 'hidden_units_RNN': 128, 'hidden_units_dense': 128, 'layer_type': <class 'keras.layers.cudnn_recurrent.CuDNNGRU'>, 'multiple': False, 'num_dense_layers': 1, 'num_mid_layers': 1}
0.000157 (0.000222) with: {'dropout_rate': 0.2, 'hidden_units_RNN': 128, 'hidden_units_dense': 128

In [38]:
mylist = [['mean_test_score', 'std_test_score', 'params']]
for mean, stdev, param in zip(means, stds, params):
    mylist.append([mean, stdev, param])
    
with open('Grid_Search_Result.csv', 'w') as myfile:
    #wr = csv.writer(myfile, dialect='excel')
    for row in mylist:
        for column in row:
            myfile.write('%s;' % column)
        myfile.write('\n')
    myfile.close()
print('Done Saving')

Done Saving


Prediction using train and test data

In [None]:
predictions_train = gen.model.predict(gen.X_train)
predictions_test = gen.model.predict(gen.X_test)

Plot the test prediction

In [None]:
time_interval = [100:1000]

plt.figure(figsize=(12,6))
fig1 = plt.subplot()
x1 = range(len(predictions_train))
fig1.plot(x1[time_interval], gen.Y_train[time_interval],  label='true',  marker='o',linewidth=0.3, markersize=4)
fig1.plot(x1[time_interval], predictions_train[time_interval],  label='pred',  marker='o',linewidth=0.3, markersize=4)
fig1.legend();
fig1.set_ylabel('Power[W]')
fig1.set_xlabel('Time')
fig1.set_title('Train data prediction');

plt.figure(figsize=(12,6))
fig2 = plt.subplot()
x2 = range(len(predictions_test))
fig2.plot(x2, gen.Y_test,  label='true',  marker='o',linewidth=0.3, markersize=4)
fig2.plot(x2, predictions_test,  label='pred',  marker='o',linewidth=0.3, markersize=4)
fig2.legend();
fig2.set_ylabel('Power[W]')
fig2.set_xlabel('Time')
fig2.set_title('Test data prediction');

## Final training 

In [None]:
# Setup for final training
path_to_data_mapp = 'Pickled_data'
gen_final = Generator()
seqLleng = 25 #12ms/leng unit
n_test_ratio = 0
num_of_test_paths = 1
gen_final.set_model_to_final_model()

joint_angle_data_path_list = []
power_data_path_list = []

for name in os.listdir(folder_path):
    path = os.path.join(folder_path, name)
    if "_auginput.pickle" in name:
        joint_angle_data_path_list.append(name)
    if "_poweroutput.pickle" in name:
        power_data_path_list.append(name)

match_list_vector = []
for filename in joint_angle_data_path_list:
    name = filename[:-len("_auginput.pickle")]
    num = 0
    for filename in power_data_path_list:
        if name in filename:
            match_list_vector.append(num)
            continue
        num = num + 1

In [None]:
# The training
for n in range(len(joint_angle_data_path_list)-num_of_test_paths):
    #joint_angle_data.pickle
    with open(path_to_data_mapp + joint_angle_data_path_list(n), 'rb') as file:
        joint_angle_data = pickle.load(file)
        file.close()

    #power_data.pickle
    with open(path_to_data_mapp + power_data_path_list(match_list_vector(n)), 'rb') as file:
        power_data = pickle.load(file)
        file.close()
    
    gen_final.set_up_data(seqLleng, joint_angle_data, power_data, n_test_ratio)
    name = 'Final_Model'
    gen_final.train_model(name = name)
    

Plot Resulting figures

In [None]:
predictions_test = gen.model.predict(gen.X_test)
time_interval = [100:1000]

plt.figure(figsize=(12,6))
fig = plt.subplot()
x = range(len(predictions_test))
fig.plot(x[time_interval], gen.Y_test[time_interval],  label='true',  marker='o',linewidth=0.3, markersize=4)
fig.plot(x[time_interval], predictions_test[time_interval],  label='pred',  marker='o',linewidth=0.3, markersize=4)
fig.legend();
fig.set_ylabel('Power [W]')
fig.set_xlabel('Time [s]')
fig.set_title('Test data prediction');