In [1]:
#load relevant libraries
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import choice, uniform
import tensorflow as tf
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, SimpleRNN, Dropout, GaussianNoise, Activation
from keras.optimizers import Adam, SGD
from keras.utils import to_categorical 
from matplotlib import pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder, LabelBinarizer, MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix
from keras.wrappers.scikit_learn import KerasClassifier

Using TensorFlow backend.


In [2]:
#data function for reading and processing the train and test sets
#necessary as an input for the optimisation algorithm
def data():
    """Read the Excel workbooks and build the arrays used for training and testing.

    The full dataset comes from 'Colorectal Generated Data_New.xlsx' and the
    predefined split from the four sheets of 'Train_test_split.xlsx'. Every
    frame is indexed by its 'Name' column. Numeric inputs are min-max scaled
    and the outputs are one-hot encoded with to_categorical.
    The result is the tuple X_train, Y_train, X_test, Y_test.

    NOTE(review): hyperas appears to re-parse this function's source, so the
    def line, the final single-line result statement and the four result
    variable names are deliberately left as they were - confirm before
    restructuring.
    """
    #define input processing function
    def process_attributes(df, train, test):
        """Min-max scale the numeric columns of train and test.

        The scaler is fitted on the numeric columns of df and then applied to
        both splits. The result is the pair (trainX, testX) of scaled arrays.
        """
        #fit the scaler on the frame passed in as df (the caller passes the full input table)
        #NOTE(review): the full table presumably contains the test rows as well, so their
        #min/max influence the scaling - consider fitting on train only
        cs = MinMaxScaler()
        cs.fit(df.select_dtypes(np.number))

        #scale the numerical input variables
        trainContinuous = cs.transform(train.select_dtypes(np.number))
        testContinuous = cs.transform(test.select_dtypes(np.number))

        #uncomment the code below to accommodate for any categorical columns
        #zipBinarizer = LabelBinarizer().fit(df["Cat"])
        #trainCategorical = zipBinarizer.transform(train["Cat"])
        #testCategorical = zipBinarizer.transform(test["Cat"])

        # construct our training and testing data points by concatenating
        # the categorical features with the continuous features
        #trainX = np.hstack([trainCategorical, trainContinuous])
        #testX = np.hstack([testCategorical, testContinuous])

        #only continuous features are in use, so they are the processed sets
        trainX = trainContinuous
        testX = testContinuous
        return (trainX, testX)

    #read the full dataset and drop the output column to get the input table
    df = pd.read_excel('Colorectal Generated Data_New.xlsx').set_index('Name')
    df_inputs = df.drop('Marker', axis=1)

    #read the predefined train/test split, one sheet per set
    split_file = 'Train_test_split.xlsx'
    train_inputs = pd.read_excel(split_file, sheet_name='X_train').set_index('Name')
    train_outputs = pd.read_excel(split_file, sheet_name='Y_train').set_index('Name')
    test_inputs = pd.read_excel(split_file, sheet_name='X_test').set_index('Name')
    test_outputs = pd.read_excel(split_file, sheet_name='Y_test').set_index('Name')

    #process the input sets
    (X_train, X_test) = process_attributes(df_inputs, train_inputs, test_inputs)

    #encode the categorical output variables
    Y_train = to_categorical(train_outputs)
    Y_test = to_categorical(test_outputs)
    return X_train, Y_train, X_test, Y_test

In [3]:
#define model and search space for the optimisation algorithm
def model(X_train, Y_train, X_test, Y_test):
    """Objective function for the search, scoring one sampled network configuration.

    Each double-brace template marks a hyperparameter that is filled in by the
    optimiser. The sampled network is scored with stratified 5-fold
    cross-validation (F1) on the training data, and the negated mean score is
    reported as the loss to minimise. X_test and Y_test are accepted but not
    used inside this function.

    The result is a dict with the keys 'loss', 'status' and 'model', where
    'model' is a freshly built, unfitted network with this trial's settings.
    """
    #define ANN model and search space
    def ANN():
        """Build and compile one candidate network from the search space."""
        #define first two layers, possible alternatives for neurons in each,
        #activation function, and dropout layers
        network = Sequential()
        network.add(Dense({{choice([8,16, 24, 32])}}))
        network.add(Activation({{choice(['relu', 'sigmoid', 'tanh'])}}))
        network.add(Dropout({{uniform(0, 0.3)}}))

        network.add(Dense({{choice([8,16, 24, 32])}}))
        network.add(Activation({{choice(['relu', 'sigmoid', 'tanh'])}}))
        network.add(Dropout({{uniform(0, 0.3)}}))

        #add a choice for an additional layer, with relevant sub-choices
        conditional = {{choice(['two', 'three'])}}
        if conditional == 'three':
            network.add(Dense({{choice([8,16, 24, 32])}}))
            network.add(Activation({{choice(['relu', 'sigmoid', 'tanh'])}}))
            network.add(Dropout({{uniform(0, 0.3)}}))

        #define output layer of the model (two classes, softmax probabilities)
        network.add(Dense(2))
        network.add(Activation('softmax'))

        #define optimisation algorithm for network training
        #(called optimizer so the hyperas optim module is not shadowed)
        optimizer = tf.keras.optimizers.Adam(learning_rate={{choice([0.005, 0.001, 0.0001])}})

        #compile model and hand back the network that was just built
        #(the previous code referred to an undefined name here and raised NameError)
        network.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=optimizer)
        return network

    #turn the one-hot targets back into integer class labels for scikit-learn
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(np.argmax(Y_train,axis=1))

    #wrap the ANN for scikit-learn and define training epochs; define batch size alternatives
    net = KerasClassifier(build_fn = ANN,
                          epochs=50,
                          batch_size= {{choice([32,64])}},
                          verbose = 0)

    #set up cross-validation scoring; the fold seed is fixed so that every
    #trial is scored on the same folds and trials stay comparable
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    c = cross_val_score(net, X_train, y, cv=folds, scoring='f1').mean()

    #the value is the mean cross-validated F1 on the training data, not a test accuracy
    print('Mean cross-validated F1:', c)

    #unfitted network with this trial's settings, passed back under the 'model' key
    untrained_model = ANN()
    return {'loss': -c, 'status': STATUS_OK, 'model': untrained_model}

In [4]:
#load the processed splits into the notebook namespace; the training,
#prediction and evaluation cells further down use these four arrays
X_train, Y_train, X_test, Y_test = data()

In [None]:
#run the hyperparameter search: model is the objective, data supplies the
#arrays, TPE proposes the settings and the search stops after 500 evaluations
best_run, best_model = optim.minimize(
    model,
    data=data,
    algo=tpe.suggest,
    max_evals=500,
    trials=Trials(),
    notebook_name='ANN kfold optimiser',
)

>>> Imports:
#coding=utf-8

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import choice, uniform
except:
    pass

try:
    import tensorflow as tf
except:
    pass

try:
    import pandas as pd
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras.layers import Dense, LSTM, SimpleRNN, Dropout, GaussianNoise, Activation
except:
    pass

try:
    from keras.optimizers import Adam, SGD
except:
    pass

try:
    from keras.utils import to_categorical
except:
    pass

try:
    from matplotlib import pyplot as plt
except:
    pass

try:
    import numpy as np
except:
    pass

try:
    from sklearn.model_selection import train_test_split, cross_val_score, KFold, StratifiedKFold
except:
    pass

try:
    from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder, LabelBinarizer, MinMaxScaler
except:
    pass

t

0.7613667675280068                                                                   
Test accuracy:                                                                       
0.768926477125924                                                                    
Test accuracy:                                                                       
0.7541562291914363                                                                   
Test accuracy:                                                                       
0.7709112887301838                                                                   
Test accuracy:                                                                       
0.6398900785366222                                                                   
Test accuracy:                                                                       
0.7564737985291857                                                                   
Test accuracy:                                        

Test accuracy:                                                                        
0.7630519441985                                                                       
Test accuracy:                                                                        
0.7686586290744273                                                                    
Test accuracy:                                                                        
0.7796268194596306                                                                    
Test accuracy:                                                                        
0.7750764701055619                                                                    
Test accuracy:                                                                        
0.7720676313292067                                                                    
Test accuracy:                                                                        
0.7713747371596088                         

In [None]:
#display the parameters for the best performing model
#(best_run is the first value returned by optim.minimize)
print(best_run)

In [None]:
#train the best model to get learning curves
#best_run['batch_size'] is used as an index into the candidate batch sizes;
#the test set is passed as validation data so both curves are recorded
batch = [32, 64]
history = best_model.fit(
    X_train,
    Y_train,
    batch_size=batch[best_run['batch_size']],
    epochs=100,
    verbose=1,
    validation_data=(X_test, Y_test),
    shuffle=False,
)

In [None]:
#run the model on the test inputs; the class label for each row is
#taken from this output with argmax in the evaluation cell
predictions = best_model.predict(X_test)

In [None]:
#evaluate test performance of the model
#collapse the one-hot targets and the model output to class indices once
true_classes = np.argmax(Y_test, axis=1)
predicted_classes = np.argmax(predictions, axis=1)
print(confusion_matrix(true_classes, predicted_classes))
print(classification_report(true_classes, predicted_classes))

In [None]:
#list the names of the series recorded during training (keys of history.history)
history_dict = history.history
print(history_dict.keys())

In [None]:
# Learning curves, part 1: loss on the training and test data per epoch
loss_fig, loss_ax = plt.subplots()
for series_name in ('loss', 'val_loss'):
    loss_ax.plot(history.history[series_name])
loss_ax.set_title('Model loss')
loss_ax.set_ylabel('Loss value')
loss_ax.set_xlabel('Epoch')
loss_ax.legend(['train', 'test'], loc='upper right')
plt.show()

In [None]:
# Plot history: Accuracy
# Both the training and the test curve are drawn, so the title names the
# model rather than the validation set only; the recorded accuracy is
# presumably a 0-1 fraction (Keras 'accuracy' metric), so no percent sign
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy value')
plt.xlabel('Epoch')
plt.legend(['train', 'test'], loc='lower right')
plt.show()