In [None]:
!pip install bayesian-optimization

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bayesian-optimization
  Downloading bayesian_optimization-1.3.1-py3-none-any.whl (16 kB)
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.3.1


In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

from collections import Counter
from imblearn.over_sampling import RandomOverSampler

from bayes_opt import BayesianOptimization

import keras
import tensorflow
from keras import layers
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Embedding, concatenate, Lambda, Bidirectional, LSTM, LeakyReLU
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from keras import metrics
from keras.losses import BinaryCrossentropy
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import linear_model, model_selection

In [7]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib

from matplotlib import pyplot as plt

from sklearn import model_selection
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, roc_auc_score
from sklearn.tree import DecisionTreeClassifier

from imblearn.over_sampling import SMOTE 
from imblearn.under_sampling import EditedNearestNeighbours
from imblearn.combine import SMOTEENN
from collections import Counter

In [10]:
from imblearn.over_sampling import RandomOverSampler

In [2]:
X_train = pd.read_csv('../datasets/word2vec/train_data_imputed_FINAL.csv')
X_test = pd.read_csv('../datasets/word2vec/test_data_imputed_FINAL.csv')
y_train = pd.read_csv('../datasets/word2vec/y_train_FINAL.csv').iloc[:,1:]
y_test = pd.read_csv('../datasets/word2vec/y_test_FINAL.csv').iloc[:,1:]

In [17]:
print(f'X_train: {X_train.shape}')
print(f'y_train: {y_train.shape}')
print(f'X_test: {X_test.shape}')
print(f'y_test: {y_test.shape}')

X_train: (11516, 263)
y_train: (11516, 1)
X_test: (2880, 1, 263)
y_test: (2880, 1)


## Oversampling 

In [8]:
original_counter = Counter(y_train.loc[:,'fraudulent'])
print("Before oversampling:", original_counter)

Before oversampling: Counter({0: 11293, 1: 223})


In [11]:
oversampler = RandomOverSampler(sampling_strategy = 1, random_state=42)
X_train_over, y_train_over = oversampler.fit_resample(X_train, y_train)

In [12]:
after_counter = Counter(y_train_over.loc[:,'fraudulent'])
print("After oversampling:", after_counter)

After oversampling: Counter({0: 11293, 1: 11293})


In [13]:
X_train_over = X_train_over.values
X_train_over = X_train_over.reshape(X_train_over.shape[0], 1, X_train_over.shape[1])

In [14]:
X_test = X_test.values
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])

# Basic NN with Bidirectional LSTM

In [None]:
# for reproducibility 
np.random.seed(42)
tensorflow.random.set_seed(42)

model1 = Sequential()
model1.add(Bidirectional(LSTM(64, return_sequences=False))) 
model1.add(Dropout(0.2))
model1.add(Dense(1, activation='sigmoid'))

model1.compile(loss='binary_crossentropy', optimizer='adam', metrics = ['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])


In [None]:
model1.fit(X_train_over, y_train_over, epochs=20, batch_size = 64)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f26effa3bd0>

In [None]:
print(model1.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 128)              167936    
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 168,065
Trainable params: 168,065
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
train_results = model1.evaluate(X_train_over, y_train_over)
precision = train_results[2]
recall = train_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Train Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {train_results[1]}\nAUROC:{train_results[4]}')

Train Results
Precision: 0.9991152882575989
Recall: 1.0
F1 Score: 0.9995574483634847
Accuracy: 0.9995572566986084
AUROC:1.0000001192092896


In [None]:
test_results = model1.evaluate(X_test, y_test)
precision = test_results[2]
recall = test_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Test Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {test_results[1]}\nAUROC:{test_results[4]}')

Test Results
Precision: 0.6901408433914185
Recall: 0.875
F1 Score: 0.7716535422575659
Accuracy: 0.9899305701255798
AUROC:0.9782476425170898


Add hidden layers

In [None]:
# for reproducibility 
np.random.seed(42)
tensorflow.random.set_seed(42)

model2 = Sequential()
model2.add(Bidirectional(LSTM(64, return_sequences=False))) #input_shape = (batch_size (1st dimension -> num_rows --> excluded), input_size (2nd dimension -> cols),input_dim (2-1))
model2.add(Dropout(0.2))
model2.add(Dense(16, activation='relu'))
model2.add(Dropout(0.2))
model2.add(Dense(8, activation='relu'))
model2.add(Dense(1, activation='sigmoid'))

model2.compile(loss='binary_crossentropy', optimizer='adam', metrics = ['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])

In [None]:
model2.fit(X_train_over, y_train_over, epochs=20, batch_size = 64)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f26e8146dd0>

In [None]:
print(model2.summary())

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_1 (Bidirectio  (None, 128)              167936    
 nal)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 16)                2064      
                                                                 
 dropout_2 (Dropout)         (None, 16)                0         
                                                                 
 dense_2 (Dense)             (None, 8)                 136       
                                                                 
 dense_3 (Dense)             (None, 1)                 9         
                                                      

In [None]:
train_results = model2.evaluate(X_train_over, y_train_over)
precision = train_results[2]
recall = train_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Train Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {train_results[1]}\nAUROC:{train_results[4]}')

Train Results
Precision: 0.9992036819458008
Recall: 1.0
F1 Score: 0.999601682379144
Accuracy: 0.9996015429496765
AUROC:1.0


In [None]:
test_results = model2.evaluate(X_test, y_test)
precision = test_results[2]
recall = test_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Test Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {test_results[1]}\nAUROC:{test_results[4]}')

Test Results
Precision: 0.7076923251152039
Recall: 0.8214285969734192
F1 Score: 0.7603305995110141
Accuracy: 0.9899305701255798
AUROC:0.943466067314148


Add more hidden layers

In [None]:
# for reproducibility 
np.random.seed(42)
tensorflow.random.set_seed(42)

model3 = Sequential()
model3.add(Bidirectional(LSTM(64, return_sequences=False))) 
model3.add(Dense(64, activation='relu'))
model3.add(Dropout(0.2))
model3.add(Dense(32, activation='relu'))
model3.add(Dropout(0.2))
model3.add(Dense(16, activation='relu'))
model3.add(Dense(8, activation='relu'))
model3.add(Dense(1, activation='sigmoid'))

model3.compile(loss='binary_crossentropy', optimizer='adam', metrics = ['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])

In [None]:
model3.fit(X_train_over, y_train_over, epochs=20, batch_size = 64)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f26e9748ed0>

In [None]:
model3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_2 (Bidirectio  (None, 128)              167936    
 nal)                                                            
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dropout_3 (Dropout)         (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 32)                2080      
                                                                 
 dropout_4 (Dropout)         (None, 32)                0         
                                                                 
 dense_6 (Dense)             (None, 16)                528       
                                                      

In [None]:
train_results = model3.evaluate(X_train_over, y_train_over)
precision = train_results[2]
recall = train_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Train Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {train_results[1]}\nAUROC:{train_results[4]}')

Train Results
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 1.0
AUROC:1.0


In [None]:
test_results = model3.evaluate(X_test, y_test)
precision = test_results[2]
recall = test_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Test Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {test_results[1]}\nAUROC:{test_results[4]}')

Test Results
Precision: 0.7843137383460999
Recall: 0.7142857313156128
F1 Score: 0.747663566572446
Accuracy: 0.9906250238418579
AUROC:0.8737004995346069


#Hyperparameter tuning (Round 1)

Best test F1 score: 0.826

In [None]:
def bidirectional_lstm_tuner(batch_size, neurons, activation, optimizer, dropout, dropout_rate, learning_rate, epochs, layers1):

    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax']
    optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                  'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                  'Adagrad':Adagrad(learning_rate=learning_rate), 'Adamax':Adamax(learning_rate=learning_rate),}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', 'relu']
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)

    def create_model():
      # for reproducibility 
      np.random.seed(42)
      tensorflow.random.set_seed(42)

      tuned_model = Sequential()
      tuned_model.add(Bidirectional(LSTM(neurons, return_sequences=False)))  
      tuned_model.add(Dropout(dropout_rate, seed=42))

      for i in range(layers1): #varying first set of hidden layers
        tuned_model.add(Dense(neurons, activation=activation))
        if dropout > 0.5:
          tuned_model.add(Dropout(dropout_rate, seed=42))

      tuned_model.add(Dense(1, activation='sigmoid')) #output layer
      # tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])
      tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall()])
      return tuned_model

    es = EarlyStopping(monitor="loss", patience=3)
    mc = ModelCheckpoint('Best_tuned_bidirectional_LSTM_.h5', monitor='val_loss',mode='min',verbose=2, save_best_only=True)

    tune_rnn = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch_size, verbose=0) 
    score = cross_val_score(tune_rnn, X_train_over, y_train_over, fit_params={'callbacks':[es]}, error_score='raise', scoring='f1').mean() #5 fold cross validation default
    return score

In [None]:
params_bidirectional_lstm ={
    'neurons': (10, 64),
    'activation':(0, 8),
    'optimizer':(0,5),
    'learning_rate':(0.0001, 0.001),
    'batch_size':(32, 128),
    'epochs':(15, 30),
    'layers1':(0,3),
    'dropout':(0, 1),
    'dropout_rate':(0, 0.5)
}

In [None]:
# Run Bayesian Optimization
bidirectional_lstm_bo = BayesianOptimization(bidirectional_lstm_tuner, params_bidirectional_lstm, random_state=42)
bidirectional_lstm_bo.maximize(init_points=20, n_iter=5)

|   iter    |  target   | activa... | batch_... |  dropout  | dropou... |  epochs   |  layers1  | learni... |  neurons  | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------




| [0m1        [0m | [0m0.1172   [0m | [0m2.996    [0m | [0m123.3    [0m | [0m0.732    [0m | [0m0.2993   [0m | [0m17.34    [0m | [0m0.468    [0m | [0m0.0001523[0m | [0m56.77    [0m | [0m3.006    [0m |




| [95m2        [0m | [95m0.1571   [0m | [95m5.665    [0m | [95m33.98    [0m | [95m0.9699   [0m | [95m0.4162   [0m | [95m18.19    [0m | [95m0.5455   [0m | [95m0.0002651[0m | [95m26.43    [0m | [95m2.624    [0m |




| [95m3        [0m | [95m0.9641   [0m | [95m3.456    [0m | [95m59.96    [0m | [95m0.6119   [0m | [95m0.06975  [0m | [95m19.38    [0m | [95m1.099    [0m | [95m0.0005105[0m | [95m52.4     [0m | [95m0.9984   [0m |




| [0m4        [0m | [0m0.2691   [0m | [0m4.114    [0m | [0m88.87    [0m | [0m0.04645  [0m | [0m0.3038   [0m | [0m17.56    [0m | [0m0.1952   [0m | [0m0.000954 [0m | [0m62.14    [0m | [0m4.042    [0m |




| [0m5        [0m | [0m0.8549   [0m | [0m2.437    [0m | [0m41.38    [0m | [0m0.6842   [0m | [0m0.2201   [0m | [0m16.83    [0m | [0m1.486    [0m | [0m0.0001309[0m | [0m59.1     [0m | [0m1.294    [0m |




| [0m6        [0m | [0m0.5467   [0m | [0m5.3      [0m | [0m61.92    [0m | [0m0.5201   [0m | [0m0.2734   [0m | [0m17.77    [0m | [0m2.909    [0m | [0m0.0007976[0m | [0m60.73    [0m | [0m4.474    [0m |




| [0m7        [0m | [0m0.1604   [0m | [0m4.783    [0m | [0m120.5    [0m | [0m0.08849  [0m | [0m0.09799  [0m | [0m15.68    [0m | [0m0.976    [0m | [0m0.0004498[0m | [0m24.65    [0m | [0m4.144    [0m |




| [95m8        [0m | [95m0.9644   [0m | [95m2.854    [0m | [95m58.97    [0m | [95m0.5427   [0m | [95m0.07046  [0m | [95m27.03    [0m | [95m0.2237   [0m | [95m0.0009882[0m | [95m51.7     [0m | [95m0.9936   [0m |




| [0m9        [0m | [0m0.1486   [0m | [0m0.04418  [0m | [0m110.3    [0m | [0m0.7069   [0m | [0m0.3645   [0m | [0m26.57    [0m | [0m0.2221   [0m | [0m0.0004226[0m | [0m16.26    [0m | [0m4.316    [0m |




| [0m10       [0m | [0m0.9546   [0m | [0m4.986    [0m | [0m63.77    [0m | [0m0.06356  [0m | [0m0.1555   [0m | [0m19.88    [0m | [0m2.189    [0m | [0m0.0006738[0m | [0m57.91    [0m | [0m2.361    [0m |




| [0m11       [0m | [0m0.1458   [0m | [0m0.9568   [0m | [0m100.5    [0m | [0m0.7608   [0m | [0m0.2806   [0m | [0m26.56    [0m | [0m1.481    [0m | [0m0.0005705[0m | [0m33.09    [0m | [0m0.1271   [0m |




| [0m12       [0m | [0m0.1416   [0m | [0m0.8631   [0m | [0m35.02    [0m | [0m0.6364   [0m | [0m0.1572   [0m | [0m22.63    [0m | [0m2.723    [0m | [0m0.0003244[0m | [0m32.16    [0m | [0m3.778    [0m |




| [0m13       [0m | [0m0.4785   [0m | [0m1.83     [0m | [0m39.39    [0m | [0m0.2898   [0m | [0m0.08061  [0m | [0m28.95    [0m | [0m2.424    [0m | [0m0.0006701[0m | [0m57.06    [0m | [0m4.018    [0m |




| [0m14       [0m | [0m0.7447   [0m | [0m1.493    [0m | [0m117.7    [0m | [0m0.5393   [0m | [0m0.4037   [0m | [0m28.44    [0m | [0m0.954    [0m | [0m0.000199 [0m | [0m22.31    [0m | [0m2.136    [0m |




| [0m15       [0m | [0m0.6926   [0m | [0m6.544    [0m | [0m114.6    [0m | [0m0.006952 [0m | [0m0.2554   [0m | [0m21.26    [0m | [0m0.6663   [0m | [0m0.0002079[0m | [0m28.23    [0m | [0m4.715    [0m |




| [0m16       [0m | [0m0.9047   [0m | [0m2.586    [0m | [0m81.8     [0m | [0m0.703    [0m | [0m0.1818   [0m | [0m29.58    [0m | [0m2.887    [0m | [0m0.0003266[0m | [0m36.85    [0m | [0m1.504    [0m |




| [0m17       [0m | [0m0.9062   [0m | [0m2.279    [0m | [0m35.54    [0m | [0m0.6096   [0m | [0m0.2513   [0m | [0m15.77    [0m | [0m0.8359   [0m | [0m0.0009174[0m | [0m22.94    [0m | [0m0.7245   [0m |




| [0m18       [0m | [0m0.1133   [0m | [0m3.916    [0m | [0m126.6    [0m | [0m0.2421   [0m | [0m0.3361   [0m | [0m26.42    [0m | [0m0.7129   [0m | [0m0.0007554[0m | [0m29.86    [0m | [0m3.162    [0m |




| [0m19       [0m | [0m0.02015  [0m | [0m5.068    [0m | [0m83.43    [0m | [0m0.09029  [0m | [0m0.4177   [0m | [0m19.81    [0m | [0m0.5596   [0m | [0m0.0001367[0m | [0m41.91    [0m | [0m3.388    [0m |




| [0m20       [0m | [0m0.9485   [0m | [0m0.1327   [0m | [0m81.16    [0m | [0m0.2265   [0m | [0m0.3226   [0m | [0m17.62    [0m | [0m2.073    [0m | [0m0.0004481[0m | [0m60.58    [0m | [0m0.6876   [0m |




| [0m21       [0m | [0m0.9418   [0m | [0m3.785    [0m | [0m59.48    [0m | [0m0.8941   [0m | [0m0.09065  [0m | [0m21.88    [0m | [0m1.764    [0m | [0m0.0005626[0m | [0m54.83    [0m | [0m1.157    [0m |




| [0m22       [0m | [0m0.9495   [0m | [0m3.867    [0m | [0m63.91    [0m | [0m0.0      [0m | [0m0.1062   [0m | [0m23.06    [0m | [0m0.2945   [0m | [0m0.0009811[0m | [0m53.14    [0m | [0m1.769    [0m |




| [0m23       [0m | [0m0.1451   [0m | [0m0.0      [0m | [0m64.07    [0m | [0m0.0      [0m | [0m0.0      [0m | [0m20.19    [0m | [0m3.0      [0m | [0m0.0001   [0m | [0m55.11    [0m | [0m0.0      [0m |




| [0m24       [0m | [0m0.945    [0m | [0m5.168    [0m | [0m60.06    [0m | [0m0.2805   [0m | [0m0.1314   [0m | [0m23.17    [0m | [0m0.0      [0m | [0m0.001    [0m | [0m51.62    [0m | [0m2.04     [0m |




| [0m25       [0m | [0m0.9319   [0m | [0m7.126    [0m | [0m62.24    [0m | [0m0.5395   [0m | [0m0.1362   [0m | [0m20.24    [0m | [0m0.0      [0m | [0m0.001    [0m | [0m54.8     [0m | [0m1.86     [0m |


In [None]:
bidirectional_lstm_bo.max['params']

{'activation': 2.8540266135487142,
 'batch_size': 58.96971292998855,
 'dropout': 0.5426960831582485,
 'dropout_rate': 0.07046211248738132,
 'epochs': 27.032954711310595,
 'layers1': 0.22365193103931247,
 'learning_rate': 0.0009881982429404655,
 'neurons': 51.7012175420195,
 'optimizer': 0.993578407670862}

In [None]:
def create_model_best_params(params):
  learning_rate = params['learning_rate']
  optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax']
  optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                'Adagrad':Adagrad(learning_rate=learning_rate), 'Adamax':Adamax(learning_rate=learning_rate),}
  activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'exponential', 'relu']
  neurons = round(params['neurons'])
  activation = activationL[round(params['activation'])]
  optimizer = optimizerD[optimizerL[round(params['optimizer'])]]
  batch_size = round(params['batch_size'])
  epochs = round(params['epochs'])
  layers1 = round(params['layers1'])
  dropout = params['dropout']
  dropout_rate = params['dropout_rate']

  # for reproducibility 
  np.random.seed(42)
  tensorflow.random.set_seed(42)
  tuned_model = Sequential()
  tuned_model.add(Bidirectional(LSTM(neurons, return_sequences=False)))  
  tuned_model.add(Dropout(dropout_rate, seed=42))

  for i in range(layers1): #varying first set of hidden layers
    tuned_model.add(Dense(neurons, activation=activation))
    if dropout > 0.5:
      tuned_model.add(Dropout(dropout_rate, seed=42))

  tuned_model.add(Dense(1, activation='sigmoid')) #output layer
  tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])
  return tuned_model

In [None]:
best_params = {'activation': 2.8540266135487142,
              'batch_size': 58.96971292998855,
              'dropout': 0.5426960831582485,
              'dropout_rate': 0.07046211248738132,
              'epochs': 27.032954711310595,
              'layers1': 0.22365193103931247,
              'learning_rate': 0.0009881982429404655,
              'neurons': 51.7012175420195,
              'optimizer': 0.993578407670862}
best_model = create_model_best_params(best_params)
best_model.fit(X_train_over, y_train_over, epochs=round(best_params['epochs']), batch_size = round(best_params['batch_size']))

Epoch 1/27
Epoch 2/27
Epoch 3/27
Epoch 4/27
Epoch 5/27
Epoch 6/27
Epoch 7/27
Epoch 8/27
Epoch 9/27
Epoch 10/27
Epoch 11/27
Epoch 12/27
Epoch 13/27
Epoch 14/27
Epoch 15/27
Epoch 16/27
Epoch 17/27
Epoch 18/27
Epoch 19/27
Epoch 20/27
Epoch 21/27
Epoch 22/27
Epoch 23/27
Epoch 24/27
Epoch 25/27
Epoch 26/27
Epoch 27/27


<keras.callbacks.History at 0x7f07bee45e50>

In [None]:
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional (Bidirectiona  (None, 104)              131456    
 l)                                                              
                                                                 
 dropout (Dropout)           (None, 104)               0         
                                                                 
 dense (Dense)               (None, 1)                 105       
                                                                 
Total params: 131,561
Trainable params: 131,561
Non-trainable params: 0
_________________________________________________________________


In [None]:
train_results = best_model.evaluate(X_train_over, y_train_over)
precision = train_results[2]
recall = train_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Train Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {train_results[1]}\nAUROC:{train_results[4]}')

Train Results
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 1.0
AUROC:1.0


In [None]:
test_results = best_model.evaluate(X_test, y_test)
precision = test_results[2]
recall = test_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Test Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {test_results[1]}\nAUROC:{test_results[4]}')

Test Results
Precision: 0.849056601524353
Recall: 0.8035714030265808
F1 Score: 0.8256880588457645
Accuracy: 0.9934027791023254
AUROC:0.9531503915786743


#Hyperparameter tuning (Round 2)

Best test F1 score: 0.739

In [None]:
def bidirectional_lstm_tuner(batch_size, neurons, activation, optimizer, dropout_rate, learning_rate, epochs, layers1):
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
    optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                  'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                  'Adagrad':Adagrad(learning_rate=learning_rate),}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'relu']
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)

    def create_model():
      # for reproducibility 
      np.random.seed(42)
      tensorflow.random.set_seed(42)

      tuned_model = Sequential()
      tuned_model.add(Bidirectional(LSTM(neurons, return_sequences=False)))  
      tuned_model.add(Dropout(dropout_rate, seed=42))

      for i in range(layers1): #varying first set of hidden layers
        tuned_model.add(Dense(neurons, activation=activation))
        tuned_model.add(Dropout(dropout_rate, seed=42))

      tuned_model.add(Dense(1, activation='sigmoid')) #output layer
      # tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])
      tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall()])
      return tuned_model

    es = EarlyStopping(monitor="loss", patience=3)
    mc = ModelCheckpoint('Best_tuned_bidirectional_LSTM_.h5', monitor='val_loss',mode='min',verbose=2, save_best_only=True)

    tune_rnn = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch_size, verbose=0) 
    score = cross_val_score(tune_rnn, X_train_over, y_train_over, fit_params={'callbacks':[es]}, error_score='raise', scoring='f1').mean() #5 fold cross validation default
    return score

In [None]:
params_bidirectional_lstm ={
    'neurons': (10, 64),
    'activation':(0, 7),
    'optimizer':(0, 4),
    'learning_rate':(0.0001, 0.001),
    'batch_size':(32, 128),
    'epochs':(15, 30),
    'layers1':(0,3),
    'dropout_rate':(0, 0.4)
}

In [None]:
# Run Bayesian Optimization
bidirectional_lstm_bo = BayesianOptimization(bidirectional_lstm_tuner, params_bidirectional_lstm, random_state=42)
bidirectional_lstm_bo.maximize(init_points=20, n_iter=5)

|   iter    |  target   | activa... | batch_... | dropou... |  epochs   |  layers1  | learni... |  neurons  | optimizer |
-------------------------------------------------------------------------------------------------------------------------




| [0m1        [0m | [0m0.4619   [0m | [0m2.622    [0m | [0m123.3    [0m | [0m0.2928   [0m | [0m23.98    [0m | [0m0.4681   [0m | [0m0.0002404[0m | [0m13.14    [0m | [0m3.465    [0m |




| [95m2        [0m | [95m0.9354   [0m | [95m4.208    [0m | [95m99.97    [0m | [95m0.008234 [0m | [95m29.55    [0m | [95m2.497    [0m | [95m0.0002911[0m | [95m19.82    [0m | [95m0.7336   [0m |




| [0m3        [0m | [0m0.844    [0m | [0m2.13     [0m | [0m82.38    [0m | [0m0.1728   [0m | [0m19.37    [0m | [0m1.836    [0m | [0m0.0002255[0m | [0m25.78    [0m | [0m1.465    [0m |




| [0m4        [0m | [0m0.8894   [0m | [0m3.192    [0m | [0m107.4    [0m | [0m0.07987  [0m | [0m22.71    [0m | [0m1.777    [0m | [0m0.0001418[0m | [0m42.81    [0m | [0m0.6821   [0m |




| [0m5        [0m | [0m0.8087   [0m | [0m0.4554   [0m | [0m123.1    [0m | [0m0.3863   [0m | [0m27.13    [0m | [0m0.9138   [0m | [0m0.0001879[0m | [0m46.95    [0m | [0m1.761    [0m |




| [95m6        [0m | [95m0.9504   [0m | [95m0.8543   [0m | [95m79.54    [0m | [95m0.01376  [0m | [95m28.64    [0m | [95m0.7763   [0m | [95m0.0006963[0m | [95m26.83    [0m | [95m2.08     [0m |




| [0m7        [0m | [0m0.4407   [0m | [0m3.827    [0m | [0m49.75    [0m | [0m0.3878   [0m | [0m26.63    [0m | [0m2.818    [0m | [0m0.0009053[0m | [0m42.29    [0m | [0m3.687    [0m |




| [95m8        [0m | [95m0.9568   [0m | [95m0.6194   [0m | [95m50.81    [0m | [95m0.01809  [0m | [95m19.88    [0m | [95m1.166    [0m | [95m0.0003442[0m | [95m54.75    [0m | [95m1.427    [0m |




| [0m9        [0m | [0m0.9544   [0m | [0m1.967    [0m | [0m84.1     [0m | [0m0.05637  [0m | [0m27.03    [0m | [0m0.2237   [0m | [0m0.0009882[0m | [0m51.7     [0m | [0m0.7949   [0m |




| [0m10       [0m | [0m0.1479   [0m | [0m0.03865  [0m | [0m110.3    [0m | [0m0.2827   [0m | [0m25.94    [0m | [0m2.314    [0m | [0m0.0001666[0m | [0m29.36    [0m | [0m0.4635   [0m |




| [0m11       [0m | [0m0.03732  [0m | [0m6.042    [0m | [0m91.84    [0m | [0m0.1324   [0m | [0m15.95    [0m | [0m0.9329   [0m | [0m0.0003927[0m | [0m49.4     [0m | [0m2.55     [0m |




| [95m12       [0m | [95m0.9637   [0m | [95m6.21     [0m | [95m77.33    [0m | [95m0.04784  [0m | [95m25.7     [0m | [95m2.282    [0m | [95m0.0006051[0m | [95m51.63    [0m | [95m1.975    [0m |




| [0m13       [0m | [0m0.8621   [0m | [0m3.659    [0m | [0m73.04    [0m | [0m0.01017  [0m | [0m16.62    [0m | [0m0.09429  [0m | [0m0.0006728[0m | [0m26.98    [0m | [0m2.034    [0m |




| [0m14       [0m | [0m0.9036   [0m | [0m6.353    [0m | [0m55.93    [0m | [0m0.1642   [0m | [0m26.33    [0m | [0m0.6864   [0m | [0m0.0001693[0m | [0m25.65    [0m | [0m0.6449   [0m |




| [0m15       [0m | [0m0.8756   [0m | [0m6.508    [0m | [0m109.6    [0m | [0m0.2534   [0m | [0m28.07    [0m | [0m2.411    [0m | [0m0.0002679[0m | [0m58.2     [0m | [0m2.157    [0m |




| [0m16       [0m | [0m0.5334   [0m | [0m5.652    [0m | [0m118.0    [0m | [0m0.1272   [0m | [0m16.65    [0m | [0m0.6838   [0m | [0m0.0004844[0m | [0m54.17    [0m | [0m3.443    [0m |




| [0m17       [0m | [0m0.9019   [0m | [0m0.04866  [0m | [0m81.03    [0m | [0m0.167    [0m | [0m18.33    [0m | [0m0.3596   [0m | [0m0.0004039[0m | [0m60.92    [0m | [0m1.293    [0m |




| [0m18       [0m | [0m0.8825   [0m | [0m3.632    [0m | [0m99.49    [0m | [0m0.1455   [0m | [0m29.58    [0m | [0m2.887    [0m | [0m0.0003266[0m | [0m36.85    [0m | [0m1.204    [0m |




| [0m19       [0m | [0m0.9374   [0m | [0m1.994    [0m | [0m35.54    [0m | [0m0.2438   [0m | [0m22.54    [0m | [0m0.1544   [0m | [0m0.0003508[0m | [0m59.05    [0m | [0m0.9582   [0m |




| [0m20       [0m | [0m0.0      [0m | [0m1.014    [0m | [0m78.99    [0m | [0m0.3943   [0m | [0m18.63    [0m | [0m2.016    [0m | [0m0.0007855[0m | [0m22.83    [0m | [0m2.913    [0m |




| [0m21       [0m | [0m0.0      [0m | [0m2.664    [0m | [0m114.4    [0m | [0m0.3018   [0m | [0m15.28    [0m | [0m0.4995   [0m | [0m0.0005266[0m | [0m45.57    [0m | [0m2.861    [0m |




| [0m22       [0m | [0m0.1445   [0m | [0m3.507    [0m | [0m115.8    [0m | [0m0.3596   [0m | [0m16.19    [0m | [0m2.499    [0m | [0m0.0005793[0m | [0m44.74    [0m | [0m0.4607   [0m |




| [0m23       [0m | [0m0.5147   [0m | [0m4.233    [0m | [0m81.79    [0m | [0m0.1885   [0m | [0m25.23    [0m | [0m1.856    [0m | [0m0.0008313[0m | [0m51.19    [0m | [0m0.4253   [0m |




| [0m24       [0m | [0m0.1943   [0m | [0m6.81     [0m | [0m75.42    [0m | [0m0.2822   [0m | [0m26.81    [0m | [0m0.9503   [0m | [0m0.0007072[0m | [0m52.41    [0m | [0m0.3749   [0m |




| [0m25       [0m | [0m0.1416   [0m | [0m1.831    [0m | [0m110.7    [0m | [0m0.3918   [0m | [0m17.27    [0m | [0m2.53     [0m | [0m0.0007728[0m | [0m45.19    [0m | [0m3.627    [0m |


In [None]:
bidirectional_lstm_bo.max['params']

{'activation': 6.210489198034286,
 'batch_size': 77.33263281554713,
 'dropout_rate': 0.04783769837532068,
 'epochs': 25.698671808344926,
 'layers1': 2.2823551458506923,
 'learning_rate': 0.0006051494778125466,
 'neurons': 51.63222771754629,
 'optimizer': 1.975182385457563}

In [None]:
def create_model_best_params(params):
  learning_rate = params['learning_rate']
  optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
  optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                'Adagrad':Adagrad(learning_rate=learning_rate),}
  activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'relu']
  neurons = round(params['neurons'])
  activation = activationL[round(params['activation'])]
  optimizer = optimizerD[optimizerL[round(params['optimizer'])]]
  batch_size = round(params['batch_size'])
  epochs = round(params['epochs'])
  layers1 = round(params['layers1'])
  dropout_rate = params['dropout_rate']

  # for reproducibility 
  np.random.seed(42)
  tensorflow.random.set_seed(42)
  tuned_model = Sequential()
  tuned_model.add(Bidirectional(LSTM(neurons, return_sequences=False)))  
  tuned_model.add(Dropout(dropout_rate, seed=42))

  for i in range(layers1): #varying first set of hidden layers
    tuned_model.add(Dense(neurons, activation=activation))
    tuned_model.add(Dropout(dropout_rate, seed=42))

  tuned_model.add(Dense(1, activation='sigmoid')) #output layer
  tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])
  return tuned_model

In [None]:
best_params = {'activation': 6.210489198034286,
              'batch_size': 77.33263281554713,
              'dropout_rate': 0.04783769837532068,
              'epochs': 25.698671808344926,
              'layers1': 2.2823551458506923,
              'learning_rate': 0.0006051494778125466,
              'neurons': 51.63222771754629,
              'optimizer': 1.975182385457563}
best_model2 = create_model_best_params(best_params)
best_model2.fit(X_train_over, y_train_over, epochs=round(best_params['epochs']), batch_size = round(best_params['batch_size']))

Epoch 1/26
Epoch 2/26
Epoch 3/26
Epoch 4/26
Epoch 5/26
Epoch 6/26
Epoch 7/26
Epoch 8/26
Epoch 9/26
Epoch 10/26
Epoch 11/26
Epoch 12/26
Epoch 13/26
Epoch 14/26
Epoch 15/26
Epoch 16/26
Epoch 17/26
Epoch 18/26
Epoch 19/26
Epoch 20/26
Epoch 21/26
Epoch 22/26
Epoch 23/26
Epoch 24/26
Epoch 25/26
Epoch 26/26


<keras.callbacks.History at 0x7f3fedd5dad0>

In [None]:
best_model2.summary()

Model: "sequential_252"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_252 (Bidirect  (None, 104)              131456    
 ional)                                                          
                                                                 
 dropout_453 (Dropout)       (None, 104)               0         
                                                                 
 dense_576 (Dense)           (None, 52)                5460      
                                                                 
 dropout_454 (Dropout)       (None, 52)                0         
                                                                 
 dense_577 (Dense)           (None, 52)                2756      
                                                                 
 dropout_455 (Dropout)       (None, 52)                0         
                                                    

In [None]:
train_results = best_model2.evaluate(X_train_over, y_train_over)
precision = train_results[2]
recall = train_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Train Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {train_results[1]}\nAUROC:{train_results[4]}')

Train Results
Precision: 0.9996459484100342
Recall: 1.0
F1 Score: 0.9998229428613363
Accuracy: 0.9998229146003723
AUROC:1.0000001192092896


In [None]:
test_results = best_model2.evaluate(X_test, y_test)
precision = test_results[2]
recall = test_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Test Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {test_results[1]}\nAUROC:{test_results[4]}')

Test Results
Precision: 0.6984127163887024
Recall: 0.7857142686843872
F1 Score: 0.7394958008531877
Accuracy: 0.9892361164093018
AUROC:0.9341992735862732


#Hyperparameter tuning (Round 3)

Best test F1 score: 0.712

In [None]:
def bidirectional_lstm_tuner(batch_size, neurons, activation, optimizer, dropout_rate, learning_rate, epochs, layers1, layers2):
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
    optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                  'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                  'Adagrad':Adagrad(learning_rate=learning_rate),}
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'relu']
    neurons = round(neurons)
    activation = activationL[round(activation)]
    optimizer = optimizerD[optimizerL[round(optimizer)]]
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)

    def create_model():
      # for reproducibility 
      np.random.seed(42)
      tensorflow.random.set_seed(42)

      tuned_model = Sequential()
      tuned_model.add(Bidirectional(LSTM(neurons, return_sequences=False)))  
      tuned_model.add(Dropout(dropout_rate, seed=42))

      for i in range(layers1): #varying first set of hidden layers
        tuned_model.add(Dense(neurons, activation=activation))
        tuned_model.add(Dropout(dropout_rate, seed=42))

      for i in range(layers2): #varying second set of hidden layers
        tuned_model.add(Dense(neurons, activation=activation))

      tuned_model.add(Dense(1, activation='sigmoid')) #output layer
      # tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])
      tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall()])
      return tuned_model

    es = EarlyStopping(monitor="loss", patience=3)
    mc = ModelCheckpoint('Best_tuned_bidirectional_LSTM_.h5', monitor='val_loss',mode='min',verbose=2, save_best_only=True)

    tune_rnn = KerasClassifier(build_fn=create_model, epochs=epochs, batch_size=batch_size, verbose=0) 
    score = cross_val_score(tune_rnn, X_train_over, y_train_over, fit_params={'callbacks':[es]}, error_score='raise', scoring='f1').mean() #5 fold cross validation default
    return score

In [None]:
params_bidirectional_lstm ={
    'neurons': (10, 64),
    'activation':(0, 7),
    'optimizer':(0, 4),
    'learning_rate':(0.0001, 0.001),
    'batch_size':(32, 128),
    'epochs':(15, 30),
    'layers1':(0,3),
    'layers2':(0,3),
    'dropout_rate':(0, 0.4)
}

In [None]:
# Run Bayesian Optimization
bidirectional_lstm_bo = BayesianOptimization(bidirectional_lstm_tuner, params_bidirectional_lstm, random_state=42)
bidirectional_lstm_bo.maximize(init_points=20, n_iter=5)

|   iter    |  target   | activa... | batch_... | dropou... |  epochs   |  layers1  |  layers2  | learni... |  neurons  | optimizer |
-------------------------------------------------------------------------------------------------------------------------------------




| [0m1        [0m | [0m0.7486   [0m | [0m2.622    [0m | [0m123.3    [0m | [0m0.2928   [0m | [0m23.98    [0m | [0m0.4681   [0m | [0m0.468    [0m | [0m0.0001523[0m | [0m56.77    [0m | [0m2.404    [0m |




| [95m2        [0m | [95m0.8683   [0m | [95m4.957    [0m | [95m33.98    [0m | [95m0.388    [0m | [95m27.49    [0m | [95m0.637    [0m | [95m0.5455   [0m | [95m0.0002651[0m | [95m26.43    [0m | [95m2.099    [0m |




| [95m3        [0m | [95m0.9397   [0m | [95m3.024    [0m | [95m59.96    [0m | [95m0.2447   [0m | [95m17.09    [0m | [95m0.8764   [0m | [95m1.099    [0m | [95m0.0005105[0m | [95m52.4     [0m | [95m0.7987   [0m |




| [0m4        [0m | [0m0.1478   [0m | [0m3.6      [0m | [0m88.87    [0m | [0m0.01858  [0m | [0m24.11    [0m | [0m0.5116   [0m | [0m0.1952   [0m | [0m0.000954 [0m | [0m62.14    [0m | [0m3.234    [0m |




| [0m5        [0m | [0m0.8895   [0m | [0m2.132    [0m | [0m41.38    [0m | [0m0.2737   [0m | [0m21.6     [0m | [0m0.3661   [0m | [0m1.486    [0m | [0m0.0001309[0m | [0m59.1     [0m | [0m1.035    [0m |




| [0m6        [0m | [0m0.6052   [0m | [0m4.638    [0m | [0m61.92    [0m | [0m0.208    [0m | [0m23.2     [0m | [0m0.5546   [0m | [0m2.909    [0m | [0m0.0007976[0m | [0m60.73    [0m | [0m3.579    [0m |




| [0m7        [0m | [0m0.08759  [0m | [0m4.185    [0m | [0m120.5    [0m | [0m0.0354   [0m | [0m17.94    [0m | [0m0.1357   [0m | [0m0.976    [0m | [0m0.0004498[0m | [0m24.65    [0m | [0m3.315    [0m |




| [95m8        [0m | [95m0.95     [0m | [95m2.497    [0m | [95m58.97    [0m | [95m0.2171   [0m | [95m17.11    [0m | [95m2.407    [0m | [95m0.2237   [0m | [95m0.0009882[0m | [95m51.7     [0m | [95m0.7949   [0m |




| [0m9        [0m | [0m0.5416   [0m | [0m0.03865  [0m | [0m110.3    [0m | [0m0.2827   [0m | [0m25.94    [0m | [0m2.314    [0m | [0m0.2221   [0m | [0m0.0004226[0m | [0m16.26    [0m | [0m3.452    [0m |




| [0m10       [0m | [0m0.9434   [0m | [0m4.363    [0m | [0m63.77    [0m | [0m0.02542  [0m | [0m19.66    [0m | [0m0.9755   [0m | [0m2.189    [0m | [0m0.0006738[0m | [0m57.91    [0m | [0m1.889    [0m |




| [0m11       [0m | [0m0.1416   [0m | [0m0.8372   [0m | [0m100.5    [0m | [0m0.3043   [0m | [0m23.42    [0m | [0m2.313    [0m | [0m1.481    [0m | [0m0.0005705[0m | [0m33.09    [0m | [0m0.1017   [0m |




| [0m12       [0m | [0m0.0      [0m | [0m0.7552   [0m | [0m35.02    [0m | [0m0.2546   [0m | [0m19.72    [0m | [0m1.526    [0m | [0m2.723    [0m | [0m0.0003244[0m | [0m32.16    [0m | [0m3.022    [0m |




| [0m13       [0m | [0m0.1434   [0m | [0m1.602    [0m | [0m39.39    [0m | [0m0.1159   [0m | [0m17.42    [0m | [0m2.789    [0m | [0m2.424    [0m | [0m0.0006701[0m | [0m57.06    [0m | [0m3.215    [0m |




| [0m14       [0m | [0m0.7595   [0m | [0m1.306    [0m | [0m117.7    [0m | [0m0.2157   [0m | [0m27.11    [0m | [0m2.688    [0m | [0m0.954    [0m | [0m0.000199 [0m | [0m22.31    [0m | [0m1.708    [0m |




| [0m15       [0m | [0m0.1513   [0m | [0m5.726    [0m | [0m114.6    [0m | [0m0.002781 [0m | [0m22.66    [0m | [0m1.252    [0m | [0m0.6663   [0m | [0m0.0002079[0m | [0m28.23    [0m | [0m3.772    [0m |




| [0m16       [0m | [0m0.9004   [0m | [0m2.262    [0m | [0m81.8     [0m | [0m0.2812   [0m | [0m20.45    [0m | [0m2.915    [0m | [0m2.887    [0m | [0m0.0003266[0m | [0m36.85    [0m | [0m1.204    [0m |




| [0m17       [0m | [0m0.9381   [0m | [0m1.994    [0m | [0m35.54    [0m | [0m0.2438   [0m | [0m22.54    [0m | [0m0.1544   [0m | [0m0.8359   [0m | [0m0.0009174[0m | [0m22.94    [0m | [0m0.5796   [0m |




| [0m18       [0m | [0m0.1448   [0m | [0m3.426    [0m | [0m126.6    [0m | [0m0.09682  [0m | [0m25.08    [0m | [0m2.285    [0m | [0m0.7129   [0m | [0m0.0007554[0m | [0m29.86    [0m | [0m2.529    [0m |




| [0m19       [0m | [0m0.1761   [0m | [0m4.435    [0m | [0m83.43    [0m | [0m0.03612  [0m | [0m27.53    [0m | [0m0.9623   [0m | [0m0.5596   [0m | [0m0.0001367[0m | [0m41.91    [0m | [0m2.71     [0m |




| [95m20       [0m | [95m0.9519   [0m | [95m0.1161   [0m | [95m81.16    [0m | [95m0.0906   [0m | [95m24.68    [0m | [95m0.5231   [0m | [95m2.073    [0m | [95m0.0004481[0m | [95m60.58    [0m | [95m0.5501   [0m |




| [95m21       [0m | [95m0.9648   [0m | [95m1.347    [0m | [95m62.06    [0m | [95m0.0      [0m | [95m19.0     [0m | [95m3.0      [0m | [95m0.0      [0m | [95m0.001    [0m | [95m55.4     [0m | [95m1.229    [0m |




| [0m22       [0m | [0m0.9608   [0m | [0m6.538    [0m | [0m63.97    [0m | [0m0.0      [0m | [0m19.58    [0m | [0m3.0      [0m | [0m0.0      [0m | [0m0.001    [0m | [0m52.72    [0m | [0m0.7556   [0m |




| [0m23       [0m | [0m0.5788   [0m | [0m3.384    [0m | [0m66.21    [0m | [0m0.0      [0m | [0m15.0     [0m | [0m3.0      [0m | [0m1.541    [0m | [0m0.001    [0m | [0m53.93    [0m | [0m4.0      [0m |




| [0m24       [0m | [0m0.1495   [0m | [0m6.772    [0m | [0m58.87    [0m | [0m0.0      [0m | [0m18.47    [0m | [0m3.0      [0m | [0m0.0      [0m | [0m0.001    [0m | [0m56.43    [0m | [0m0.0      [0m |




| [0m25       [0m | [0m0.9573   [0m | [0m2.893    [0m | [0m63.78    [0m | [0m0.1625   [0m | [0m20.62    [0m | [0m1.133    [0m | [0m1.524    [0m | [0m0.0008711[0m | [0m53.05    [0m | [0m1.331    [0m |


In [None]:
bidirectional_lstm_bo.max['params']

{'activation': 1.3466763722200286,
 'batch_size': 62.057661802744875,
 'dropout_rate': 0.0,
 'epochs': 18.997758809674913,
 'layers1': 3.0,
 'layers2': 0.0,
 'learning_rate': 0.001,
 'neurons': 55.39979522299761,
 'optimizer': 1.2288553110171885}

In [None]:
def create_model_best_params(params):
  learning_rate = params['learning_rate']
  optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad']
  optimizerD= {'Adam':Adam(learning_rate=learning_rate), 'SGD':SGD(learning_rate=learning_rate),
                'RMSprop':RMSprop(learning_rate=learning_rate), 'Adadelta':Adadelta(learning_rate=learning_rate),
                'Adagrad':Adagrad(learning_rate=learning_rate),}
  activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 'selu', 'elu', 'relu']
  neurons = round(params['neurons'])
  activation = activationL[round(params['activation'])]
  optimizer = optimizerD[optimizerL[round(params['optimizer'])]]
  batch_size = round(params['batch_size'])
  epochs = round(params['epochs'])
  layers1 = round(params['layers1'])
  layers2 = round(params['layers2'])
  dropout_rate = params['dropout_rate']

  # for reproducibility 
  np.random.seed(42)
  tensorflow.random.set_seed(42)
  tuned_model = Sequential()
  tuned_model.add(Bidirectional(LSTM(neurons, return_sequences=False)))  
  tuned_model.add(Dropout(dropout_rate, seed=42))

  for i in range(layers1): #varying first set of hidden layers
    tuned_model.add(Dense(neurons, activation=activation))
    tuned_model.add(Dropout(dropout_rate, seed=42))
  
  for i in range(layers2): #varying second set of hidden layers
        tuned_model.add(Dense(neurons, activation=activation))

  tuned_model.add(Dense(1, activation='sigmoid')) #output layer
  tuned_model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=['accuracy',metrics.Precision(), metrics.Recall(),metrics.AUC()])
  return tuned_model

In [None]:
best_params = {'activation': 1.3466763722200286,
              'batch_size': 62.057661802744875,
              'dropout_rate': 0.0,
              'epochs': 18.997758809674913,
              'layers1': 3.0,
              'layers2': 0.0,
              'learning_rate': 0.001,
              'neurons': 55.39979522299761,
              'optimizer': 1.2288553110171885}
best_model3 = create_model_best_params(best_params)
best_model3.fit(X_train_over, y_train_over, epochs=round(best_params['epochs']), batch_size = round(best_params['batch_size']))

Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Epoch 6/19
Epoch 7/19
Epoch 8/19
Epoch 9/19
Epoch 10/19
Epoch 11/19
Epoch 12/19
Epoch 13/19
Epoch 14/19
Epoch 15/19
Epoch 16/19
Epoch 17/19
Epoch 18/19
Epoch 19/19


<keras.callbacks.History at 0x7f3fe9de0290>

In [None]:
best_model3.summary()

Model: "sequential_378"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_378 (Bidirect  (None, 110)              140360    
 ional)                                                          
                                                                 
 dropout_781 (Dropout)       (None, 110)               0         
                                                                 
 dense_1049 (Dense)          (None, 55)                6105      
                                                                 
 dropout_782 (Dropout)       (None, 55)                0         
                                                                 
 dense_1050 (Dense)          (None, 55)                3080      
                                                                 
 dropout_783 (Dropout)       (None, 55)                0         
                                                    

In [None]:
train_results = best_model3.evaluate(X_train_over, y_train_over)
precision = train_results[2]
recall = train_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Train Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {train_results[1]}\nAUROC:{train_results[4]}')

Train Results
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 1.0
AUROC:1.0


In [None]:
test_results = best_model3.evaluate(X_test, y_test)
precision = test_results[2]
recall = test_results[3]
f1_score = 2 * (precision * recall) / (precision + recall)
print('Test Results')
print(f'Precision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}\nAccuracy: {test_results[1]}\nAUROC:{test_results[4]}')

Test Results
Precision: 0.7708333134651184
Recall: 0.6607142686843872
F1 Score: 0.7115384431985708
Accuracy: 0.9895833134651184
AUROC:0.8722144961357117
