In [6]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, CuDNNLSTM
from sklearn.metrics import confusion_matrix, classification_report

import keras
import keras_tuner as kt
from keras.callbacks import ModelCheckpoint
from keras.models import load_model
from matplotlib import pyplot as plt

In [2]:
print('GPU Device:', tf.test.gpu_device_name())

GPU Device: /device:GPU:0


# Hyperparameter Training

In [3]:
Train = pd.read_csv('../TrainTestData/cle_train.csv')
Test = pd.read_csv('../TrainTestData/cle_test.csv')

X_train = Train.iloc[:,:-1]
Y_train = Train.iloc[:,-1]

In [16]:
def model_builder(hp):
    hp_units_1 = hp.Int('units_Conv1', min_value=32, max_value=256, step=32)
    hp_units_2 = hp.Int('units_FC', min_value=32, max_value=256, step=32)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    
    model = Sequential()
    model.add(CuDNNLSTM(units=hp_units_1, return_sequences=True, input_shape=(35, 1)))
    model.add(CuDNNLSTM(units=hp_units_2, return_sequences=True))
    model.add(Flatten())
    model.add(Dense(units=2, activation='sigmoid'))
    model.compile(loss='sparse_categorical_crossentropy', 
                  optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate), 
                  metrics=['accuracy'])
    
    return model

In [17]:
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='../Models/Baseline_RNN/HP',
                     project_name = 'RNN_HP')

In [19]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

with tf.device('/device:GPU:0'):
    tuner.search(X_train, Y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
The hyperparameter search is complete. The optimal number of units in the first Conv
layer is {best_hps.get('units_Conv1')}, The optimal number of units in the FC 
layer is {best_hps.get('units_FC')}.
""")

Trial 30 Complete [00h 01m 09s]
val_accuracy: 0.7561565041542053

Best val_accuracy So Far: 0.7618975639343262
Total elapsed time: 00h 14m 03s

The hyperparameter search is complete. The optimal number of units in the first Conv
layer is 64, The optimal number of units in the FC 
layer is 96.



# Train

In [28]:
def RNN_train():
    #load the data
    Train = pd.read_csv('../TrainTestData/com_train.csv')
    Test = pd.read_csv('../TrainTestData/com_test.csv')
    
    X_train = Train.iloc[:,:-1]
    Y_train = Train.iloc[:,-1]

    X_test = Test.iloc[:,:-1]
    Y_test = Test.iloc[:,-1]
    
    
    #train the RNN model
    filepath = "../Models/Baseline_RNN/train/baseline_RNN_model_at_epoch_{epoch}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=False, mode='auto', save_freq="epoch")
    with tf.device('/device:GPU:0'):
        history = model.fit(X_train, Y_train,validation_split = 0.1, epochs=25, batch_size=32,callbacks=[checkpoint])
        
    #save model history
    #np.save('../Models/Baseline_RNN/baseline_RNN_hist.npy',history.history)
    #history=np.load('my_history.npy',allow_pickle='TRUE').item()
    
    #plot train val loss
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

def RNN_test():
    testlists = ['../TrainTestData/com_test.csv','../TrainTestData/cle_test.csv','../TrainTestData/vir_test.csv',
                '../TrainTestData/hun_test.csv','../TrainTestData/swi_test.csv']
    
    for i in testlists:
        Test = pd.read_csv(i)

        X_test = Test.iloc[:,:-1]
        Y_test = Test.iloc[:,-1]

        Y_pred = model.predict(X_test).argmax(axis=1)

        cm = confusion_matrix(Y_pred, Y_test)
        print("this is the test for " + str(i[-12:][:3]))
        print(cm)
        print(classification_report(Y_test, Y_pred,digits = 4))
        print("============================================")
    

In [21]:
# RNN
model = tuner.hypermodel.build(best_hps)
RNN_train()

Epoch 1/25
Epoch 1: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_1.hdf5
Epoch 2/25
Epoch 2: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_2.hdf5
Epoch 3/25
Epoch 3: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_3.hdf5
Epoch 4/25
Epoch 4: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_4.hdf5
Epoch 5/25
Epoch 5: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_5.hdf5
Epoch 6/25
Epoch 6: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_6.hdf5
Epoch 7/25
Epoch 7: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_7.hdf5
Epoch 8/25
Epoch 8: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_8.hdf5
Epoch 9/25
Epoch 9: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_9.hdf5
Epoch 10/25
Epoch 10: saving model to ../Models/Baseline_CNN/train\baseline_CNN_model_at_epoch_10.hd

FileNotFoundError: [Errno 2] No such file or directory: '../Models/history/baseline_RNN_hist.npy'

# Testing

In [29]:
model = load_model('../Models/Baseline_RNN/train/baseline_RNN_model_at_epoch_25.hdf5')
RNN_test()

this is the test for com
[[30728  9982]
 [ 3485 12541]]
              precision    recall  f1-score   support

           0     0.7548    0.8981    0.8203     34213
           1     0.7825    0.5568    0.6507     22523

    accuracy                         0.7626     56736
   macro avg     0.7687    0.7275    0.7355     56736
weighted avg     0.7658    0.7626    0.7529     56736

this is the test for cle
[[7641 2504]
 [ 894 3145]]
              precision    recall  f1-score   support

           0     0.7532    0.8953    0.8181      8535
           1     0.7787    0.5567    0.6493      5649

    accuracy                         0.7604     14184
   macro avg     0.7659    0.7260    0.7337     14184
weighted avg     0.7633    0.7604    0.7509     14184

this is the test for vir
[[7789 2415]
 [ 858 3122]]
              precision    recall  f1-score   support

           0     0.7633    0.9008    0.8264      8647
           1     0.7844    0.5638    0.6561      5537

    accuracy          