In [8]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN

from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

from sklearn import metrics
from sklearn.preprocessing import label_binarize

import numpy as np
import pandas as pd

In [13]:
x_train = pd.read_csv('/home/jovyan/UNSW/preprocessed_data/x_train.csv', low_memory=False)
y_train = pd.read_csv('/home/jovyan/UNSW/preprocessed_data/y_train.csv', low_memory=False)

In [14]:
x_test = pd.read_csv('/home/jovyan/UNSW/preprocessed_data/x_test.csv', low_memory=False)
y_test = pd.read_csv('/home/jovyan/UNSW/preprocessed_data/y_test.csv', low_memory=False)

In [15]:
def calculate_performance_metrics(x_test, y_test, model):

    # Predictions
    y_pred = model.predict(x_test)
    y_pred_classes = np.argmax(y_pred, axis=1)

    # Compute metrics
    precision = metrics.precision_score(y_test, y_pred_classes, average='weighted')
    accuracy = metrics.accuracy_score(y_test, y_pred_classes)
    f1 = metrics.f1_score(y_test, y_pred_classes, average='macro')
    auc = metrics.roc_auc_score(y_test, y_pred_classes, multi_class='ovr')

    print(f"Precision: {precision}\nAccuracy: {accuracy}\nF1 Score: {f1}\nAUC: {auc}")
    
    # Confusion matrix for FNR, TNR, FPR, TPR
    cm = metrics.confusion_matrix(y_test, y_pred_classes)
    tn, fp, fn, tp = cm.ravel()

    fnr = fn / (fn + tp)
    tnr = tn / (tn + fp)
    fpr = fp / (fp + tn)
    tpr = tp / (tp + fn)


    # Printing the mean metrics
    print(f"FNR: {fnr}\nTNR: {tnr}\nFPR: {fpr}\nTPR: {tpr}")

In [18]:
timesteps = 1  # Number of timesteps
features_per_timestep = x_train.shape[1]  # Features per timestep

x_train_reshaped = x_train.values.reshape(-1, timesteps, features_per_timestep)
x_test_reshaped = x_test.values.reshape(-1, timesteps, features_per_timestep)

# Define the RNN model
output_shape = len(np.unique(y_train))


# Function to create the RNN model with an additional hidden layer
def create_rnn_model(units=32):
    model = Sequential([
        SimpleRNN(units, activation='relu', input_shape=(timesteps, features_per_timestep)),
        Dense(units, activation='relu'),  # Hidden dense layer
        Dense(output_shape, activation='softmax')  # Output layer
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Wrapping the model with KerasClassifier
rnn_model = KerasClassifier(model=create_rnn_model, verbose=1, units = [32, 64, 128], batch_size = [32, 64, 128, 256, 512, 1024], epochs = 20)

# Define the parameter grid for hyperparameter tuning
param_dist = {
    'units': [32, 64, 128],
    'batch_size': [32, 64, 128, 256, 512, 1024]
    # 'batch_size': [1000, 1200, 1400],
}

# Setting up RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=rnn_model, param_distributions=param_dist, 
                                   n_iter=10, cv=3, verbose=2)

# Perform hyperparameter tuning using x_val and y_val
random_search_result = random_search.fit(x_train_reshaped, y_train)

# Best parameters
best_params = random_search_result.best_params_
print(f"Best Parameters: {best_params}")

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV] END ...........................batch_size=512, units=32; total time=   3.0s
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV] END ...........................batch_size=512, units=32; total time=   2.6s
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV] END ...........................batch_size=512, units=32; total time=   2.7s
Epo

In [19]:
# Train the optimal RNN model with best parameters
optimal_rnn_model = create_rnn_model(units=best_params['units'])
optimal_rnn_model.fit(x_train_reshaped, y_train, batch_size=best_params['batch_size'],
                         epochs=50, validation_split=0.2, 
                         callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5)])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50


<keras.src.callbacks.History at 0x7f6c18765710>

In [20]:
calculate_performance_metrics(x_test_reshaped, y_test, optimal_rnn_model)

Precision: 0.9185787621057406
Accuracy: 0.9076599312197375
F1 Score: 0.8985519110307311
AUC: 0.9193983855686048
FNR: 0.11309608600564768
TNR: 0.9518928571428571
FPR: 0.048107142857142855
TPR: 0.8869039139943523
