In [19]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Conv1D, MaxPooling1D
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

from tensorflow.keras.optimizers import SGD
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix
from sklearn.preprocessing import label_binarize
import numpy as np
import pandas as pd
from sklearn import metrics

In [20]:
x_train = pd.read_csv('/home/jovyan/UNSW/preprocessed_data/x_train.csv', low_memory=False)
y_train = pd.read_csv('/home/jovyan/UNSW/preprocessed_data/y_train.csv', low_memory=False)

In [21]:
x_test = pd.read_csv('/home/jovyan/UNSW/preprocessed_data/x_test.csv', low_memory=False)
y_test = pd.read_csv('/home/jovyan/UNSW/preprocessed_data/y_test.csv', low_memory=False)

In [4]:
x_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,46,47,48,49,50,51,52,53,54,55
0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0
1,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,6.0,0.0
2,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.0,1.0,1.0,3.0,0.0,0.0,0.0,2.0,6.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.0,1.0,1.0,3.0,1.0,1.0,0.0,2.0,1.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.0,2.0,1.0,33.0,0.0,0.0,0.0,2.0,33.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175336,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,24.0,22.0,13.0,24.0,0.0,0.0,0.0,24.0,24.0,0.0
175337,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,...,1.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,1.0,0.0
175338,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,3.0,3.0,3.0,13.0,0.0,0.0,0.0,3.0,12.0,0.0
175339,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,24.0,22.0,14.0,30.0,0.0,0.0,0.0,26.0,30.0,0.0


In [30]:
def calculate_performance_metrics(x_test, y_test, model):

    # Predictions
    y_pred = model.predict(x_test)
    y_pred_classes = np.argmax(y_pred, axis=1)

    # Compute metrics
    precision = metrics.precision_score(y_test, y_pred_classes, average='weighted')
    accuracy = metrics.accuracy_score(y_test, y_pred_classes)
    f1 = metrics.f1_score(y_test, y_pred_classes, average='macro')
    auc = metrics.roc_auc_score(y_test, y_pred_classes, multi_class='ovr')

    print(f"Precision: {precision}\nAccuracy: {accuracy}\nF1 Score: {f1}\nAUC: {auc}")
    
    # Confusion matrix for FNR, TNR, FPR, TPR
    cm = metrics.confusion_matrix(y_test, y_pred_classes)
    tn, fp, fn, tp = cm.ravel()

    fnr = fn / (fn + tp)
    tnr = tn / (tn + fp)
    fpr = fp / (fp + tn)
    tpr = tp / (tp + fn)


    # Printing the mean metrics
    print(f"FNR: {fnr}\nTNR: {tnr}\nFPR: {fpr}\nTPR: {tpr}")

In [6]:
x_train = x_train.values.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.values.reshape(x_test.shape[0], x_test.shape[1], 1)

In [38]:
# path/filename: cnn_model_randomized_search.py

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV


input_shape = x_train.shape[1:]
output_shape = len(np.unique(y_train))

# Function to create the CNN model
def create_cnn_model(filters=32, kernel_size=4, pool_size=(2, 2), dense_units=32):
    model = Sequential()
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, strides=2, padding='valid', activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D())
    model.add(Conv1D(filters=20, kernel_size=4, strides=2, padding='same', activation='relu'))
    model.add(Conv1D(filters=3, kernel_size=2, strides=1, padding='same', activation='relu'))
    model.add(Flatten())
    model.add(Dense(dense_units, activation='relu'))
    model.add(Dense(output_shape, activation='softmax'))
    opt = SGD(learning_rate=0.01) ### divide by 10 if learning stops after some epochs

    model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Wrapping the model with KerasClassifier
cnn_model = KerasClassifier(model=create_cnn_model, verbose=1, filters = [20, 40, 60, 100], kernel_size = [2,3,4,5], pool_size = [(2, 2), (3, 3)], dense_units = [32, 64, 128], batch_size = [32, 64, 128, 256, 512, 1024], epochs = 20)

# Define the parameter grid for hyperparameter tuning
param_dist = {
    'filters': [20, 40, 60, 100],
    'kernel_size': [2,3,4,5],
    'pool_size': [(2, 2), (3, 3)],
    'dense_units': [32, 64, 128],
    'batch_size': [32, 64, 128, 256, 512, 1024]
}

# Setting up RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=cnn_model, param_distributions=param_dist, 
                                   n_iter=10, cv=3, verbose=2)

# Perform hyperparameter tuning using x_val and y_val
random_search_result = random_search.fit(x_train, y_train)

# Best parameters
best_params = random_search_result.best_params_
print(f"Best Parameters: {best_params}")

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV] END batch_size=1024, dense_units=32, filters=40, kernel_size=3, pool_size=(3, 3); total time=   4.8s
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV] END batch_size=1024, dense_units=32, filters=40, kernel_size=3, pool_size=(3, 3); total time=   4.7s
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV] END batch_size=1024, dense_un

In [46]:
# Train the optimal CNN model with best parameters
optimal_cnn_model = create_cnn_model(filters=best_params['filters'], 
                                     kernel_size=best_params['kernel_size'],
                                     pool_size=best_params['pool_size'],
                                     dense_units=best_params['dense_units'])
optimal_cnn_model.fit(x_train, y_train.values, batch_size=best_params['batch_size'], epochs=50, validation_split=0.2,
                             callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5)])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


<keras.src.callbacks.History at 0x7f43804b81d0>

In [47]:
calculate_performance_metrics(x_test, y_test, optimal_cnn_model)

Precision: 0.9176845889937931
Accuracy: 0.9182735355678364
F1 Score: 0.9042226271230213
AUC: 0.8969612112410177
FNR: 0.04404186323225044
TNR: 0.8379642857142857
FPR: 0.16203571428571428
TPR: 0.9559581367677495


In [34]:
cnn_model = Sequential()
cnn_model.add(Conv1D(filters=20, kernel_size=4, strides=2, padding='valid', activation='relu', input_shape=input_shape))
cnn_model.add(MaxPooling1D())
cnn_model.add(Conv1D(filters=20, kernel_size=4, strides=2, padding='same', activation='relu'))
cnn_model.add(Conv1D(filters=3, kernel_size=2, strides=1, padding='same', activation='relu'))
cnn_model.add(Flatten())
cnn_model.add(Dense(units=100, activation='relu'))
cnn_model.add(Dense(units=num_classes, activation='softmax'))

opt = SGD(learning_rate=0.01) ### divide by 10 if learning stops after some epochs
cnn_model.compile(loss = "sparse_categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

In [35]:
hist = cnn_model.fit(x_train, y_train, epochs=50,
                       batch_size=64, validation_split=0.2,
                       callbacks = [tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5)])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


In [36]:
calculate_performance_metrics(x_test, y_test, cnn_model)

Precision: 0.918410480846043
Accuracy: 0.9175948580195162
F1 Score: 0.9059629253475087
AUC: 0.90919131582129
FNR: 0.0675459397859914
TNR: 0.8859285714285714
FPR: 0.11407142857142857
TPR: 0.9324540602140086
