In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, MaxPooling1D, Flatten

import numpy as np
import pandas as pd

from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from scikeras.wrappers import KerasClassifier

from warnings import simplefilter

In [15]:
#with two dataset splitted
dftrain = pd.read_csv("/home/jovyan/MQTTset/train70_reduced.csv") 
dftest = pd.read_csv("/home/jovyan/MQTTset/test30_reduced.csv")

# dftrain = pd.read_csv("train70.csv", low_memory=False) 
# dftest = pd.read_csv("test30.csv", low_memory=False)

simplefilter(action='ignore', category=FutureWarning)
seed = 7

In [16]:
#train
#print(dftrain.loc[dftrain['target'] == 'legitimate'])
class_names = dftrain.target.unique()
dftrain=dftrain.astype('category')
cat_columns = dftrain.select_dtypes(['category']).columns
dftrain[cat_columns] = dftrain[cat_columns].apply(lambda x: x.cat.codes)
#print(dftrain.loc[125, 'target'])
x_columns = dftrain.columns.drop('target')
x_train = dftrain[x_columns].values
y_train = dftrain['target']

#test
class_names = dftest.target.unique()
dftest=dftest.astype('category')
cat_columns = dftest.select_dtypes(['category']).columns
dftest[cat_columns] = dftest[cat_columns].apply(lambda x: x.cat.codes)
x_columns = dftest.columns.drop('target')
x_test = dftest[x_columns].values
y_test = dftest['target']

print("Ready to generate train and test datasets")

Ready to generate train and test datasets


In [12]:
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

x_train = np.copy(x_train_scaled)
x_test = np.copy(x_test_scaled)

In [17]:
def calculate_performance_metrics(x_test, y_test, model):

    # Predictions
    y_pred = model.predict(x_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    
    # Compute metrics
    precision = metrics.precision_score(y_test, y_pred_classes, average='weighted')
    accuracy = metrics.accuracy_score(y_test, y_pred_classes)
    f1 = metrics.f1_score(y_test, y_pred_classes, average='macro')
    auc = metrics.roc_auc_score(y_test, y_pred, multi_class='ovr')

    print(f"Precision: {precision}\nAccuracy: {accuracy}\nF1 Score: {f1}\nAUC: {auc}")
    
    # Confusion matrix for FNR, TNR, FPR, TPR
    cm = metrics.confusion_matrix(y_test, y_pred_classes)
    def calculate_rates(conf_matrix, class_index):
        tp = conf_matrix[class_index, class_index]
        fn = np.sum(conf_matrix[class_index, :]) - tp
        fp = np.sum(conf_matrix[:, class_index]) - tp
        tn = np.sum(conf_matrix) - (tp + fn + fp)
    
        fnr = fn / (fn + tp)
        tnr = tn / (tn + fp)
        fpr = fp / (fp + tn)
        tpr = tp / (tp + fn)
        return fnr, tnr, fpr, tpr

    # Calculate and aggregate rates
    fnrs, tnrs, fprs, tprs = [], [], [], []
    for i in range(cm.shape[0]):
        fnr, tnr, fpr, tpr = calculate_rates(cm, i)
        fnrs.append(fnr)
        tnrs.append(tnr)
        fprs.append(fpr)
        tprs.append(tpr)
    
    mean_fnr = np.mean(fnrs)
    mean_tnr = np.mean(tnrs)
    mean_fpr = np.mean(fprs)
    mean_tpr = np.mean(tprs)

    # Printing the mean metrics
    print(f"Mean FNR: {mean_fnr}\nMean TNR: {mean_tnr}\nMean FPR: {mean_fpr}\nMean TPR: {mean_tpr}")
    
    # Example usage
    # y_test = [actual labels]
    # y_test_pred = [predicted labels]
    # calculate_performance_metrics(y_test, y_test_pred)


In [18]:
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)
input_shape = x_train.shape[1:]
output_shape = len(np.unique(y_train))

# Function to create the CNN model
def create_cnn_model(filters=32, kernel_size=3, activation='relu', pool_size=(2, 2), dense_units=128):
    model = Sequential()
    model.add(Conv1D(filters=filters, kernel_size=4, strides=2, padding='valid', activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D())
    model.add(Conv1D(filters=20, kernel_size=4, strides=2, padding='same', activation='relu'))
    model.add(Conv1D(filters=3, kernel_size=2, strides=1, padding='same', activation='relu'))
    model.add(Flatten())
    model.add(Dense(dense_units, activation='relu'))
    model.add(Dense(output_shape, activation='softmax'))
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Wrapping the model with KerasClassifier
cnn_model = KerasClassifier(model=create_cnn_model, verbose=1, filters = [32, 64, 128], kernel_size = [2, 3, 5], activation = ['relu'], pool_size = [(2, 2), (3, 3)], dense_units = [32, 64, 128], batch_size = [1000, 1200], epochs = [10,15,20])

# Define the parameter grid for hyperparameter tuning
param_dist = {
    'filters': [32, 64, 128],
    'kernel_size': [2,3,5],
    'activation': ['relu'],
    'pool_size': [(2, 2), (3, 3)],
    'dense_units': [32, 64, 128],
    'batch_size': [1000, 1200],
    'epochs': [10, 15, 20]
}

# Setting up RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=cnn_model, param_distributions=param_dist, 
                                   n_iter=10, cv=3, verbose=2)

# Perform hyperparameter tuning using x_val and y_val
random_search_result = random_search.fit(x_train, y_train)

# Best parameters
best_params = random_search_result.best_params_
print(f"Best Parameters: {best_params}")

# Train the optimal CNN model with best parameters
optimal_cnn_model = create_cnn_model(filters=best_params['filters'], 
                                     kernel_size=best_params['kernel_size'],
                                     activation=best_params['activation'],
                                     pool_size=best_params['pool_size'],
                                     dense_units=best_params['dense_units'])
optimal_cnn_model.fit(x_train, y_train, batch_size=best_params['batch_size'], epochs=best_params['epochs'])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV] END activation=relu, batch_size=1000, dense_units=32, epochs=20, filters=64, kernel_size=3, pool_size=(2, 2); total time= 1.6min
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
[CV] END activation=relu, batch_size=1000, dense_units=32, epochs=20, filters=64, kernel_size=3, pool_size=(2, 2); total time= 1.7min
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Ep

<keras.src.callbacks.History at 0x7f5190121750>

In [19]:
# nonscaled
calculate_performance_metrics(x_test, y_test, optimal_cnn_model)

Precision: 0.8940150508680169
Accuracy: 0.8898781347567731
F1 Score: 0.6598316023770444
AUC: 0.9225819620175444
Mean FNR: 0.35310190715046513
Mean TNR: 0.9688807232393529
Mean FPR: 0.031119276760646953
Mean TPR: 0.6468980928495348
