In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from time import time
import random
import warnings
warnings.filterwarnings("ignore")
from sklearn.metrics import f1_score as f1_score_rep
from sklearn.metrics import accuracy_score

In [2]:
from sklearn.metrics import confusion_matrix

In [3]:
# Directory holding the pre-split CSV files. Change this single constant to
# point the notebook at a different copy of the data set.
DATA_DIR = "F://CyberLab/MQTT/Data/FINAL_CSV"

# Training and test partitions, read from separate files under DATA_DIR.
train = pd.read_csv(f"{DATA_DIR}/train70.csv")
test = pd.read_csv(f"{DATA_DIR}/test30.csv")

In [4]:
# Keep the distinct target labels as they appear before integer encoding.
class_names = train['target'].unique()

# Cast every column to a categorical dtype and replace each value with its
# integer category code.
# NOTE(review): the codes come from the values present in `train` only; the
# test frame is encoded separately from its own values. Confirm that both
# frames end up with the same value -> code mapping.
train = train.astype('category')
cat_col = train.select_dtypes(include=['category']).columns
train[cat_col] = train[cat_col].apply(lambda column: column.cat.codes)

# Separate the feature matrix (every column except 'target') from the labels.
x_col = train.columns.drop('target')
x_train = train[x_col].to_numpy()
y_train = train['target']

In [5]:
# Distinct target labels of the test split (this rebinds `class_names`).
class_names = test['target'].unique()

# Cast every column to a categorical dtype and replace each value with its
# integer category code.
# NOTE(review): these codes are derived from the values present in `test`
# alone, independently of the training frame. Confirm that the resulting
# value -> code mapping matches the one used for the training data.
test = test.astype('category')
cat_col = test.select_dtypes(include=['category']).columns
test[cat_col] = test[cat_col].apply(lambda column: column.cat.codes)

# Separate the feature matrix (every column except 'target') from the labels.
x_col = test.columns.drop('target')
x_test = test[x_col].to_numpy()
y_test = test['target']

In [6]:
from sklearn import preprocessing
# Standardize features to zero mean / unit variance.
# The scaler is fitted on the training data only and then re-used for the test
# data, so both sets go through the *same* transformation and no statistics of
# the test set leak into preprocessing. (Scaling each set with its own mean and
# standard deviation would map identical raw values to different inputs.)
scaler = preprocessing.StandardScaler()
X_train_scaled = scaler.fit_transform(x_train)
X_test_scaled = scaler.transform(x_test)

In [7]:
# Work on independent copies so the scaled arrays stay untouched for later reuse.
x_train = np.array(X_train_scaled, copy=True)
x_test = np.array(X_test_scaled, copy=True)

In [8]:
# Learn the label -> index mapping from the training labels, then apply that
# same mapping to both the training and the test labels.
label_encoder = LabelEncoder().fit(y_train)
y_train = label_encoder.transform(y_train)
y_test = label_encoder.transform(y_test)

In [9]:
def report(test, pred):
    """Print macro-averaged TPR, FPR, FNR and TNR for a multi-class prediction.

    Each class is scored one-vs-rest from the confusion matrix of ``test``
    against ``pred``; the per-class rates are then averaged and printed.

    A rate whose denominator is zero for some class (for example TPR/FNR of a
    class that never occurs in ``test``) is undefined. Such classes are left
    out of that rate's average instead of turning the whole average into NaN.

    Args:
        test: true class labels.
        pred: predicted class labels, aligned with ``test``.

    Returns:
        None. The four averaged rates are written to standard output.
    """
    # Rows are true labels and columns are predictions (scikit-learn convention).
    cm = np.asarray(confusion_matrix(test, pred), dtype=float)

    tp = np.diag(cm)
    fp = cm.sum(axis=0) - tp      # predicted as the class, but actually another
    fn = cm.sum(axis=1) - tp      # actually the class, but predicted as another
    tn = cm.sum() - tp - fp - fn  # everything not involving the class

    # 0/0 yields NaN here on purpose; those entries are skipped by nanmean below.
    with np.errstate(divide="ignore", invalid="ignore"):
        tpr = tp / (tp + fn)
        fpr = fp / (tn + fp)
        fnr = fn / (fn + tp)
        tnr = tn / (tn + fp)

    print("TPR: ", np.nanmean(tpr))
    print("FPR: ", np.nanmean(fpr))
    print("FNR: ", np.nanmean(fnr))
    print("TNR: ", np.nanmean(tnr))
    return

## DL Models

In [10]:
# Tensorflow and Keras
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.optimizers import SGD




In [11]:
# Setting seeds for reproducibility.
# Weight initialisation and shuffling in Keras draw from TensorFlow's generator,
# so NumPy alone is not enough; Python's `random` is seeded as well.
random.seed(1)
np.random.seed(1)
tf.random.set_seed(1)
# NOTE: this only binds a Python variable; it does not change hash
# randomisation. For that, the PYTHONHASHSEED environment variable has to be
# set before the interpreter (kernel) starts. The name is kept for compatibility.
PYTHONHASHSEED = 0

### Convolutional Neural Network

In [12]:
### Reshape training and test data for CNN model training
# Features get a trailing axis of length 1: (samples, features) -> (samples, features, 1).
x_train = x_train.reshape(x_train.shape[:2] + (1,))
x_test = x_test.reshape(x_test.shape[:2] + (1,))
# Labels become column vectors: (samples,) -> (samples, 1).
y_train = y_train.reshape(y_train.shape[:1] + (1,))
y_test = y_test.reshape(y_test.shape[:1] + (1,))

# Per-sample input shape for the network and the number of distinct training labels.
input_shape = x_train.shape[1:]
num_classes = np.unique(y_train).size

print("input shape: ", input_shape)
for split_name, labels, features in (("train", y_train, x_train), ("test", y_test, x_test)):
    if split_name == "train":
        print("y train shape: ", labels.shape)
        print("x_train shape: ", features.shape)
    else:
        print("y test shape: ", labels.shape)
        print("x test shape: ", features.shape)
print("number of classes: ", num_classes)

input shape:  (33, 1)
y train shape:  (8456823, 1)
x_train shape:  (8456823, 33, 1)
y test shape:  (3624366, 1)
x test shape:  (3624366, 33, 1)
number of classes:  6


In [13]:
# 1-D CNN: conv -> max-pool -> conv -> conv -> flatten -> dense -> softmax over the classes.
cnn_model = Sequential([
    Conv1D(filters=20, kernel_size=4, strides=2, padding='valid',
           activation='relu', input_shape=input_shape),
    MaxPooling1D(),
    Conv1D(filters=20, kernel_size=4, strides=2, padding='same', activation='relu'),
    Conv1D(filters=3, kernel_size=2, strides=1, padding='same', activation='relu'),
    Flatten(),
    Dense(units=100, activation='relu'),
    Dense(units=num_classes, activation='softmax'),
])

# Plain SGD; divide the learning rate by 10 if learning stops after some epochs.
opt = SGD(learning_rate=0.01)
# Sparse cross-entropy: the labels are integer class indices, not one-hot vectors.
cnn_model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=opt,
    metrics=['accuracy'],
)
cnn_model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 15, 20)            100       
                                                                 
 max_pooling1d (MaxPooling1  (None, 7, 20)             0         
 D)                                                              
                                                                 
 conv1d_1 (Conv1D)           (None, 4, 20)             1620      
                                                                 
 conv1d_2 (Conv1D)           (None, 4, 3)              123       
                                                                 
 flatten (Flatten)           (None, 12)                0         
                                                                 
 dense (Dense)               (None, 100)               1300      
                                                      

In [14]:
# Stop once the validation loss has not improved for 5 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5)

# Train for at most 50 epochs, holding out 20% of the training data for validation.
hist = cnn_model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Epoch 1/50



KeyboardInterrupt: 

[]

In [None]:
# Predicted class = index of the largest softmax output for each test sample.
y_hat = np.argmax(cnn_model.predict(x_test), axis=-1)

In [None]:
# Accuracy plus micro- and macro-averaged F1 of the CNN on the test set.
print('****************** 1-D CNN prediction results ******************')
print("Accuracy: ", accuracy_score(y_test, y_hat))
for f1_title, f1_average in (("Micro F1 Score: ", "micro"), ("Macro F1 Score: ", "macro")):
    print(f1_title, f1_score_rep(y_test, y_hat, average=f1_average))

In [None]:
# Print the class-averaged TPR / FPR / FNR / TNR for the CNN predictions.
report(y_test, y_hat)

### Deep Neural Network

In [None]:
# Start again from fresh copies of the scaled 2-D feature matrices
# (x_train / x_test were reshaped for the CNN earlier in the notebook).
x_train = np.array(X_train_scaled, copy=True)
x_test = np.array(X_test_scaled, copy=True)
# Per-sample input shape for the dense network.
input_shape = tuple(x_train.shape[1:])

In [None]:
# Fully connected network: two hidden ReLU layers and a softmax over the classes.
dnn_model = Sequential()
dnn_model.add(Input(shape=input_shape))
dnn_model.add(Dense(units=30, activation='relu'))
dnn_model.add(Dense(units=20, activation='relu'))
dnn_model.add(Dense(units=num_classes, activation='softmax'))

# `learning_rate` is the supported keyword (the CNN cell uses it too); the old
# `lr` alias is deprecated and is ignored or rejected by newer Keras releases.
opt = SGD(learning_rate=0.01)
# Sparse cross-entropy: the labels are integer class indices, not one-hot vectors.
dnn_model.compile(loss = "sparse_categorical_crossentropy", optimizer=opt, metrics=['accuracy'])

In [None]:
# Stop once the validation loss has not improved for 5 consecutive epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5)

# Train for at most 50 epochs, holding out 20% of the training data for validation.
hist = dnn_model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=50,
    validation_split=0.2,
    callbacks=[early_stopping],
)

In [None]:
# Predicted class = index of the largest softmax output for each test sample.
y_hat = np.argmax(dnn_model.predict(x_test), axis=-1)

In [None]:
# Accuracy plus micro- and macro-averaged F1 of the DNN on the test set.
print('****************** DNN prediction results ******************')
print("Accuracy: ", accuracy_score(y_test, y_hat))
for f1_title, f1_average in (("Micro F1 Score: ", "micro"), ("Macro F1 Score: ", "macro")):
    print(f1_title, f1_score_rep(y_test, y_hat, average=f1_average))

In [None]:
# Print the class-averaged TPR / FPR / FNR / TNR for the DNN predictions.
report(y_test, y_hat)

### Saving and loading DNN models

In [None]:
### Save the CNN model
# The trained network is bound to `cnn_model`; no `dcnn_model` exists at this
# point, so saving through that name would raise a NameError.
cnn_model.save('CNN-X-IIoT.h5')

In [None]:
### Load the CNN model
# TensorFlow is imported under the alias `tf`; the bare name `tensorflow` is
# never bound in this notebook and would raise a NameError.
dcnn_model = tf.keras.models.load_model('CNN-X-IIoT.h5')