In [18]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
# NOTE(review): `keras.utils.vis_utils` is reportedly unavailable in newer Keras
# releases; confirm the installed version still exposes this import path.
from keras.utils.vis_utils import plot_model
from sklearn.metrics import f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split

In [19]:
# The training data is published as two CSV parts in the project's GitHub repo.
train_part_urls = [
    "https://raw.githubusercontent.com/Tdjaaleb/IDS_AdversarialML/main/Data/df_train_0.csv",
    "https://raw.githubusercontent.com/Tdjaaleb/IDS_AdversarialML/main/Data/df_train_1.csv",
]

# The first CSV row holds the column names and the first column holds the index.
df_train_1, df_train_2 = [
    pd.read_csv(url, header=0, index_col=0) for url in train_part_urls
]

# Stack both parts row-wise and renumber the rows so the index stays unique.
df = pd.concat([df_train_1, df_train_2], axis=0, ignore_index=True)

# Multilayer Perceptron

In [20]:
# Features are the first 93 columns; targets are the five class columns.
X = df.iloc[:, :93]
Y = df.loc[:, ['Dos', 'normal', 'Probe', 'R2L', 'U2R']]

# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Show the feature frame as the cell's output for a quick visual check.
X

In [None]:
# MLP: one hidden ReLU layer (50 units) feeding a 5-unit softmax output,
# trained with categorical cross-entropy on the five class columns.
mlp = tf.keras.Sequential()
mlp.add(tf.keras.layers.Dense(units=50, activation='relu'))
mlp.add(tf.keras.layers.Dense(units=5, activation='softmax'))
mlp.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# validation_split=0.2 makes Keras set aside 20% of the training rows for
# per-epoch validation; the test split is not touched during fitting.
history = mlp.fit(X_train, y_train, epochs=100, batch_size=5000, validation_split=0.2)

# The model is already compiled; compiling again after fit() would only
# recreate the optimizer, so it is not repeated before evaluation.
test_results = mlp.evaluate(X_test, y_test, verbose=1)

# Turn class probabilities into one-hot predictions with arg-max. Rounding each
# softmax output at 0.5 leaves a row all-zero whenever no class exceeds 0.5, and
# disagrees with the arg-max rule behind the accuracy Keras reports above.
class_probabilities = mlp.predict(X_test)
pred = np.zeros_like(class_probabilities)
pred[np.arange(pred.shape[0]), class_probabilities.argmax(axis=1)] = 1
pred_df = pd.DataFrame(pred, columns=y_test.columns)
pred_labels = pred_df.astype('uint8')

print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]*100}%')
print("Recall Score - ",recall_score(y_test,pred_labels,average='micro'))
print("F1 Score - ",f1_score(y_test,pred_labels,average='micro'))
print("Precision Score - ",precision_score(y_test,pred_labels,average='micro'))

In [None]:
# Print the Keras summary of the MLP model.
mlp.summary()

In [None]:
# Render the MLP architecture diagram, annotated with layer shapes.
plot_model(model=mlp, show_shapes=True)

In [None]:
# Accuracy per epoch for the MLP. The second curve comes from the
# validation_split held out of the training data during fit(), not from the
# test set, so it is labelled "validation".
# Expects `history` to be the History object returned by mlp.fit().
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='train')
ax.plot(history.history['val_accuracy'], label='validation')
ax.set_title("Plot of accuracy vs epoch for train and validation dataset")
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='best')
plt.show()

In [None]:
# Loss per epoch for the MLP. The second curve comes from the
# validation_split held out of the training data during fit(), not from the
# test set, so it is labelled "validation".
# Expects `history` to be the History object returned by mlp.fit().
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
ax.plot(history.history['val_loss'], label='validation')
ax.set_title("Plot of loss vs epoch for train and validation dataset")
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper right')
plt.show()

# Autoencoder

In [None]:
# Columns holding the class targets, and every column that is not a feature.
class_columns = ['Dos', 'normal', 'Probe', 'R2L', 'U2R']
non_feature_columns = ['intrusion', 'Dos', 'normal', 'Probe', 'R2L', 'U2R', 'label']

# Split whole rows first so features and targets stay aligned.
train_rows, test_rows = train_test_split(df, test_size=0.25, random_state=42)

# Targets: y_train stays a DataFrame, y_test becomes a NumPy array.
y_train = train_rows[class_columns]
y_test = test_rows[class_columns].values

# Features: drop the target/label columns and convert to NumPy arrays.
X_train = train_rows.drop(columns=non_feature_columns).values
X_test = test_rows.drop(columns=non_feature_columns).values

# Full-dataset feature/target frames (same definitions as in the MLP section).
X = df.iloc[:, :93]
Y = df[class_columns]

In [None]:
# Autoencoder: input_dim -> 50-unit ReLU bottleneck -> input_dim reconstruction,
# trained to reproduce its own input under mean squared error.
input_dim = X_train.shape[1]
encoding_dim = 50

input_layer = tf.keras.layers.Input(shape=(input_dim, ))
encoder = tf.keras.layers.Dense(encoding_dim, activation="relu")(input_layer)
# Linear reconstruction layer. A softmax here would force every reconstructed
# row to be positive and sum to 1, so it could not reproduce the input features
# and the MSE objective would be unattainable.
output_layer = tf.keras.layers.Dense(input_dim, activation='linear')(encoder)
autoencoder = tf.keras.Model(inputs=input_layer, outputs=output_layer)
# 'accuracy' is kept because the history plots read history['accuracy'].
# NOTE(review): it is not a meaningful metric for a reconstruction task.
autoencoder.compile(optimizer='adam', loss='mean_squared_error',metrics=['accuracy'])

# Input and target are the same array; the test split is used as validation data.
history = autoencoder.fit(
    X_train,
    X_train,
    epochs=100,
    batch_size=500,
    validation_data=(X_test, X_test),
).history

# The model is already compiled; compiling again after fit() would only
# recreate the optimizer, so it is not repeated before evaluation.
test_results = autoencoder.evaluate(X_test, X_test, verbose=1)
predictions = autoencoder.predict(X_test)

print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]*100}%')

In [None]:
# Print the Keras summary of the autoencoder model.
autoencoder.summary()

In [None]:
# Render the autoencoder architecture diagram, annotated with layer shapes.
plot_model(model=autoencoder, show_shapes=True)

In [None]:
# Reconstruction loss per epoch; `history` is the dict taken from the
# autoencoder's fit() call, whose validation data is the test split.
fig, ax = plt.subplots()
ax.plot(history['loss'], label='train')
ax.plot(history['val_loss'], label='test')
ax.set_title("Plot of loss vs epoch for train and test dataset")
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper right')
plt.show()

In [None]:
# Accuracy metric per epoch; `history` is the dict taken from the
# autoencoder's fit() call, whose validation data is the test split.
fig, ax = plt.subplots()
ax.plot(history['accuracy'], label='train')
ax.plot(history['val_accuracy'], label='test')
ax.set_title("Plot of accuracy vs epoch for train and test dataset")
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='best')
plt.show()

In [None]:
# AE classifier: a small dense network (48 sigmoid units -> 5 sigmoid outputs)
# that classifies samples from their autoencoder reconstructions.
i_dim = predictions.shape[1]
i_layer = tf.keras.layers.Input(shape=(i_dim, ))
fvector = tf.keras.layers.Dense(48, activation="sigmoid")(i_layer)
o_layer = tf.keras.layers.Dense(5, activation='sigmoid')(fvector)
ae_classifier = tf.keras.Model(inputs=i_layer, outputs=o_layer)
ae_classifier.compile(optimizer='adam', loss='mean_squared_error',metrics=['accuracy'])

# Train on reconstructions of the TRAINING split. Fitting on the test-set
# reconstructions with y_test and then scoring on the same test rows would
# leak the test labels into training and inflate every reported metric.
train_features = autoencoder.predict(X_train)
train_labels = np.asarray(y_train)  # y_train may still be a DataFrame here

his = ae_classifier.fit(
    train_features,
    train_labels,
    epochs=200,
    batch_size=700,
    validation_split=0.2,
).history

# Evaluate in the feature space the classifier was trained on:
# `predictions` holds the autoencoder reconstructions of X_test, not raw X_test.
# The model is already compiled, so compile() is not repeated after fit().
test_results = ae_classifier.evaluate(predictions, y_test, verbose=1)

# One-hot arg-max decision, vectorised. Each sample belongs to exactly one
# class, and rounding every sigmoid output independently can yield zero or
# several predicted classes per row.
class_scores = ae_classifier.predict(predictions)
y_pred = np.zeros_like(class_scores)
y_pred[np.arange(y_pred.shape[0]), class_scores.argmax(axis=1)] = 1
y_pred_labels = y_pred.astype('uint8')

print(f'Test results - Loss: {test_results[0]} - Accuracy: {test_results[1]*100}%')
print("Recall Score - ",recall_score(y_test,y_pred_labels,average='micro'))
print("F1 Score - ",f1_score(y_test,y_pred_labels,average='micro'))
print("Precision Score - ",precision_score(y_test,y_pred_labels,average='micro'))

In [None]:
# Print the Keras summary of the AE classifier model.
ae_classifier.summary()

In [None]:
# The diagram is written to plots/ae_classifier_multi.png; create the target
# directory first so the write does not fail on a fresh checkout.
os.makedirs('plots', exist_ok=True)
plot_model(ae_classifier, to_file='plots/ae_classifier_multi.png', show_shapes=True)

In [None]:
# Loss per epoch for the AE classifier. The second curve comes from the
# validation_split held out inside fit(), so it is labelled "validation"
# rather than "test".
fig, ax = plt.subplots()
ax.plot(his['loss'], label='train')
ax.plot(his['val_loss'], label='validation')
ax.set_title("Plot of loss vs epoch for train and validation dataset")
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(loc='upper right')
plt.show()

In [None]:
# Accuracy per epoch for the AE classifier. The second curve comes from the
# validation_split held out inside fit(), so it is labelled "validation"
# rather than "test".
fig, ax = plt.subplots()
ax.plot(his['accuracy'], label='train')
ax.plot(his['val_accuracy'], label='validation')
ax.set_title("Plot of accuracy vs epoch for train and validation dataset")
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
ax.legend(loc='best')
plt.show()