In [39]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv1D,MaxPool1D

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import sklearn
from sklearn.metrics import classification_report
from joblib import dump, load
import queue
import pandas as pd
import numpy as np
import glob, os, sys
pd.options.mode.chained_assignment = None



In [4]:
with open('./Dataset/csv/Attack_merge.csv', newline='') as csvfile:
        rows = pd.read_csv(csvfile,header=None)
        y = rows[10]
        x = rows.drop([10], axis=1)
        X_train, X_test, y_train, y_test = train_test_split(x, y , test_size=0.2)
    
if not os.path.exists("./models"):
    os.mkdir("./models")

model = DecisionTreeClassifier()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
dump(model, './models/DT.joblib')
prediction = model.predict(X_test)
print(classification_report(y_test,prediction))

0.9706013860943439
              precision    recall  f1-score   support

           0       0.97      0.98      0.97     22694
           1       0.97      0.97      0.97     22036

    accuracy                           0.97     44730
   macro avg       0.97      0.97      0.97     44730
weighted avg       0.97      0.97      0.97     44730



In [14]:
model = RandomForestClassifier(n_estimators=20, max_depth=4)
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
dump(model, './models/RF.joblib')
prediction = model.predict(X_test)
print(classification_report(y_test,prediction))

0.951397272524033
              precision    recall  f1-score   support

           0       0.92      0.99      0.95     22694
           1       0.99      0.91      0.95     22036

    accuracy                           0.95     44730
   macro avg       0.96      0.95      0.95     44730
weighted avg       0.95      0.95      0.95     44730



In [21]:
model = LogisticRegression(C=3)
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
dump(model, './models/LR.joblib')
prediction = model.predict(X_test)
print(classification_report(y_test,prediction))

0.9063715627095909
              precision    recall  f1-score   support

           0       0.91      0.90      0.91     22694
           1       0.90      0.91      0.91     22036

    accuracy                           0.91     44730
   macro avg       0.91      0.91      0.91     44730
weighted avg       0.91      0.91      0.91     44730



In [15]:
model = XGBClassifier(booster="gbtree", min_split_loss=7.100747859845302e-07, alpha=0.0010277375135306342, 
                        max_depth=9, eta=0.6737504946980999, gamma=6.101266632438708e-07, grow_policy="lossguide")
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
dump(model, './models/XGB.joblib')
prediction = model.predict(X_test)
print(classification_report(y_test,prediction))

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


0.9760116253073999
              precision    recall  f1-score   support

           0       0.98      0.97      0.98     22694
           1       0.97      0.98      0.98     22036

    accuracy                           0.98     44730
   macro avg       0.98      0.98      0.98     44730
weighted avg       0.98      0.98      0.98     44730



  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


In [19]:
model = LinearSVC(C = 2, verbose=False)
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
dump(model, './models/SVM.joblib')
prediction = model.predict(X_test)
print(classification_report(y_test,prediction))

0.8961547060138609
              precision    recall  f1-score   support

           0       0.89      0.90      0.90     22694
           1       0.90      0.89      0.89     22036

    accuracy                           0.90     44730
   macro avg       0.90      0.90      0.90     44730
weighted avg       0.90      0.90      0.90     44730





In [38]:
model = KNeighborsClassifier()
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
dump(model, './models/KNN.joblib')
prediction = model.predict(X_test)
print(classification_report(y_test,prediction))

0.9742678291974066
              precision    recall  f1-score   support

           0       0.98      0.97      0.97     22694
           1       0.97      0.98      0.97     22036

    accuracy                           0.97     44730
   macro avg       0.97      0.97      0.97     44730
weighted avg       0.97      0.97      0.97     44730



In [26]:
batch_size = 32
X_train = np.array(X_train)
y_train = np.array(y_train)

earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0, mode='min')
mcp_save = ModelCheckpoint('./models/DNN4_Best_Model.h5', save_weights_only=False, save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1, epsilon=1e-4, mode='min')

dnn4 = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(X_train.shape[1],)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation = 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.01),
    tf.keras.layers.Dense(8, activation = 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.01),
    tf.keras.layers.Dense(6, activation = 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.01),
    tf.keras.layers.Dense(4, activation = 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.01),
    tf.keras.layers.Dense(2, activation='softmax')
])

dnn4.compile(optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']);

dnn4.fit(X_train, y_train, batch_size=batch_size, epochs=10, verbose=1, callbacks=[earlyStopping, mcp_save, reduce_lr_loss], validation_split=0.25)
model = tf.keras.models.load_model('./models/DNN4_Best_Model.h5')

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50

Epoch 00006: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 7/50
Epoch 8/50
Epoch 9/50

Epoch 00009: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 10/50
Epoch 11/50
Epoch 12/50

Epoch 00012: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 13/50


In [35]:
preds = model.predict(X_test)
pred_labels = np.rint(preds)
accuracy = sklearn.metrics.accuracy_score(y_test, pred_labels[0:,1])
f1 = sklearn.metrics.f1_score(y_test, pred_labels[0:,1], average="binary")
prec = sklearn.metrics.precision_score(y_test, pred_labels[0:,1], average="binary")
recall = sklearn.metrics.recall_score(y_test, pred_labels[0:,1], average="binary")
print("accuracy: " + str(accuracy))
print("f1: " + str(f1))
print("prec: " + str(prec))
print("recall: " + str(recall))

accuracy: 0.9276101050748938
f1: 0.9252332132631385
prec: 0.9418484392628808
recall: 0.9091940461063714


In [42]:
model = tf.keras.models.load_model('./models/DNN4_Best_Model.h5')

In [43]:
import matplotlib.pyplot as plt
def show_train_history(train_acc,test_acc):
    plt.plot(model.history[train_acc])
    plt.plot(model.history[test_acc])
    plt.title('Train History')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [46]:
model.history['accuracy']

TypeError: 'NoneType' object is not subscriptable

In [None]:
model = Sequential()
model.add()