In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten,Conv1D,MaxPool1D

from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
import sklearn
from sklearn.metrics import classification_report
from joblib import dump, load
import queue
import pandas as pd
import numpy as np
import glob, os, sys
pd.options.mode.chained_assignment = None

  from pandas import MultiIndex, Int64Index


In [10]:
def import_dataset(csv_path='./Dataset/csv/Original/Attack_merge.csv', label_column=10,
                   test_size=0.2, random_state=42, models_dir="./models"):
    """Load the merged attack CSV and split it into train/test NumPy arrays.

    The file is read without a header row, so columns are addressed by their
    integer position. ``label_column`` becomes the target and every other
    column is used as a feature.

    Side effect: ``models_dir`` is created if it does not exist yet, because
    the cells that follow dump their fitted models into it.

    Args:
        csv_path: Location of the header-less CSV file to read.
        label_column: Integer label of the column holding the class label.
        test_size: Fraction (or absolute count) of rows held out for testing,
            passed straight to ``train_test_split``.
        random_state: Seed for the shuffle performed by ``train_test_split``.
            A fixed default keeps the split, and therefore every reported
            metric, reproducible across kernel restarts. Pass ``None`` for a
            different split on every call.
        models_dir: Directory to create for persisted models.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)`` of ``numpy.ndarray``.
    """
    with open(csv_path, newline='') as csvfile:
        rows = pd.read_csv(csvfile, header=None)

    y = rows[label_column]
    x = rows.drop(columns=[label_column])
    X_train, X_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state)

    # exist_ok avoids the check-then-create race and also handles nested paths.
    os.makedirs(models_dir, exist_ok=True)
    return np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test)

In [11]:
# Load the data once; the four arrays below are shared by every model cell.
dataset_splits = import_dataset()
X_train, X_test, y_train, y_test = dataset_splits

In [12]:
# Decision tree baseline: fit, print hold-out accuracy, persist the model,
# then print the per-class precision/recall/F1 report.
model = DecisionTreeClassifier().fit(np.array(X_train), np.array(y_train))

# Convert the hold-out split once and reuse it for scoring and prediction.
test_features, test_labels = np.array(X_test), np.array(y_test)
print(model.score(test_features, test_labels))
dump(model, './models/DT.joblib')
prediction = model.predict(test_features)
print(classification_report(test_labels, prediction))

0.9704672479320366
              precision    recall  f1-score   support

           0       0.97      0.98      0.97     22845
           1       0.98      0.96      0.97     21885

    accuracy                           0.97     44730
   macro avg       0.97      0.97      0.97     44730
weighted avg       0.97      0.97      0.97     44730



In [13]:
# Binary F1 on the hold-out split for whichever estimator `model` currently
# refers to.
preds = model.predict(X_test)
# Round to the nearest integer so the predictions can be compared with the labels.
pred_labels = np.rint(preds)
f1 = sklearn.metrics.f1_score(y_true=y_test, y_pred=pred_labels, average="binary")
print(f1)

0.9696496266513498


In [14]:
# Shallow random forest (20 trees, depth capped at 4): train, score, persist, report.
model = RandomForestClassifier(max_depth=4, n_estimators=20).fit(X_train, y_train)
test_accuracy = model.score(X_test, y_test)
print(test_accuracy)
dump(model, './models/RF.joblib')
prediction = model.predict(X_test)
report_text = classification_report(y_test, prediction)
print(report_text)

0.9659959758551308
              precision    recall  f1-score   support

           0       0.96      0.97      0.97     22845
           1       0.97      0.96      0.97     21885

    accuracy                           0.97     44730
   macro avg       0.97      0.97      0.97     44730
weighted avg       0.97      0.97      0.97     44730



In [15]:
# Logistic regression with inverse regularisation strength C=3.
lr_model_path = './models/LR.joblib'
model = LogisticRegression(C=3).fit(X_train, y_train)
print(model.score(X=X_test, y=y_test))
dump(model, lr_model_path)
prediction = model.predict(X_test)
print(classification_report(y_true=y_test, y_pred=prediction))

0.9069081153588195
              precision    recall  f1-score   support

           0       0.91      0.90      0.91     22845
           1       0.90      0.91      0.91     21885

    accuracy                           0.91     44730
   macro avg       0.91      0.91      0.91     44730
weighted avg       0.91      0.91      0.91     44730



In [16]:
# Gradient-boosted trees with tuned hyper-parameters.
#
# NOTE: the original call passed both `min_split_loss` (7.100747859845302e-07)
# and `gamma` (6.101266632438708e-07). XGBoost documents these as aliases of
# the same parameter, so supplying two different values left the effective
# setting ambiguous. Only `gamma` is kept; both values are ~1e-7, so the
# practical impact on the trees is expected to be negligible.
# `alpha` and `eta` are XGBoost's aliases for `reg_alpha` and `learning_rate`.
model = XGBClassifier(booster="gbtree", alpha=0.0010277375135306342,
                        max_depth=9, eta=0.6737504946980999, gamma=6.101266632438708e-07, grow_policy="lossguide")
model.fit(X_train, y_train)
print(model.score(X_test, y_test))
dump(model, './models/XGB.joblib')
prediction = model.predict(X_test)
print(classification_report(y_test,prediction))



0.9774647887323944
              precision    recall  f1-score   support

           0       0.99      0.97      0.98     22845
           1       0.97      0.99      0.98     21885

    accuracy                           0.98     44730
   macro avg       0.98      0.98      0.98     44730
weighted avg       0.98      0.98      0.98     44730



In [17]:
# Linear support-vector classifier (C=2, silent solver).
model = LinearSVC(verbose=False, C=2)
model.fit(X_train, y_train)
svm_accuracy = model.score(X_test, y_test)
print(svm_accuracy)
dump(value=model, filename='./models/SVM.joblib')
prediction = model.predict(X_test)
svm_report = classification_report(y_test, prediction)
print(svm_report)

0.864989939637827
              precision    recall  f1-score   support

           0       1.00      0.74      0.85     22845
           1       0.78      1.00      0.88     21885

    accuracy                           0.86     44730
   macro avg       0.89      0.87      0.86     44730
weighted avg       0.89      0.86      0.86     44730





In [18]:
# k-nearest neighbours with the library defaults.
model = KNeighborsClassifier().fit(X_train, y_train)
knn_accuracy = model.score(X_test, y_test)
print(knn_accuracy)
dump(model, './models/KNN.joblib')

# Show the raw predicted labels first, then the per-class report.
prediction = model.predict(X_test)
print(prediction)
print(classification_report(y_test, prediction))

0.9751397272524033
[0 0 1 ... 1 0 0]
              precision    recall  f1-score   support

           0       0.98      0.97      0.98     22845
           1       0.97      0.98      0.97     21885

    accuracy                           0.98     44730
   macro avg       0.98      0.98      0.98     44730
weighted avg       0.98      0.98      0.98     44730



In [20]:
# Small fully-connected network (10 -> 8 -> 4 -> 2 units) trained with early
# stopping, best-checkpoint saving and learning-rate reduction on plateau.
batch_size = 32
X_train = np.array(X_train)
y_train = np.array(y_train)

# All three callbacks watch the validation loss of the 25% validation split
# that `fit` carves out of the training data below.
earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0, mode='min')
mcp_save = ModelCheckpoint('./models/DNN4.h5', save_weights_only=False, save_best_only=True, monitor='val_loss', mode='min')
# `min_delta` is the current name of the improvement threshold; the former
# `epsilon` keyword is deprecated in Keras.
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1, min_delta=1e-4, mode='min')

dnn4 = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(X_train.shape[1],)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10, activation = 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.01),
    tf.keras.layers.Dense(8, activation = 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.01),
    tf.keras.layers.Dense(4, activation = 'relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.01),
    # Two softmax outputs, one per class; paired with the sparse categorical
    # loss below, which takes integer class labels.
    tf.keras.layers.Dense(2, activation='softmax')
])

dnn4.compile(optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])

dnn4.fit(X_train, y_train, batch_size=batch_size, epochs=10, verbose=1, callbacks=[earlyStopping, mcp_save, reduce_lr_loss], validation_split=0.25)
# Reload the best checkpoint written by `mcp_save` rather than keeping the
# weights from the final epoch.
model = tf.keras.models.load_model('./models/DNN4.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 9/10
Epoch 10/10


In [21]:
# Write the current model to disk and read it straight back, so the
# evaluation below runs on the persisted artefact.
dnn4_path = "./models/DNN4.h5"
model.save(dnn4_path)
model = tf.keras.models.load_model(dnn4_path)

In [22]:
# Evaluate the two-output network on the hold-out split. Each row of `preds`
# is rounded element-wise and the second column is taken as the predicted
# label.
preds = model.predict(X_test)
pred_labels = np.rint(preds)
positive_column = pred_labels[0:, 1]

accuracy = sklearn.metrics.accuracy_score(y_test, positive_column)
f1 = sklearn.metrics.f1_score(y_test, positive_column, average="binary")
prec = sklearn.metrics.precision_score(y_test, positive_column, average="binary")
recall = sklearn.metrics.recall_score(y_test, positive_column, average="binary")

for metric_label, metric_value in (
    ("accuracy: ", accuracy),
    ("f1: ", f1),
    ("prec: ", prec),
    ("recall: ", recall),
):
    print(metric_label + str(metric_value))

accuracy: 0.920947909680304
f1: 0.9184539458512062
prec: 0.9271779112538995
recall: 0.9098926205163353


In [23]:
from numpy import newaxis

# Index tuples that append one length-1 axis: labels become (n, 1) columns and
# feature matrices become (n, features, 1) for the Conv1D / LSTM models.
as_column = (slice(None), newaxis)                 # equivalent to [:, newaxis]
as_channels = (slice(None), slice(None), newaxis)  # equivalent to [:, :, newaxis]

y_train_ex = y_train[as_column]
X_train_ex = X_train[as_channels]
y_test_ex = y_test[as_column]
X_test_ex = X_test[as_channels]

In [25]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Embedding, GlobalAveragePooling1D
epochs = 100
batch_size = 256

# 1-D CNN over the feature axis: three same-padded convolutions with growing
# filter counts, one max-pool, then a single sigmoid unit for the binary label.
model = Sequential([
    Conv1D(filters=100, kernel_size=5, strides=1, padding='same', activation='relu',
           input_shape=(X_train_ex.shape[1], 1)),
    Dropout(0.2),
    Conv1D(filters=200, kernel_size=5, strides=1, padding='same', activation='relu'),
    Dropout(0.2),
    Conv1D(filters=400, kernel_size=10, strides=1, padding='same', activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Trained on the full training split; no validation data is passed here.
model.fit(X_train_ex, y_train, batch_size=batch_size, epochs=epochs)

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_3 (Conv1D)            (None, 10, 100)           600       
_________________________________________________________________
dropout_9 (Dropout)          (None, 10, 100)           0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 10, 200)           100200    
_________________________________________________________________
dropout_10 (Dropout)         (None, 10, 200)           0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 10, 400)           800400    
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 5, 400)            0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 2000)             

<tensorflow.python.keras.callbacks.History at 0x21e859ddc70>

In [26]:
# Persist the freshly trained CNN before reloading it. With the save commented
# out, a clean Restart & Run All would either fail (no CNN.h5 on disk) or
# silently evaluate a stale model from an earlier session instead of the one
# trained in the cell before this one.
model.save("./models/CNN.h5")
model = tf.keras.models.load_model("./models/CNN.h5")

In [27]:
# Hold-out metrics for the single-output sigmoid model: outputs are rounded
# to the nearest integer and compared with the (n, 1) label column.
preds = model.predict(X_test_ex)
pred_labels = np.rint(preds)

metrics_api = sklearn.metrics
accuracy = metrics_api.accuracy_score(y_test_ex, pred_labels)
f1 = metrics_api.f1_score(y_test_ex, pred_labels, average="binary")
prec = metrics_api.precision_score(y_test_ex, pred_labels, average="binary")
recall = metrics_api.recall_score(y_test_ex, pred_labels, average="binary")

summary_lines = [
    "accuracy: " + str(accuracy),
    "f1: " + str(f1),
    "prec: " + str(prec),
    "recall: " + str(recall),
]
for summary_line in summary_lines:
    print(summary_line)

accuracy: 0.9307623518891125
f1: 0.927847540945414
prec: 0.9465253351079
recall: 0.9098926205163353


In [28]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

In [31]:
# Four stacked 50-unit LSTM layers, each followed by 20% dropout, ending in a
# single sigmoid unit. Despite the variable name this is a binary classifier.
n_steps, n_channels = X_train_ex.shape[1], X_train_ex.shape[2]

regressor = Sequential([
    # First LSTM layer declares the input shape and returns the full sequence.
    LSTM(units=50, return_sequences=True, input_shape=(n_steps, n_channels)),
    Dropout(0.2),
    # Second LSTM layer with dropout regularisation.
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    # Third LSTM layer with dropout regularisation.
    LSTM(units=50, return_sequences=True),
    Dropout(0.2),
    # Fourth LSTM layer returns only its final state for the dense head.
    LSTM(units=50),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])
regressor.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
regressor.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 10, 50)            10400     
_________________________________________________________________
dropout_12 (Dropout)         (None, 10, 50)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 10, 50)            20200     
_________________________________________________________________
dropout_13 (Dropout)         (None, 10, 50)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 10, 50)            20200     
_________________________________________________________________
dropout_14 (Dropout)         (None, 10, 50)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 50)               

In [32]:
# Run training: 20 epochs over the channel-expanded training split.
regressor.fit(x=X_train_ex, y=y_train_ex, batch_size=32, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x221ad599d90>

In [33]:
# Persist the trained LSTM and reload it as `model`, so the evaluation below
# runs on the saved artefact.
lstm_path = "./models/LSTM.h5"
regressor.save(lstm_path)
model = tf.keras.models.load_model(lstm_path)

In [35]:
# Hold-out metrics for the reloaded model: outputs are rounded to the
# nearest integer before scoring.
preds = model.predict(X_test_ex)
pred_labels = np.rint(preds)

accuracy = sklearn.metrics.accuracy_score(y_test_ex, pred_labels)
# The three class-wise scores share the same call signature, so compute them
# in one pass (same order as before: F1, precision, recall).
f1, prec, recall = (
    scorer(y_test_ex, pred_labels, average="binary")
    for scorer in (
        sklearn.metrics.f1_score,
        sklearn.metrics.precision_score,
        sklearn.metrics.recall_score,
    )
)

print("accuracy: " + str(accuracy))
print("f1: " + str(f1))
print("prec: " + str(prec))
print("recall: " + str(recall))

accuracy: 0.9685222445785826
f1: 0.9678509452918074
prec: 0.9672767103281457
recall: 0.9684258624628741
