In [None]:
# Make the user's Google Drive visible to this Colab runtime.
from google.colab import drive

# Mounting triggers an authorization prompt.
_DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(_DRIVE_MOUNT_POINT)

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten, Dropout, Conv1D, MaxPooling1D, GlobalAveragePooling1D, LSTM, Activation, GRU, SimpleRNN
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import BatchNormalization
import keras.callbacks as kcallbacks
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report


# 20 normal + 20 anomalous = test / the rest is train
# fwaf + http = train

# Path

## Normal

In [None]:
# Dataset locations on the mounted Drive. Every CSV lives in the same folder,
# so the folder is spelled once and the file names are appended to it.
# NOTE(review): this cell sits under the "Normal" heading but points at the
# min-max files - confirm that is intended.
_DATA_DIR = '/content/drive/MyDrive/UIT/HocKy8/KLTN/detect_attack_by_reinforcement_learning/data/matrix3/min-max/'

DATASET_TRAINING = _DATA_DIR + 'train_minmax.csv'

# Alternative test sets; uncomment one (last assignment wins) to switch.
# DATASET_TESTING = _DATA_DIR + 'test_csic2010-minmax.csv'
# DATASET_TESTING = _DATA_DIR + 'test_fwaf-minmax.csv'
DATASET_TESTING = _DATA_DIR + 'test_http_param-minmax.csv'

## minmax

In [None]:
# Min-max variant of the dataset: training CSV plus the currently selected
# test CSV, both read from one shared folder on the mounted Drive.
_DATA_DIR = '/content/drive/MyDrive/UIT/HocKy8/KLTN/detect_attack_by_reinforcement_learning/data/matrix3/min-max/'

DATASET_TRAINING = _DATA_DIR + 'train_minmax.csv'

# Alternative test sets; uncomment one (last assignment wins) to switch.
# DATASET_TESTING = _DATA_DIR + 'test_csic2010-minmax.csv'
# DATASET_TESTING = _DATA_DIR + 'test_fwaf-minmax.csv'
DATASET_TESTING = _DATA_DIR + 'test_http_param-minmax.csv'

## binary

In [None]:
# Training CSV plus the currently selected test CSV, both read from one
# shared folder on the mounted Drive.
# NOTE(review): this cell sits under the "binary" heading but points at the
# min-max files - confirm that is intended.
_DATA_DIR = '/content/drive/MyDrive/UIT/HocKy8/KLTN/detect_attack_by_reinforcement_learning/data/matrix3/min-max/'

DATASET_TRAINING = _DATA_DIR + 'train_minmax.csv'

# Alternative test sets; uncomment one (last assignment wins) to switch.
# DATASET_TESTING = _DATA_DIR + 'test_csic2010-minmax.csv'
# DATASET_TESTING = _DATA_DIR + 'test_fwaf-minmax.csv'
DATASET_TESTING = _DATA_DIR + 'test_http_param-minmax.csv'

# load data

In [None]:
### Load data
# Seed shared by the row shuffles and the train/validation split, so that a
# fresh "Restart & Run All" reproduces the same partitions.
RANDOM_SEED = 42

# Read both CSVs and force every column to float.
df_train = pd.read_csv(DATASET_TRAINING).astype(float)
df_test = pd.read_csv(DATASET_TESTING).astype(float)

# Shuffle the rows of both frames (frac=1 keeps every row).
df_train = df_train.sample(frac=1, random_state=RANDOM_SEED)
df_test = df_test.sample(frac=1, random_state=RANDOM_SEED)

# The last column is used as the label; every other column is a feature.
x_train, y_train = df_train.iloc[:, :-1], df_train.iloc[:, [-1]]
x_test, y_test = df_test.iloc[:, :-1], df_test.iloc[:, [-1]]

# NOTE(review): the input files are named "*minmax*", which suggests the
# features may already be scaled - confirm that dividing by 255 is intended.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Hold out 10% of the training rows for validation.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.1, random_state=RANDOM_SEED
)

y_train = y_train.astype(int)
y_val = y_val.astype(int)
y_test = y_test.astype(int)

# One-hot encode with an explicit, shared class count. Letting each call infer
# it from its own data would give a split that lacks the highest label a
# narrower encoding than the model's output layer.
num_classes = int(max(
    y_train.to_numpy().max(),
    y_val.to_numpy().max(),
    y_test.to_numpy().max(),
)) + 1

y_train = to_categorical(y_train, num_classes=num_classes)
y_val = to_categorical(y_val, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

# Number of feature columns and number of one-hot classes, used by the models.
input_shape = x_train.shape[1]
output_shape = y_train.shape[1]

In [None]:
# Sanity check: show the shape of every split, features before labels,
# in train / validation / test order.
for _split in (x_train, y_train, x_val, y_val, x_test, y_test):
    print(_split.shape)

# Feature count and class count derived from the training split.
print(input_shape, output_shape)

In [None]:
# File-name template for saved models; Keras fills in the epoch number and
# the validation loss of that epoch.
checkpoint_filepath = '/content/drive/MyDrive/UIT/HocKy8/KLTN/checkpoints/weights.{epoch:02d}-{val_loss:.2f}.h5'

# Only write a checkpoint when validation accuracy improves. Both the
# monitored quantity and the file-name template are validation metrics, so
# fit() must be given validation data for this callback to work.
# NOTE(review): this single instance is passed to every training cell; it
# presumably carries its best-so-far value from one fit() call to the next -
# confirm whether each model should get its own callback.
_checkpoint_options = dict(
    filepath=checkpoint_filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)
model_checkpoint_callback = kcallbacks.ModelCheckpoint(**_checkpoint_options)

In [None]:
# Metrics reported during training and evaluation, declared as
# (metric class, constructor kwargs) pairs and instantiated in this order.
# NOTE(review): the labels are one-hot encoded, so these binary-style metrics
# presumably count over every output column rather than one class - confirm.
_METRIC_SPECS = [
    (tf.keras.metrics.TruePositives, {'name': 'tp'}),
    (tf.keras.metrics.FalsePositives, {'name': 'fp'}),
    (tf.keras.metrics.TrueNegatives, {'name': 'tn'}),
    (tf.keras.metrics.FalseNegatives, {'name': 'fn'}),
    (tf.keras.metrics.BinaryAccuracy, {'name': 'accuracy'}),
    (tf.keras.metrics.Precision, {'name': 'precision'}),
    (tf.keras.metrics.Recall, {'name': 'recall'}),
    (tf.keras.metrics.AUC, {'name': 'auc'}),
    (tf.keras.metrics.AUC, {'name': 'prc', 'curve': 'PR'}),  # precision-recall curve
]

METRICS = [metric_cls(**metric_kwargs) for metric_cls, metric_kwargs in _METRIC_SPECS]

# CNN


## CNN 1

In [None]:
# # ### CNN 1
# model = Sequential()
# model.add(Conv1D(64, 3, padding="same",activation="relu",input_shape=(input_shape, 1)))
# model.add(Conv1D(64, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Flatten())
# model.add(Dense(128, activation="relu"))
# model.add(Dropout(0.5))
# model.add(Dense(output_shape, activation="softmax"))

# # define optimizer and objective, compile cnn

# model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=['accuracy'])

# # train
# early_stop = kcallbacks.EarlyStopping(monitor='val_accuracy', patience=2, verbose=1, mode='auto')
# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)

# # # loss: 0.2412 - accuracy: 0.8811

## CNN 2

In [None]:
### CNN 2
# One Conv1D + max-pooling stage followed by a dense classifier head.
model = Sequential()

model.add(Conv1D(64, 3, activation="relu", padding='same', input_shape=(input_shape, 1)))
model.add(MaxPooling1D(pool_size=(2)))

model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(output_shape, activation="softmax"))

# define optimizer and objective, compile cnn
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=METRICS)

# NOTE(review): early_stop is created here but is not passed to fit();
# add it to `callbacks` if early stopping is actually wanted.
early_stop = kcallbacks.EarlyStopping(monitor='val_accuracy', patience=5, verbose=1, mode='auto')

# fit model
# validation_data is required: model_checkpoint_callback monitors
# `val_accuracy` and puts `val_loss` in its file name, and neither value
# exists unless fit() evaluates a validation set each epoch.
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=20,
    batch_size=64,
    callbacks=[model_checkpoint_callback],
    shuffle=True,
)

# Results noted from earlier runs (the trailing number is unlabeled in the notes):
# # loss: 0.2433 - accuracy: 0.8846 - 128
# # loss: 0.2384 - accuracy: 0.8884 - 64
# # loss: 0.2347 - accuracy: 0.8934 - 32

In [None]:
print("Evaluate on test data")
# return_dict=True pairs every reported value with its own metric name.
# Zipping model.metrics_names with the result list is unreliable: some Keras
# versions expose a single aggregate name for all compiled metrics, which
# would mislabel or drop values.
metrics_by_name = model.evaluate(x_test, y_test, batch_size=32, return_dict=True)
# Keep `results` as a plain list of values, in reporting order.
results = list(metrics_by_name.values())

for name, value in metrics_by_name.items():
    print(name, ': ', value)
print()
print("test loss, test acc:", results)
# TODO: compute precision, recall and F1-score

## CNN 3

In [None]:
# ### CNN 3
# model = Sequential()
# model.add(Conv1D(64, 3, padding="same",activation="relu",input_shape=(input_shape, 1)))
# model.add(Conv1D(64, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Conv1D(128, 3, padding="same", activation="relu"))
# model.add(Conv1D(128, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Flatten())
# model.add(Dense(128, activation="relu"))
# model.add(Dropout(0.5))
# model.add(Dense(output_shape, activation="softmax"))

# # define optimizer and objective, compile cnn
# model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)

# # loss: 0.2384 - accuracy: 0.8908

## CNN5

In [None]:
# ### CNN5
# model = Sequential()
# model.add(Conv1D(16, 3, padding="same",activation="relu",input_shape=(input_shape, 1)))
# model.add(Conv1D(64, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Conv1D(128, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Flatten())
# model.add(Dense(128, activation="relu"))
# model.add(Dropout(0.5))
# model.add(Dense(output_shape, activation="softmax"))

# # define optimizer and objective, compile cnn
# model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)

## CNN6

In [None]:
# ### CNN6
# model = Sequential()
# model.add(Conv1D(16, 3, padding="same",activation="relu",input_shape=(input_shape, 1)))
# model.add(Conv1D(64, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Conv1D(128, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Conv1D(256, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Conv1D(256, 3, padding="same", activation="relu"))
# model.add(MaxPooling1D(pool_size=(2)))
# model.add(Flatten())
# model.add(Dense(256, activation="relu"))
# model.add(Dropout(0.5))
# model.add(Dense(output_shape, activation="softmax"))

# # define optimizer and objective, compile cnn
# model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)

## LSTM 1

In [None]:
# ### LSTM
# model = Sequential()
# model.add(LSTM(2, input_shape=(input_shape, 1), return_sequences=True, activation='sigmoid'))
# model.add(LSTM(units=120, recurrent_activation='hard_sigmoid', activation='sigmoid'))
# model.add(Dense(activation='sigmoid', units=output_shape))
# adam=Adam(learning_rate=0.01)

# model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

# early_stop = kcallbacks.EarlyStopping(monitor='val_accuracy', patience=2, verbose=1, mode='auto')
# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)

# # loss: 0.4802 - accuracy: 0.7314

# LSTM

## LSTM 2

In [None]:
## LSTM 2
# Single 4-unit LSTM layer followed by a softmax classifier.
model = Sequential()
model.add(LSTM(4, input_shape=(input_shape, 1)))
model.add(Dropout(0.1))
model.add(Dense(output_shape))
model.add(Activation('softmax'))

# try using different optimizers and different optimizer configs
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=METRICS)

# validation_data is required: model_checkpoint_callback monitors
# `val_accuracy` and puts `val_loss` in its file name, and neither value
# exists unless fit() evaluates a validation set each epoch.
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=100,
    batch_size=64,
    callbacks=[model_checkpoint_callback],
    shuffle=True,
)

# Result noted from an earlier run:
# loss: 0.4497 - accuracy: 0.7483

In [None]:
print("Evaluate on test data")
# return_dict=True pairs every reported value with its own metric name.
# Zipping model.metrics_names with the result list is unreliable: some Keras
# versions expose a single aggregate name for all compiled metrics, which
# would mislabel or drop values.
metrics_by_name = model.evaluate(x_test, y_test, batch_size=32, return_dict=True)
# Keep `results` as a plain list of values, in reporting order.
results = list(metrics_by_name.values())

for name, value in metrics_by_name.items():
    print(name, ': ', value)
print()
print("test loss, test acc:", results)
# TODO: compute precision, recall and F1-score

## LSTM 3

In [None]:
# ### LSTM 3
# model = Sequential()
# model.add(LSTM(8,input_dim=input_shape, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(LSTM(8,input_dim=input_shape, return_sequences=False))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)

## LSTM 4

In [None]:
# ### LSTM 4
# model = Sequential()
# model.add(LSTM(16,input_dim=input_shape, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(LSTM(16, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(LSTM(16, return_sequences=False))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)

## LSTM 5

In [None]:
# ### LSTM 5
# model = Sequential()
# model.add(LSTM(64,input_dim=input_shape, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(LSTM(64,return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(LSTM(64, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(LSTM(64, return_sequences=False))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)

# GRU

## GRU 1

In [None]:
# ### GRU 1
# model = Sequential()
# model.add(GRU(4,input_shape=(input_shape, 1)))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=32, callbacks=[model_checkpoint_callback], shuffle=True)

# # loss: 0.6380 - accuracy: 0.6299

## GRU 2

In [None]:
### GRU 2
# Two stacked 8-unit GRU layers followed by a softmax classifier.
model = Sequential()
# The first layer returns the full sequence so the second GRU can consume it.
model.add(GRU(8, input_shape=(input_shape, 1), return_sequences=True))
model.add(Dropout(0.1))
# Only the first layer of a Sequential model needs input_shape, so it is not
# repeated here.
model.add(GRU(8, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(output_shape))
model.add(Activation('softmax'))

# try using different optimizers and different optimizer configs
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=METRICS)

# validation_data is required: model_checkpoint_callback monitors
# `val_accuracy` and puts `val_loss` in its file name, and neither value
# exists unless fit() evaluates a validation set each epoch.
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=30,
    batch_size=64,
    callbacks=[model_checkpoint_callback],
    shuffle=True,
)
# Result noted from an earlier run:
# loss: 0.4454 - accuracy: 0.7774

In [None]:
print("Evaluate on test data")
# return_dict=True pairs every reported value with its own metric name.
# Zipping model.metrics_names with the result list is unreliable: some Keras
# versions expose a single aggregate name for all compiled metrics, which
# would mislabel or drop values.
metrics_by_name = model.evaluate(x_test, y_test, batch_size=32, return_dict=True)
# Keep `results` as a plain list of values, in reporting order.
results = list(metrics_by_name.values())

for name, value in metrics_by_name.items():
    print(name, ': ', value)
print()
print("test loss, test acc:", results)
# TODO: compute precision, recall and F1-score

## GRU 3


In [None]:
# ### GRU 3
# model = Sequential()
# model.add(GRU(16,input_shape=(input_shape, 1), return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(GRU(16, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(GRU(16, return_sequences=False))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=30, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)
# # loss: 0.3558 - accuracy: 0.8156

## GRU 4

In [None]:
# ### GRU 4
# model = Sequential()
# model.add(GRU(64,input_shape=(input_shape, 1), return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(GRU(64,return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(GRU(64, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(GRU(64, return_sequences=False))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=30, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)
# # loss: 0.2769 - accuracy: 0.8616

# SimpleRNN

## SimpleRNN 1

In [None]:
# ### SimpleRNN 1
# model = Sequential()
# model.add(SimpleRNN(4,input_shape=(input_shape, 1)))
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=100, batch_size=32, callbacks=[model_checkpoint_callback], shuffle=True)

# # loss: 0.5145 - accuracy: 0.7150

## SimpleRNN 2

In [None]:
### SimpleRNN 2
# Two stacked 8-unit SimpleRNN layers followed by a softmax classifier.
model = Sequential()
# The first layer returns the full sequence so the second RNN can consume it.
model.add(SimpleRNN(8, input_shape=(input_shape, 1), return_sequences=True))
model.add(Dropout(0.1))
# Only the first layer of a Sequential model needs input_shape, so it is not
# repeated here.
model.add(SimpleRNN(8, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(output_shape))
model.add(Activation('softmax'))

# try using different optimizers and different optimizer configs
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=METRICS)

# validation_data is required: model_checkpoint_callback monitors
# `val_accuracy` and puts `val_loss` in its file name, and neither value
# exists unless fit() evaluates a validation set each epoch.
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=30,
    batch_size=64,
    callbacks=[model_checkpoint_callback],
    shuffle=True,
)
# Result noted from an earlier run:
# loss: 0.4497 - accuracy: 0.7519

In [None]:
print("Evaluate on test data")
# return_dict=True pairs every reported value with its own metric name.
# Zipping model.metrics_names with the result list is unreliable: some Keras
# versions expose a single aggregate name for all compiled metrics, which
# would mislabel or drop values.
metrics_by_name = model.evaluate(x_test, y_test, batch_size=32, return_dict=True)
# Keep `results` as a plain list of values, in reporting order.
results = list(metrics_by_name.values())

for name, value in metrics_by_name.items():
    print(name, ': ', value)
print()
print("test loss, test acc:", results)
# TODO: compute precision, recall and F1-score

## SimpleRNN 3

In [None]:
# ### SimpleRNN 3
# model = Sequential()
# model.add(SimpleRNN(16,input_shape=(input_shape, 1), return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(SimpleRNN(16, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(SimpleRNN(16, return_sequences=False))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=30, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)
# #loss: 0.3500 - accuracy: 0.8229

## Simple RNN 4

In [None]:
# ### Simple RNN 4
# model = Sequential()
# model.add(SimpleRNN(64,input_shape=(input_shape, 1), return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(SimpleRNN(64,return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(SimpleRNN(64, return_sequences=True))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(SimpleRNN(64, return_sequences=False))  # try using a GRU instead, for fun
# model.add(Dropout(0.1))
# model.add(Dense(output_shape))
# model.add(Activation('softmax'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

# history=model.fit(x_train, y_train, validation_data=(x_val,y_val), epochs=30, batch_size=64, callbacks=[model_checkpoint_callback], shuffle=True)
# # loss: 0.6376 - accuracy: 0.6803

# DNN

In [None]:
### DNN
# Fully connected baseline: one 1024-unit hidden layer and a softmax output.
model = Sequential()
model.add(Dense(1024, input_dim=input_shape, activation='relu'))
model.add(Dropout(0.01))
model.add(Dense(output_shape))
model.add(Activation('softmax'))

# try using different optimizers and different optimizer configs
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=METRICS)

# validation_data is required: model_checkpoint_callback monitors
# `val_accuracy` and puts `val_loss` in its file name, and neither value
# exists unless fit() evaluates a validation set each epoch.
history = model.fit(
    x_train, y_train,
    validation_data=(x_val, y_val),
    epochs=100,
    batch_size=32,
    callbacks=[model_checkpoint_callback],
    shuffle=True,
)

# Result noted from an earlier run:
# loss: 0.2518 - accuracy: 0.8580

# Evaluate on test data

In [None]:
print("Evaluate on test data")
# return_dict=True pairs every reported value with its own metric name.
# Zipping model.metrics_names with the result list is unreliable: some Keras
# versions expose a single aggregate name for all compiled metrics, which
# would mislabel or drop values.
metrics_by_name = model.evaluate(x_test, y_test, batch_size=32, return_dict=True)
# Keep `results` as a plain list of values, in reporting order.
results = list(metrics_by_name.values())

for name, value in metrics_by_name.items():
    print(name, ': ', value)
print()
print("test loss, test acc:", results)
# TODO: compute precision, recall and F1-score

1.Evaluate on test data -  csic - cnn1

loss: 0.3377 - accuracy: 0.8764
test loss, test acc: [0.33771705627441406, 0.876423716545105]

2.Evaluate on test data - test_fwaf - CNN1

loss: 0.0053 - accuracy: 0.9984
test loss, test acc: [0.005339534021914005, 0.9983782768249512]

3.Evaluate on test data - http - CNN1

loss: 0.0580 - accuracy: 0.9837
test loss, test acc: [0.058005861937999725, 0.9837455749511719]

## CNN2

1. Evaluate on test data normal - csic2010 - 200 epoch - CNN2

loss: 0.3722 - accuracy: 0.8588
test loss, test acc: [0.3722080886363983, 0.858769953250885]

2. Evaluate on test data - test_fwaf- 100 epoch - CNN2

loss: 0.0073 - accuracy: 0.9974
test loss, test acc: [0.007320699747651815, 0.9973954558372498]

3. Evaluate on test data - test_fwaf http- 100 epoch - CNN2

loss: 0.0708 - accuracy: 0.9797
test loss, test acc: [0.07081031054258347, 0.9796819686889648]

