In [None]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib

In [None]:
!pip install scikeras



In [None]:
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import optimizers
from tensorflow.keras import models
from tensorflow.keras import layers
from scikeras import wrappers
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras import losses
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV
from scikeras.wrappers import KerasClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
import time
from pickle import load

In [None]:
attack_types = {
        'normal': 'normal',

        'back': 'DoS',
        'land': 'DoS',
        'neptune': 'DoS',
        'pod': 'DoS',
        'smurf': 'DoS',
        'teardrop': 'DoS',
        'mailbomb': 'DoS',
        'apache2': 'DoS',
        'processtable': 'DoS',
        'udpstorm': 'DoS',

        'ipsweep': 'Probe',
        'nmap': 'Probe',
        'portsweep': 'Probe',
        'satan': 'Probe',
        'mscan': 'Probe',
        'saint': 'Probe',

        'ftp_write': 'R2L',
        'guess_passwd': 'R2L',
        'imap': 'R2L',
        'multihop': 'R2L',
        'phf': 'R2L',
        'spy': 'R2L',
        'warezclient': 'R2L',
        'warezmaster': 'R2L',
        'sendmail': 'R2L',
        'named': 'R2L',
        'snmpgetattack': 'R2L',
        'snmpguess': 'R2L',
        'xlock': 'R2L',
        'xsnoop': 'R2L',
        'worm': 'R2L',

        'buffer_overflow': 'U2R',
        'loadmodule': 'U2R',
        'perl': 'U2R',
        'rootkit': 'U2R',
        'httptunnel': 'U2R',
        'ps': 'U2R',
        'sqlattack': 'U2R',
        'xterm': 'U2R'
}


is_attack = {
        "DoS":"attack",
        "R2L":"attack",
        "U2R":"attack",
        "Probe":"attack",
        "normal":"normal"
}


In [None]:
class read_data:
    col_names = ["duration","protocol_type","service","flag","src_bytes",
        "dst_bytes","land","wrong_fragment","urgent","hot","num_failed_logins",
        "logged_in","num_compromised","root_shell","su_attempted","num_root",
        "num_file_creations","num_shells","num_access_files","num_outbound_cmds",
        "is_host_login","is_guest_login","count","srv_count","serror_rate",
        "srv_serror_rate","rerror_rate","srv_rerror_rate","same_srv_rate",
        "diff_srv_rate","srv_diff_host_rate","dst_host_count","dst_host_srv_count",
        "dst_host_same_srv_rate","dst_host_diff_srv_rate","dst_host_same_src_port_rate",
        "dst_host_srv_diff_host_rate","dst_host_serror_rate","dst_host_srv_serror_rate",
        "dst_host_rerror_rate","dst_host_srv_rerror_rate","label", "difficulty_level"]

    KDDTrain = pd.read_csv("KDDTrain+.txt",names = col_names,)
    KDDTest = pd.read_csv("KDDTest+.txt",names = col_names,)


    KDDAll = pd.concat([KDDTrain, KDDTest])

    kdd_diff_level_all = KDDAll["difficulty_level"].copy()
    kdd_diff_level_train = KDDTrain["difficulty_level"].copy()
    kdd_diff_level_test = KDDTest["difficulty_level"].copy()

    KDDAll = KDDAll.drop("difficulty_level", axis = 1)

    KDDTrain.to_csv("KDDAll+.csv")

    KDDTrain_len = KDDTrain.shape[0]
    KDDTest_len = KDDTest.shape[0]

    KDDAll["type"] = KDDAll.label.map(lambda x: attack_types[x])
    KDDAll["isa"] = KDDAll.type.map(lambda x: is_attack[x])

    KDDTrain["type"] = KDDTrain.label.map(lambda x: attack_types[x])
    KDDTrain["isa"] = KDDTrain.type.map(lambda x: is_attack[x])

    KDDTest["type"] = KDDTest.label.map(lambda x: attack_types[x])
    KDDTest["isa"] = KDDTest.type.map(lambda x: is_attack[x])

    kdd_attack_type_group = KDDAll.groupby("type")
    kdd_is_attack_group = KDDAll.groupby("isa")

    kdd_attack_type_group.type.count()
    kdd_is_attack_group["isa"].count()

    KDDAll_is = KDDAll.copy()
    KDDAll_type = KDDAll.copy()

    KDDAll_is_y = KDDAll["isa"].copy()
    KDDAll_is.drop(["label", "isa"], axis=1, inplace=True)
    KDDAll_type_y = KDDAll["type"].copy()
    KDDAll_type.drop(["label", "type"], axis=1, inplace=True)

    KDDTrain_is_y = KDDTrain["isa"].copy()
    KDDTrain_type_y = KDDTrain["type"].copy()

    KDDTest_is_y = KDDTest["isa"].copy()
    KDDTest_type_y = KDDTest["type"].copy()

    class_mapping = {'attack': 0, 'normal': 1}

    Y_Train = KDDTrain_is_y.map(class_mapping)
    Y_Test = KDDTest_is_y.map(class_mapping)


In [None]:
class preprocess_data:

    col_names_onehot = ["protocol_type","service","flag","type"]
    col_names_onehot_s = ["protocol_type","service","flag","type"]
    KDDAll_num = read_data.KDDAll_is.drop(col_names_onehot, axis=1)  #pd
    KDDAll_onehot_s = read_data.KDDAll_is[ col_names_onehot_s]  #pd

    num_pipeline = Pipeline([('scaling', StandardScaler())])
    cat_string_pipeline = Pipeline([('imputer', SimpleImputer(strategy = "constant", fill_value = "missing")), ('ordi', OrdinalEncoder()), ('onehots', OneHotEncoder(categories='auto'))])

    num_attribs = list(KDDAll_num)
    cat_s_attribs = list(KDDAll_onehot_s)

    full_pipeline = ColumnTransformer([("num", num_pipeline, num_attribs), ("cats", cat_string_pipeline, cat_s_attribs)])

    KDDAll_t = full_pipeline.fit_transform(read_data.KDDAll_is)

    X_Train_i = KDDAll_t[:read_data.KDDTrain_len]
    X_Test_i = KDDAll_t[read_data.KDDTrain_len:read_data.KDDTrain_len + read_data.KDDTest_len]

    X_Train = np.expand_dims(X_Train_i, axis=1)
    X_Test = np.expand_dims(X_Test_i, axis=1)
    Y_Train = read_data.Y_Train
    Y_Test = read_data.Y_Test


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras import optimizers
from tensorflow.keras import models
from tensorflow.keras import layers
from scikeras import wrappers
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras import losses
from scipy.stats import reciprocal
from sklearn.model_selection import RandomizedSearchCV
from scikeras.wrappers import KerasClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
import time


In [None]:
input_dim = preprocess_data.X_Train.shape[2]
print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
print(input_dim)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
127


In [None]:
classes = 2
loop_back =1
hidden_encoder_dim = input_dim
hidden_decoder_dim = input_dim

In [None]:
def build_model(lr, hidden_layers, initiali):
    model = models.Sequential([
        layers.LSTM(hidden_layers, input_shape=(loop_back, input_dim)),
        layers.Dense(1)])
    adamopt = optimizers.Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(loss="binary_crossentropy", optimizer=adamopt)
    return model



In [None]:
keras_reg = KerasClassifier(build_model)

param_distribs = {"lr": reciprocal(0.0001, 0.0005), "units": [1, 2, 4]}

rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, cv=5, scoring='accuracy', n_jobs=-1, error_score='raise')


In [None]:
X_Train = preprocess_data.X_Train
Y_Train = read_data.Y_Train

X_Test = preprocess_data.X_Test
Y_Test = read_data.Y_Test


In [None]:
batch_s = 2000
epoches = 250
ver = 2

In [None]:
rnd_search_cv.fit(X_Train, Y_Train, batch_size=batch_s, epochs=epoches, verbose=ver)

In [None]:
print(X_Train)

[[[-0.11248106 -0.00734564 -0.00461423 ...  0.          0.
    1.        ]]

 [[-0.11248106 -0.00740942 -0.00461423 ...  0.          0.
    1.        ]]

 [[-0.11248106 -0.00743641 -0.00461423 ...  0.          0.
    0.        ]]

 ...

 [[-0.11248106 -0.00702399 -0.00451054 ...  0.          0.
    1.        ]]

 [[-0.11248106 -0.00743641 -0.00461423 ...  0.          0.
    0.        ]]

 [[-0.11248106 -0.0074085  -0.00461423 ...  0.          0.
    1.        ]]]


In [None]:

def build_model(units=1, initiali='uniform', loop_back=10, input_dim=5):
    model = models.Sequential()

    model.add(layers.LSTM(units, input_shape=(loop_back, input_dim)))
    model.add(layers.Dense(1))

    adamopt = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    model.compile(loss="binary_crossentropy", optimizer=adamopt)
    return model

keras_reg = KerasClassifier(build_fn=build_model, units=1)

param_distribs = {"units": [1, 2, 4]}

rnd_search_cv = RandomizedSearchCV(keras_reg, param_distribs, cv=5, scoring='accuracy', n_jobs=-1, error_score='raise')

X_Train = preprocess_data.X_Train
Y_Train = read_data.Y_Train

X_Test = preprocess_data.X_Test
Y_Test = read_data.Y_Test

batch_s = 2000
epoches = 250
ver = 2
rnd_search_cv.fit(X_Train, Y_Train, batch_size=batch_s, epochs=epoches, verbose=ver)




ValueError: ignored

# NAD on EPC: Model is already pre-trained

Model : RNN LSTM Network with 4 hidden layers, Adam optimizer and binary-crossentropy loss function -
Epochs: 250 -
Batch size : 2000 connections per step

## Start Prediction

In [None]:
pred_test = rnd_search_cv.predict(X_Test)

Classification report ready

In [None]:
print("Classclassification_report:               \n", classification_report(Y_Test, pred_test))

In [None]:
print("Best estimator: \n", rnd_search_cv.best_estimator_)
print("Best score: \n", rnd_search_cv.best_score_)
print("Best params: \n", rnd_search_cv.best_params_)
print("Refit time: \n", rnd_search_cv.refit_time_)

Plot Confusion Matrix Graph

In [None]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_Test, pred_test)
import itertools
classes = ['attack','normal']
plt.figure()
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion matrix')
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
print(cm)
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, cm[i, j].round(4),
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black")

plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()