In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import load_model

In [2]:
df=pd.read_csv('/content/SDN_DDoS_.csv')

In [5]:
# Splitting dataset into features and label
X= df.drop('Label', axis =1)
y = df['Label']

# Splitting the dataset into the training set and the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [6]:
# scale data
t = MinMaxScaler()
t.fit(X_train)
X_train = t.transform(X_train)
X_test = t.transform(X_test)

In [7]:
# AutoEncoder Model Preparation
n_inputs = X.shape[1]
# define encoder
input_data_shape= Input(shape=(n_inputs,))
# encoder level 1
encoder= Dense(n_inputs*2)(input_data_shape)
encoder = BatchNormalization()(encoder)
encoder= LeakyReLU()(encoder)
# encoder level 2
encoder= Dense(n_inputs)(encoder)
encoder= BatchNormalization()(encoder)
encoder= LeakyReLU()(encoder)
# bottleneck
n_bottleneck = round(float(n_inputs) / 2.0)
bottleneck = Dense(n_bottleneck)(encoder)
# define decoder, level 1
decoder = Dense(n_inputs)(bottleneck)
decoder = BatchNormalization()(decoder)
decoder = LeakyReLU()(decoder)
# decoder level 2
decoder = Dense(n_inputs*2)(decoder)
decoder = BatchNormalization()(decoder)
decoder = LeakyReLU()(decoder)

In [8]:
# output layer
output = Dense(n_inputs, activation='linear')(decoder)
# define autoencoder model
model = Model(inputs=input_data_shape, outputs=output)
# compile autoencoder model
model.compile(optimizer='adam', loss='mse')

In [9]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 66)]              0         
                                                                 
 dense (Dense)               (None, 132)               8844      
                                                                 
 batch_normalization (BatchN  (None, 132)              528       
 ormalization)                                                   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 132)               0         
                                                                 
 dense_1 (Dense)             (None, 66)                8778      
                                                                 
 batch_normalization_1 (Batc  (None, 66)               264       
 hNormalization)                                             

In [10]:
# fit the autoencoder model to reconstruct input
history = model.fit(X_train, y_train, epochs=100, batch_size=16, verbose=2, validation_data=(X_test,y_test))

Epoch 1/100
3802/3802 - 12s - loss: 0.0104 - val_loss: 0.0063 - 12s/epoch - 3ms/step
Epoch 2/100
3802/3802 - 10s - loss: 0.0020 - val_loss: 0.0035 - 10s/epoch - 3ms/step
Epoch 3/100
3802/3802 - 11s - loss: 0.0015 - val_loss: 0.0026 - 11s/epoch - 3ms/step
Epoch 4/100
3802/3802 - 10s - loss: 0.0012 - val_loss: 8.5620e-04 - 10s/epoch - 3ms/step
Epoch 5/100
3802/3802 - 10s - loss: 0.0011 - val_loss: 0.0030 - 10s/epoch - 3ms/step
Epoch 6/100
3802/3802 - 10s - loss: 0.0010 - val_loss: 7.4517e-04 - 10s/epoch - 3ms/step
Epoch 7/100
3802/3802 - 10s - loss: 9.2583e-04 - val_loss: 0.0033 - 10s/epoch - 3ms/step
Epoch 8/100
3802/3802 - 11s - loss: 9.1668e-04 - val_loss: 0.0023 - 11s/epoch - 3ms/step
Epoch 9/100
3802/3802 - 11s - loss: 7.5791e-04 - val_loss: 0.0028 - 11s/epoch - 3ms/step
Epoch 10/100
3802/3802 - 11s - loss: 8.1166e-04 - val_loss: 0.0138 - 11s/epoch - 3ms/step
Epoch 11/100
3802/3802 - 11s - loss: 8.4282e-04 - val_loss: 0.0063 - 11s/epoch - 3ms/step
Epoch 12/100
3802/3802 - 10s - loss

In [11]:
# define an encoder model (without the decoder)
encoder = Model(inputs=input_data_shape, outputs=bottleneck)
# save the encoder to file
encoder.save('encoder.h5')



In [16]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
#Compressing the input data using Encoder Model and fitting it on the Logistic Regression model.
# load the model from file
encoder = load_model('encoder.h5')

# encode the train data
X_train_encode = encoder.predict(X_train)
# encode the test data
X_test_encode = encoder.predict(X_test)
# define the model
model = XGBClassifier(max_iter=100)
# fit the model on the training set
model.fit(X_train_encode, y_train)
# make predictions on the test set
yhat = model.predict(X_test_encode)
# calculate classification accuracy
acc = accuracy_score(y_test, yhat)
print(acc)

0.9999342364855978
