In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import load_model

In [2]:
DDoS=pd.read_csv('/content/dataset_sdn.csv')

In [3]:
DDoS.Protocol.unique()
DDoS['Protocol'] = DDoS['Protocol'].replace('TCP', '0')
DDoS['Protocol'] = DDoS['Protocol'].replace('UDP', '1')
DDoS['Protocol'] = DDoS['Protocol'].replace('ICMP', '2')
DDoS.Protocol.unique()

array(['1', '0', '2'], dtype=object)

In [4]:
DDoS["rx_kbps"] = DDoS["rx_kbps"].fillna(DDoS["rx_kbps"].mean())

In [5]:
DDoS["tot_kbps"] = DDoS["tot_kbps"].fillna(DDoS["tot_kbps"].mean())

In [6]:
DDoS.isnull().sum()

dt             0
switch         0
src            0
dst            0
pktcount       0
bytecount      0
dur            0
dur_nsec       0
tot_dur        0
flows          0
packetins      0
pktperflow     0
byteperflow    0
pktrate        0
Pairflow       0
Protocol       0
port_no        0
tx_bytes       0
rx_bytes       0
tx_kbps        0
rx_kbps        0
tot_kbps       0
label          0
dtype: int64

In [7]:
# Splitting dataset into features and label
X= DDoS[['dt', 'switch', 'pktcount', 'bytecount', 'dur', 'dur_nsec', 'tot_dur',
       'flows', 'packetins', 'pktperflow', 'byteperflow', 'pktrate',
       'Pairflow','Protocol' ,'port_no', 'tx_bytes', 'rx_bytes', 'tx_kbps', 'rx_kbps',
       'tot_kbps']]
y = DDoS['label']

# Splitting the dataset into the training set and the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [8]:
# scale data
t = MinMaxScaler()
t.fit(X_train)
X_train = t.transform(X_train)
X_test = t.transform(X_test)

In [9]:
# AutoEncoder Model Preparation
n_inputs = X.shape[1]
# define encoder
input_data_shape= Input(shape=(n_inputs,))
# encoder level 1
encoder= Dense(n_inputs*2)(input_data_shape)
encoder = BatchNormalization()(encoder)
encoder= LeakyReLU()(encoder)
# encoder level 2
encoder= Dense(n_inputs)(encoder)
encoder= BatchNormalization()(encoder)
encoder= LeakyReLU()(encoder)
# bottleneck
n_bottleneck = round(float(n_inputs) / 2.0)
bottleneck = Dense(n_bottleneck)(encoder)
# define decoder, level 1
decoder = Dense(n_inputs)(bottleneck)
decoder = BatchNormalization()(decoder)
decoder = LeakyReLU()(decoder)
# decoder level 2
decoder = Dense(n_inputs*2)(decoder)
decoder = BatchNormalization()(decoder)
decoder = LeakyReLU()(decoder)

In [10]:
# output layer
output = Dense(n_inputs, activation='linear')(decoder)
# define autoencoder model
model = Model(inputs=input_data_shape, outputs=output)
# compile autoencoder model
model.compile(optimizer='adam', loss='mse')

In [11]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 20)]              0         
                                                                 
 dense (Dense)               (None, 40)                840       
                                                                 
 batch_normalization (BatchN  (None, 40)               160       
 ormalization)                                                   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 40)                0         
                                                                 
 dense_1 (Dense)             (None, 20)                820       
                                                                 
 batch_normalization_1 (Batc  (None, 20)               80        
 hNormalization)                                             

In [12]:
# fit the autoencoder model to reconstruct input
history = model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=2, validation_data=(X_test,y_test))

Epoch 1/20
1305/1305 - 5s - loss: 0.0935 - val_loss: 0.0771 - 5s/epoch - 4ms/step
Epoch 2/20
1305/1305 - 3s - loss: 0.0422 - val_loss: 0.0332 - 3s/epoch - 3ms/step
Epoch 3/20
1305/1305 - 4s - loss: 0.0343 - val_loss: 0.0240 - 4s/epoch - 3ms/step
Epoch 4/20
1305/1305 - 4s - loss: 0.0289 - val_loss: 0.0248 - 4s/epoch - 3ms/step
Epoch 5/20
1305/1305 - 4s - loss: 0.0264 - val_loss: 0.0269 - 4s/epoch - 3ms/step
Epoch 6/20
1305/1305 - 3s - loss: 0.0243 - val_loss: 0.0198 - 3s/epoch - 3ms/step
Epoch 7/20
1305/1305 - 3s - loss: 0.0224 - val_loss: 0.0165 - 3s/epoch - 3ms/step
Epoch 8/20
1305/1305 - 4s - loss: 0.0205 - val_loss: 0.0189 - 4s/epoch - 3ms/step
Epoch 9/20
1305/1305 - 4s - loss: 0.0193 - val_loss: 0.0167 - 4s/epoch - 3ms/step
Epoch 10/20
1305/1305 - 4s - loss: 0.0184 - val_loss: 0.0153 - 4s/epoch - 3ms/step
Epoch 11/20
1305/1305 - 3s - loss: 0.0171 - val_loss: 0.0125 - 3s/epoch - 3ms/step
Epoch 12/20
1305/1305 - 3s - loss: 0.0173 - val_loss: 0.0130 - 3s/epoch - 3ms/step
Epoch 13/20
1

In [13]:
# define an encoder model (without the decoder)
encoder = Model(inputs=input_data_shape, outputs=bottleneck)


In [14]:
# compile autoencoder model
encoder.compile(optimizer='adam',loss='mse')
history = encoder.fit(X_train, y_train, epochs=20, batch_size=16, verbose=2, validation_data=(X_test,y_test))
# save the encoder to file
encoder.save('encoder.h5')

Epoch 1/20
5218/5218 - 19s - loss: 0.0995 - val_loss: 0.0548 - 19s/epoch - 4ms/step
Epoch 2/20
5218/5218 - 16s - loss: 0.0550 - val_loss: 0.0472 - 16s/epoch - 3ms/step
Epoch 3/20
5218/5218 - 16s - loss: 0.0492 - val_loss: 0.0391 - 16s/epoch - 3ms/step
Epoch 4/20
5218/5218 - 17s - loss: 0.0465 - val_loss: 0.0349 - 17s/epoch - 3ms/step
Epoch 5/20
5218/5218 - 17s - loss: 0.0440 - val_loss: 0.0331 - 17s/epoch - 3ms/step
Epoch 6/20
5218/5218 - 12s - loss: 0.0426 - val_loss: 0.0348 - 12s/epoch - 2ms/step
Epoch 7/20
5218/5218 - 10s - loss: 0.0412 - val_loss: 0.0295 - 10s/epoch - 2ms/step
Epoch 8/20
5218/5218 - 19s - loss: 0.0395 - val_loss: 0.0370 - 19s/epoch - 4ms/step
Epoch 9/20
5218/5218 - 16s - loss: 0.0389 - val_loss: 0.0269 - 16s/epoch - 3ms/step
Epoch 10/20
5218/5218 - 11s - loss: 0.0372 - val_loss: 0.0287 - 11s/epoch - 2ms/step
Epoch 11/20
5218/5218 - 11s - loss: 0.0366 - val_loss: 0.0342 - 11s/epoch - 2ms/step
Epoch 12/20
5218/5218 - 10s - loss: 0.0356 - val_loss: 0.0313 - 10s/epoch 

In [15]:
from xgboost import XGBClassifier

#Compressing the input data using Encoder Model and fitting it on the XG Boost model.
# load the model from file
encoder = load_model('encoder.h5')

# encode the train data
X_train_encode = encoder.predict(X_train)
# encode the test data
X_test_encode = encoder.predict(X_test)
# define the model
model = XGBClassifier(max_iter=20)
# fit the model on the training set
model.fit(X_train_encode, y_train)
# make predictions on the test set
yhat = model.predict(X_test_encode)


In [16]:
# calculate Evaluation metrics 
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
acc = accuracy_score(y_test, yhat)
print('accuarcy of XGBoost = ', acc)
precision1 = precision_score(y_test, yhat)
print(' precision of XGBoost = ', precision1)

recall1 = recall_score(y_test, yhat)
print(' recall of XGBoost = ',recall1 )

f11=f1_score(y_test, yhat, average='macro')
print('f1 score ofXGBoost = ',f11)

accuarcy of XGBoost =  0.9758014279553404
 precision of XGBoost =  0.9741194293649792
 recall of XGBoost =  0.9625748502994012
f1 score ofXGBoost =  0.97436992330395
