In [1]:
# importing required libraries
import numpy as np
import pandas as pd

In [2]:
import tensorflow as tf
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
FOLDER = '/content/drive/MyDrive/'

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
data = pd.read_csv(FOLDER + 'balancedhalf_data.csv')

In [6]:
# Remove the first column (selected by position) and rebind `data` to the result.
# NOTE(review): re-running this cell drops one more column each time; run it once per load.
data = data.drop(columns=data.columns[0])

## **Binarize labels: BENIGN → nonattack, every other class → attack**

In [7]:
# Distinct class names present in the Label column, as a plain Python list.
x = data['Label'].unique().tolist()
x

['BENIGN',
 'Infiltration',
 'DDoS',
 'DoS slowloris',
 'DoS Slowhttptest',
 'DoS Hulk',
 'DoS GoldenEye',
 'Heartbleed',
 'PortScan',
 'Bot',
 'FTP-Patator',
 'SSH-Patator']

In [8]:
# Binary label for each original class name in `x`: only BENIGN traffic is
# 'nonattack', every other class is 'attack'. Building the list from `x` keeps
# it aligned with the labels actually present, whatever their number or order,
# instead of assuming exactly 12 classes with BENIGN in first position.
value = ['nonattack' if label == 'BENIGN' else 'attack' for label in x]
value

['nonattack',
 'attack',
 'attack',
 'attack',
 'attack',
 'attack',
 'attack',
 'attack',
 'attack',
 'attack',
 'attack',
 'attack']

In [9]:
# Swap every original class name in `x` for its binary counterpart in `value`.
# The result is assigned back to the column rather than calling an in-place
# replace on `data.Label`: with pandas Copy-on-Write enabled, an in-place edit
# of the extracted Series does not propagate to the parent DataFrame.
data['Label'] = data['Label'].replace(to_replace=x, value=value)

## Normalization On Data
# **Data Normalization**

In [10]:
# importing required libraries for normalizing data
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

In [11]:
# selecting numeric attributes columns from data
numeric_col = data.select_dtypes(include='number').columns

In [12]:
# Min-max scaling: each numeric column is rescaled independently using its own
# minimum and maximum. The scaler keeps its original name `std_scaler`, but it
# is a MinMaxScaler, not a StandardScaler.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split performed in a later cell, so test-set statistics influence the scaling.
std_scaler = MinMaxScaler()


def normalization(df, col):
    """Min-max scale the columns `col` of `df` in place and return `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose listed columns are overwritten with their scaled values.
    col : iterable of column labels
        Columns to scale; each one is scaled by its own minimum and maximum.

    Returns
    -------
    pandas.DataFrame
        The same `df` object that was passed in.
    """
    col = list(col)
    if not col:
        # Nothing to scale; the scaler rejects an input with zero features.
        return df
    # A single fit over all columns yields the same per-column result as
    # fitting column by column, and leaves `std_scaler` holding the parameters
    # of every column so the identical scaling can be re-applied later with
    # `std_scaler.transform(...)`.
    df[col] = std_scaler.fit_transform(df[col].to_numpy())
    return df


# calling the normalization() function on a copy, so the unscaled frame is not mutated
data = normalization(data.copy(), numeric_col)

In [13]:
# data after normalization
data.head()

Unnamed: 0,Flow ID,Source IP,Source Port,Destination IP,Destination Port,Protocol,Timestamp,Flow Duration,Total Fwd Packets,Total Backward Packets,...,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min,Label
0,0.445135,0.18769,0.989151,0.196143,0.500191,0.352941,0.005155,1.083333e-07,5e-06,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,nonattack
1,0.147324,0.264781,0.00676,0.28318,0.598764,0.352941,0.066974,6.333333e-07,0.0,3e-06,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,nonattack
2,0.241959,0.186718,0.161669,0.19604,0.000809,1.0,0.004347,2.683333e-06,5e-06,7e-06,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,nonattack
3,0.325442,0.260168,0.975982,0.701248,0.00676,0.352941,0.015157,0.002392658,9.2e-05,0.000162,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,nonattack
4,0.195175,0.162074,0.839017,0.164278,0.000809,1.0,0.000269,0.00039055,5e-06,7e-06,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,nonattack


# **One-hot-encoding**

In [14]:
# selecting non-numeric (categorical) attribute columns from data
non_numeric_col = data.select_dtypes(exclude='number').columns

In [15]:
# creating a dataframe with only categorical attributes
categorical = data[non_numeric_col]
categorical.head()

Unnamed: 0,Label
0,nonattack
1,nonattack
2,nonattack
3,nonattack
4,nonattack


In [16]:
# one-hot-encoding categorical attributes using pandas.get_dummies() function
# - The input is taken from `data` directly, so re-running this cell does not
#   fail on an already-encoded `categorical` frame.
# - dtype is pinned to uint8 so the indicator columns are 0/1 integers on every
#   pandas version (pandas >= 2.0 would otherwise emit booleans).
categorical = pd.get_dummies(data[non_numeric_col], columns=non_numeric_col, dtype='uint8')
categorical.head()

Unnamed: 0,Label_attack,Label_nonattack
0,0,1
1,0,1
2,0,1
3,0,1
4,0,1


In [17]:
# Feature matrix: every column of `data` except the target column 'Label'.
X = data.drop(columns='Label')

In [18]:
X.head(1)

Unnamed: 0,Flow ID,Source IP,Source Port,Destination IP,Destination Port,Protocol,Timestamp,Flow Duration,Total Fwd Packets,Total Backward Packets,...,act_data_pkt_fwd,min_seg_size_forward,Active Mean,Active Std,Active Max,Active Min,Idle Mean,Idle Std,Idle Max,Idle Min
0,0.445135,0.18769,0.989151,0.196143,0.500191,0.352941,0.005155,1.083333e-07,5e-06,0.0,...,5e-06,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
y = categorical

In [20]:
y.head(1)

Unnamed: 0,Label_attack,Label_nonattack
0,0,1


Reference on choosing sigmoid vs. softmax for the classification output layer: https://glassboxmedicine.com/2019/05/26/classification-sigmoid-vs-softmax/#:~:text=The%20probabilities%20produced%20by%20a%20softmax%20will%20always%20sum%20to,decrease%20by%20an%20equivalent%20amount.

In [21]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows as the test split; the fixed random_state makes the
# shuffle reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

#  Convolutional Neural Networks

In [22]:
X_train.shape , y_train.shape , X_test.shape

((744324, 83), (744324, 2), (366608, 83))

In [23]:
# Append a trailing channel axis so every sample has shape (n_features, 1),
# matching the Conv1D input declared for the model.
# np.asarray accepts both the DataFrames produced by the split and arrays that
# were already reshaped, so re-running this cell is harmless, and the feature
# count is read from the data instead of being hard-coded to 83.
X_train = np.asarray(X_train)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = np.asarray(X_test)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

## Model Training

In [24]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense,Dropout,LSTM,Embedding,SimpleRNN, GRU,Activation,Flatten
from tensorflow.keras.layers import Conv1D,MaxPool1D, Flatten
from tensorflow.keras.utils import plot_model
from tensorflow.keras.preprocessing import sequence
from sklearn.metrics import (precision_score, recall_score,confusion_matrix,f1_score, accuracy_score,mean_squared_error,mean_absolute_error)
import pandas as pd
from tensorflow.keras import callbacks
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
import matplotlib.pyplot as plt

In [25]:
# 1-D CNN: two convolutional stages (64 filters, then 128; kernel size 3, "same"
# padding), each closed by max-pooling, followed by a dense head with dropout.
# NOTE(review): the head is two sigmoid units trained against one-hot labels;
# confirm this is preferred over a 2-way softmax for mutually exclusive classes.
model_cnn = Sequential([
    # stage 1
    Conv1D(64, 3, padding="same", activation="relu", input_shape=(83, 1)),
    Conv1D(64, 3, padding="same", activation="relu"),
    MaxPool1D(pool_size=2),
    # stage 2
    Conv1D(128, 3, padding="same", activation="relu"),
    Conv1D(128, 3, padding="same", activation="relu"),
    MaxPool1D(pool_size=2),
    # classifier head
    Flatten(),
    Dense(128, activation="relu"),
    Dropout(0.5),
    Dense(2, activation="sigmoid"),
])

In [26]:
model_cnn.compile(loss="binary_crossentropy", optimizer="adam", metrics=['accuracy'])

# Early stopping: halt once validation accuracy has not improved by at least
# 1e-4 for 5 consecutive epochs. restore_best_weights=True rolls the model back
# to its best epoch; without it the model saved below would carry the weights
# of the final, non-improving epoch.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=0.0001,
    patience=5,
    restore_best_weights=True,
)

# Fit and save the model for future use.
# NOTE(review): the test split doubles as the validation set that drives early
# stopping, so it is not an untouched hold-out; consider a separate validation split.
model_cnn.fit(X_train, y_train, epochs=1000, validation_data=(X_test, y_test), callbacks=[es])
model_cnn.save(FOLDER + "models/binary_model_50.hdf5")

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000


In [27]:
# Write the model again under a relative path, i.e. into the current working
# directory, in addition to the copy saved under FOLDER by the training cell.
model_cnn.save('binary_model_50.hdf5')

In [28]:
# NOTE(review): identical to the save call in the preceding cell; it rewrites
# the same relative-path file and can likely be removed.
model_cnn.save('binary_model_50.hdf5')