In [1]:
# All  import statements needed for the notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.metrics import *

In [2]:
import tensorflow as tf
# List the GPU devices TensorFlow can see; an empty list means no GPU is visible.
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and saved model are read from there.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Load the dataset CSV from the mounted Drive into a DataFrame.
data = pd.read_csv('/content/drive/MyDrive/balancedhalf_data.csv')

In [5]:
# Remove the first column of the loaded frame (presumably a saved index column — TODO confirm).
# NOTE: this mutates `data` in place, so re-running the cell drops one more column each time.
data.drop(columns=data.columns[0], inplace=True)

In [6]:
# Attack labels that are separated from the rest of the data and later relabelled as 'unknown'.
unknown = [
    'SSH-Patator',
    'DoS slowloris',
    'DoS Slowhttptest',
    'Bot',
    'Infiltration',
    'Heartbleed',
]
# Rows whose Label is one of the labels listed above.
att = data[data['Label'].isin(unknown)]

In [7]:
# Remove the selected unknown-class rows from `data` in place.
# errors='ignore' keeps the cell from failing if those index labels are already gone.
data.drop(index=att.index, inplace=True, errors='ignore')

In [8]:
data.Label.unique()

array(['BENIGN', 'DDoS', 'DoS Hulk', 'DoS GoldenEye', 'PortScan',
       'FTP-Patator'], dtype=object)

In [9]:
att.Label.unique()

array(['Infiltration', 'DoS slowloris', 'DoS Slowhttptest', 'Heartbleed',
       'Bot', 'SSH-Patator'], dtype=object)

## PreProcessing

In [10]:
# importing required libraries for normalizing data
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
# Names of every numeric-dtype column in `data`; these are the columns passed to normalization().
numeric_col = data.select_dtypes(include='number').columns

In [11]:
# Min-max scaler used for normalizing (despite the variable name, this is NOT a StandardScaler).
std_scaler = MinMaxScaler()
def normalization(df,att,col):
  """Scale the listed columns of ``df`` and apply the same scaling to ``att``.

  For every column name in ``col`` the module-level ``std_scaler`` is re-fitted
  on the values of ``df`` and then reused, without refitting, to transform the
  matching column of ``att``.

  Both frames are modified in place and returned as ``(df, att)``; pass copies
  if the originals must stay untouched.
  """
  for name in col:
    # Scalers expect 2-D input, so each column becomes an (n_rows, 1) array.
    fit_values = np.array(df[name]).reshape(-1, 1)
    other_values = np.array(att[name]).reshape(-1, 1)
    df[name] = std_scaler.fit_transform(fit_values)
    # Reuse the parameters fitted on df's column rather than fitting again on att.
    att[name] = std_scaler.transform(other_values)
  return df,att
# Normalize copies of both frames (normalization() writes into its arguments) and rebind the names.
data , att = normalization(data.copy(),att.copy(),numeric_col)

In [12]:
data.shape , att.shape

((1091727, 84), (19205, 84))

In [13]:
# Collapse every label in the separated attack rows into the single class 'unknown'.
att['Label'] = 'unknown'

In [14]:
# Feature matrix of the unknown-class rows, reshaped to (samples, 83, 1).
X = att.drop(columns='Label').to_numpy().reshape(-1, 83, 1)

# Labels of the unknown-class rows.
y = att['Label']

In [15]:
# Feature matrix of the remaining (known-class) rows, reshaped to (samples, 83, 1).
X_train = data.drop(columns='Label').to_numpy().reshape(-1, 83, 1)

# Labels of the known-class rows.
y_train = data['Label']

# **Load Model and Predict**


In [16]:
from tensorflow import keras
# Load the previously saved Keras model from the mounted Drive.
model = keras.models.load_model('/content/drive/MyDrive/models/model_split_softmax_cnn_model.hdf5')

In [17]:
# Model outputs for the unknown-class samples (one row of per-class scores per sample).
predicted = model.predict(X)
# Model outputs for the known-class samples.
Y_train_predicted = model.predict(X_train)



# **Evaluation of the Model**

In [18]:
# Confidence thresholds swept by the evaluation loop further down.
Threshold_values = [0.4 ,0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 0.95]

In [19]:
y_train

0               BENIGN
1               BENIGN
2               BENIGN
3               BENIGN
4               BENIGN
              ...     
1105030    FTP-Patator
1105031    FTP-Patator
1105032    FTP-Patator
1105033    FTP-Patator
1105034    FTP-Patator
Name: Label, Length: 1091727, dtype: object

In [20]:
# Distinct known-class labels as a plain list, in order of first appearance in y_train.
mapping = y_train.unique().tolist()

In [21]:
mapping

['BENIGN', 'DDoS', 'DoS Hulk', 'DoS GoldenEye', 'PortScan', 'FTP-Patator']

In [22]:
# Lookup from predicted class index to class name.
# NOTE(review): this assumes the model's output index order equals the order listed here.
# If the labels were encoded differently at training time (e.g. alphabetically), the
# mapping would be wrong — confirm against the training notebook.
my_dict = dict(enumerate([
    'BENIGN',
    'DDoS',
    'DoS Hulk',
    'DoS GoldenEye',
    'PortScan',
    'FTP-Patator',
]))

In [23]:
# Highest per-class score of each sample: unknown-class set first, then known-class set.
max_probability = predicted.max(axis=1)
max_probability_train = Y_train_predicted.max(axis=1)

In [24]:
# Index of the highest-scoring class for every known-class sample.
y_prediction_index = Y_train_predicted.argmax(axis=1)

In [25]:
y_prediction_index[:100]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [26]:
# Translate each predicted class index into its class name through my_dict.
# (Despite "test" in the name, these are the predictions for the known/train samples.)
index_to_label = np.vectorize(my_dict.get)
y_prediction_test_label = index_to_label(y_prediction_index)

In [27]:
y_prediction_test_label.shape , y_train.shape

((1091727,), (1091727,))

In [28]:
type(y_prediction_test_label) , type(y_train)

(numpy.ndarray, pandas.core.series.Series)

In [29]:
# Convert the label Series to a NumPy array for element-wise comparison with the predictions.
# np.asarray is used instead of Series.to_numpy() so the cell can be re-run safely:
# on a second run y_train is already an ndarray, which has no .to_numpy() method.
y_train = np.asarray(y_train)

In [30]:
y_train[:10] == y_prediction_test_label[:10]

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [31]:
# Element-wise flag: True where the predicted label equals the true label of the same sample.
RIGHT_BOOLEAN = y_train == y_prediction_test_label

In [32]:
RIGHT_BOOLEAN.shape

(1091727,)

In [33]:
# Total number of evaluated samples: known-class rows plus unknown-class rows.
TOTAL = len(data) + len(att)

In [34]:
# Sweep the confidence thresholds and report the combined accuracy for each one.
# Decision rule: a sample is rejected as "unknown" when its top probability is below
# the threshold, and accepted as the predicted known class otherwise (>= threshold).
for threshold in Threshold_values:
  #for each threshold value we will check out the accuracy of the model
  print('USING THRESHOLD VALUE ',threshold)

  ## Unknown set: correct when the sample is rejected (top probability below threshold)
  unknown_hits = int(np.count_nonzero(max_probability < threshold))

  ## Known (train) set: correct when the sample is accepted AND the predicted label is right.
  ## ">=" (not ">") keeps this the exact complement of the rejection rule above, so a
  ## probability equal to the threshold is no longer left uncounted.
  known_hits = int(np.count_nonzero((max_probability_train >= threshold) & RIGHT_BOOLEAN))

  truepredicted = unknown_hits + known_hits
  Accuracy = truepredicted/TOTAL
  print('Accuracy is ',Accuracy)

USING THRESHOLD VALUE  0.4
Accuracy is  0.6155615285183972
USING THRESHOLD VALUE  0.5
Accuracy is  0.6162627415539385
USING THRESHOLD VALUE  0.6
Accuracy is  0.6164301685431691
USING THRESHOLD VALUE  0.7
Accuracy is  0.6165246837790251
USING THRESHOLD VALUE  0.8
Accuracy is  0.6192008151714057
USING THRESHOLD VALUE  0.9
Accuracy is  0.6191981147360954
USING THRESHOLD VALUE  0.95
Accuracy is  0.6192080163322328


In [36]:
predicted[:100]

array([[1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 