## Importing Libraries

In [1]:
from time import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix

### Loading dataset

In [2]:
data = pd.read_csv('dataset.csv')
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,7,21,30,2,3,29,2,3,57,5,1,0
1,7,32,70,2,4,67,2,4,132,5,1,0
2,7,15,1,2,2,0,2,2,0,5,1,0
3,7,33,77,2,7,72,2,7,142,5,1,0
4,7,55,78,3,7,73,3,7,144,6,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
355886,7,25,46,2,15,45,4,15,90,7,0,8505
355887,7,82,14,7,10,8,0,10,16,0,0,22
355888,7,82,14,7,10,8,0,10,17,1,0,1
355889,7,30,62,2,20,60,3,20,120,3,0,0


### Data Preprocessing


In [3]:
y = data.iloc[:,0].values
x = data.iloc[:,1:].values
x,y

(array([[  21,   30,    2, ...,    5,    1,    0],
        [  32,   70,    2, ...,    5,    1,    0],
        [  15,    1,    2, ...,    5,    1,    0],
        ...,
        [  82,   14,    7, ...,    1,    0,    1],
        [  30,   62,    2, ...,    3,    0,    0],
        [  30,   62,    2, ...,    7,    0, 9156]], dtype=int64),
 array([7, 7, 7, ..., 7, 7, 7], dtype=int64))

### Splitting the dataset into training and testing 

In [4]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25)

### Scaling the dataset 

In [5]:
sc = StandardScaler()
x_train_sc = sc.fit_transform(x_train)
x_test_sc = sc.transform(x_test)

# Applying Machine Learning Algorithms

## Applying logistic regression

In [6]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [7]:
s = time()
lr.fit(x_train_sc,y_train)
lr_time = time()-s
print("Time elapsed",lr_time)

Time elapsed 23.518442630767822


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Training accuracy

In [8]:
lr.score(x_train_sc,y_train)

0.9889216913059441

### Testing accuracy

In [9]:
lr.score(x_test_sc,y_test)

0.988681959695638

### Accuracy of the model

In [10]:
y_lr_train_pred = lr.predict(x_train_sc)
lr_train_accuracy = accuracy_score(y_train,y_lr_train_pred)
print("Accuracy of Logistic Regression Training Dataset: ",lr_train_accuracy)

Accuracy of Logistic Regression Training Dataset:  0.9889216913059441


In [11]:
y_lr_test_pred = lr.predict(x_test_sc)
lr_test_accuracy = accuracy_score(y_test,y_lr_test_pred)
print("Accuracy of Logistic Regression Testing Dataset: ",lr_test_accuracy)

Accuracy of Logistic Regression Testing Dataset:  0.988681959695638


## Applying KNN

In [12]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()

In [13]:
s = time()
knn.fit(x_train_sc,y_train)
knn_time = time()-s
print("Time elapsed",knn_time)

Time elapsed 1.969843864440918


### Training accuracy

In [14]:
knn.score(x_train_sc,y_train)

0.9935073693044306

### Testing accuracy

In [15]:
knn.score(x_test_sc,y_test)

0.993750913198386

### Accuracy of the model

In [16]:
y_knn_train_pred = knn.predict(x_train_sc)
knn_train_accuracy = accuracy_score(y_train,y_knn_train_pred)
print("Accuracy of K Nearest Neigbour Training Dataset: ",knn_train_accuracy)

Accuracy of K Nearest Neigbour Training Dataset:  0.9935073693044306


In [17]:
y_knn_test_pred = knn.predict(x_test_sc)
knn_test_accuracy = accuracy_score(y_test,y_knn_test_pred)
print("Accuracy of K Nearest Neigbour Testing Dataset: ",knn_test_accuracy)

Accuracy of K Nearest Neigbour Testing Dataset:  0.993750913198386


## Applying Decision Tree Classifier

In [18]:
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier()

In [19]:
s = time()
dtc.fit(x_train_sc,y_train)
dtc_time = time()-s
print("Time elapsed",dtc_time)

Time elapsed 1.0803682804107666


### Training accuracy

In [20]:
dtc.score(x_train_sc,y_train)

0.9944777047632606

### Testing accuracy

In [21]:
dtc.score(x_test_sc,y_test)

0.9940543760466658

### Accuracy of the model

In [22]:
y_dtc_train_pred = dtc.predict(x_train_sc)
dtc_train_accuracy = accuracy_score(y_train,y_dtc_train_pred)
print("Accuracy of Decision Tree Classifier Training Dataset: ",dtc_train_accuracy)

Accuracy of Decision Tree Classifier Training Dataset:  0.9944777047632606


In [23]:
y_dtc_test_pred = dtc.predict(x_test_sc)
dtc_test_accuracy = accuracy_score(y_test,y_dtc_test_pred)
print("Accuracy of Decision Tree Classifier Testing Dataset: ",dtc_test_accuracy)

Accuracy of Decision Tree Classifier Testing Dataset:  0.9940543760466658


## Applying Random Forest Classifier

In [24]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()

In [25]:
s = time()
rfc.fit(x_train_sc,y_train)
rfc_time = time()-s
print("Time elapsed",rfc_time)

Time elapsed 29.00949478149414


### Training accuracy

In [26]:
rfc.score(x_train_sc,y_train)

0.9944777047632606

### Testing accuracy

In [27]:
rfc.score(x_test_sc,y_test)

0.994076854776168

### Accuracy of the model

In [28]:
y_rfc_train_pred = rfc.predict(x_train_sc)
rfc_train_accuracy = accuracy_score(y_train,y_rfc_train_pred)
print("Accuracy of Random Forest Classifier Training Dataset: ",rfc_train_accuracy)

Accuracy of Random Forest Classifier Training Dataset:  0.9944777047632606


In [29]:
y_rfc_test_pred = rfc.predict(x_test_sc)
rfc_test_accuracy = accuracy_score(y_test,y_rfc_test_pred)
print("Accuracy of Random Forest Classifier Testing Dataset: ",rfc_test_accuracy)

Accuracy of Random Forest Classifier Testing Dataset:  0.994076854776168


## Applying Naive bayes 

In [30]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()

In [31]:
s = time()
nb.fit(x_train,y_train)
nb_time = time()-s
print("Time elapsed",nb_time)

Time elapsed 0.20424652099609375


### Training accuracy

In [32]:
nb.score(x_train,y_train)

0.760004945339018

### Testing accuracy

In [33]:
nb.score(x_test,y_test)

0.7615568768053229

### Accuracy of the model

In [34]:
y_nb_train_pred = nb.predict(x_train)
nb_train_accuracy = accuracy_score(y_train,y_nb_train_pred)
print("Accuracy of Naive Bayes Training Dataset: ",nb_train_accuracy)

Accuracy of Naive Bayes Training Dataset:  0.760004945339018


In [35]:
y_nb_test_pred = nb.predict(x_test)
nb_test_accuracy = accuracy_score(y_test,y_nb_test_pred)
print("Accuracy of Naive Bayes Testing Dataset: ",nb_test_accuracy)

Accuracy of Naive Bayes Testing Dataset:  0.7615568768053229


## Applying SVM

In [36]:
from sklearn.svm import LinearSVC
svc = LinearSVC()

In [37]:
s = time()
svc.fit(x_train_sc,y_train)
svc_time = time()-s
print("Time elapsed",svc_time)

Time elapsed 118.11468291282654


### Training accuracy

In [38]:
svc.score(x_train_sc,y_train)

0.9839276481915794

### Testing accuracy

In [39]:
svc.score(x_test_sc,y_test)

0.983938947770672

### Accuracy of the model

In [40]:
y_svc_train_pred = svc.predict(x_train_sc)
svc_train_accuracy = accuracy_score(y_train,y_svc_train_pred)
print("Accuracy of Support Vector Machine Training Dataset: ",svc_train_accuracy)

Accuracy of Support Vector Machine Training Dataset:  0.9839276481915794


In [41]:
y_svc_test_pred = svc.predict(x_test_sc)
svc_test_accuracy = accuracy_score(y_test,y_svc_test_pred)
print("Accuracy of Support Vector Machine Testing Dataset: ",svc_test_accuracy)

Accuracy of Support Vector Machine Testing Dataset:  0.983938947770672


## Applying Artificial Neural Network using Tensorflow

In [42]:
import tensorflow as tf
from tensorflow.keras import Sequential,layers

In [43]:
y_train.shape,y_test.shape

((266918,), (88973,))

In [44]:
y_train = y_train.reshape(-1,1)

In [45]:
y_test = y_test.reshape(-1,1)

In [46]:
y_train.shape,y_test.shape

((266918, 1), (88973, 1))

In [47]:
x_train.shape,x_train_sc.shape,x_test.shape,x_test_sc.shape

((266918, 11), (266918, 11), (88973, 11), (88973, 11))

In [48]:
ann = Sequential([
    layers.Dense(10,input_shape=(11,),activation='relu'),
    layers.Dense(20,activation='relu'),
    layers.Dense(20,activation='relu'),
    layers.Dense(8,activation='sigmoid'),
    
])

ann.compile(optimizer='adam',
             loss = 'sparse_categorical_crossentropy',
             metrics=['accuracy'])
s = time()
ann.fit(x_train_sc,y_train,epochs= 20)
ann_time = time()-s
print("Time elapsed",ann_time)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Time elapsed 319.8182575702667


## Training Accuracy

In [49]:
loss_train, ann_train_accuracy = ann.evaluate(x_train_sc,y_train)
ann_train_accuracy



0.9943952560424805

In [50]:
loss_test, ann_test_accuracy = ann.evaluate(x_test_sc,y_test)
ann_test_accuracy



0.9940094351768494

## Evaluation Metrics

In [51]:
target_names = ['DoSattack','dataProbing','malitiousControl', 'malitiousOperation','scan','spying', 'wrongSetUp','Normal']

In [52]:
target_names

['DoSattack',
 'dataProbing',
 'malitiousControl',
 'malitiousOperation',
 'scan',
 'spying',
 'wrongSetUp',
 'Normal']

## Classification Report of Logistic Regression

### For Training Dataset

In [53]:
print(classification_report(y_train, y_lr_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.94      0.50      0.65      2950
       dataProbing       1.00      0.80      0.89       189
  malitiousControl       0.97      0.92      0.95       672
malitiousOperation       0.77      0.73      0.75       497
              scan       0.91      0.49      0.64      1150
            spying       0.00      0.00      0.00       405
        wrongSetUp       0.94      1.00      0.97       103
            Normal       0.99      1.00      0.99    260952

          accuracy                           0.99    266918
         macro avg       0.81      0.68      0.73    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing Dataset

In [54]:
print(classification_report(y_test, y_lr_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.95      0.50      0.65      1050
       dataProbing       1.00      0.91      0.95        53
  malitiousControl       0.98      0.93      0.95       217
malitiousOperation       0.70      0.75      0.73       160
              scan       0.89      0.50      0.64       381
            spying       0.00      0.00      0.00       127
        wrongSetUp       0.83      1.00      0.90        19
            Normal       0.99      1.00      0.99     86966

          accuracy                           0.99     88973
         macro avg       0.79      0.70      0.73     88973
      weighted avg       0.99      0.99      0.99     88973



## Confusion Matrix for Logistic Regression

### For Training dataset

In [61]:
confusion_matrix(y_train, y_lr_train_pred)

array([[  1476,      0,      0,      0,      0,      0,      0,   1474],
       [     0,    152,      0,      0,      0,      0,      0,     37],
       [     0,      0,    619,      0,      0,      0,      0,     53],
       [     0,      0,      0,    363,      0,      0,      0,    134],
       [     0,      0,     16,      0,    567,      0,      7,    560],
       [     0,      0,      0,      0,     52,      0,      0,    353],
       [     0,      0,      0,      0,      0,      0,    103,      0],
       [    94,      0,      3,    111,      4,     59,      0, 260681]],
      dtype=int64)

### For Testing dataset

In [62]:
confusion_matrix(y_test, y_lr_test_pred)

array([[  524,     0,     0,     0,     0,     0,     0,   526],
       [    0,    48,     0,     0,     0,     0,     0,     5],
       [    0,     0,   201,     0,     0,     0,     0,    16],
       [    0,     0,     0,   120,     0,     0,     0,    40],
       [    0,     0,     4,     0,   192,     0,     4,   181],
       [    0,     0,     0,     0,    18,     0,     0,   109],
       [    0,     0,     0,     0,     0,     0,    19,     0],
       [   30,     0,     0,    51,     5,    18,     0, 86862]],
      dtype=int64)

## Classification Report for K Nearest Neighbour

### For Training Dataset

In [57]:
print(classification_report(y_train, y_knn_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.63      1.00      0.77      2950
       dataProbing       1.00      1.00      1.00       189
  malitiousControl       1.00      1.00      1.00       672
malitiousOperation       1.00      1.00      1.00       497
              scan       1.00      1.00      1.00      1150
            spying       1.00      1.00      1.00       405
        wrongSetUp       1.00      1.00      1.00       103
            Normal       1.00      0.99      1.00    260952

          accuracy                           0.99    266918
         macro avg       0.95      1.00      0.97    266918
      weighted avg       1.00      0.99      0.99    266918



### For Testing Dataset

In [58]:
print(classification_report(y_test, y_knn_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.65      1.00      0.79      1050
       dataProbing       1.00      1.00      1.00        53
  malitiousControl       1.00      1.00      1.00       217
malitiousOperation       1.00      1.00      1.00       160
              scan       1.00      1.00      1.00       381
            spying       1.00      1.00      1.00       127
        wrongSetUp       1.00      1.00      1.00        19
            Normal       1.00      0.99      1.00     86966

          accuracy                           0.99     88973
         macro avg       0.96      1.00      0.97     88973
      weighted avg       1.00      0.99      0.99     88973



## Confusion Matrix for K Nearest Neighbour

### For Training dataset

In [63]:
confusion_matrix(y_train, y_knn_train_pred)

array([[  2950,      0,      0,      0,      0,      0,      0,      0],
       [     0,    189,      0,      0,      0,      0,      0,      0],
       [     0,      0,    672,      0,      0,      0,      0,      0],
       [     0,      0,      0,    497,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1150,      0,      0,      0],
       [     0,      0,      0,      0,      0,    405,      0,      0],
       [     0,      0,      0,      0,      0,      0,    103,      0],
       [  1728,      0,      0,      0,      4,      1,      0, 259219]],
      dtype=int64)

### For Testing dataset

In [64]:
confusion_matrix(y_test, y_knn_test_pred)

array([[ 1050,     0,     0,     0,     0,     0,     0,     0],
       [    0,    53,     0,     0,     0,     0,     0,     0],
       [    0,     0,   217,     0,     0,     0,     0,     0],
       [    0,     0,     0,   160,     0,     0,     0,     0],
       [    0,     0,     0,     0,   381,     0,     0,     0],
       [    0,     0,     0,     0,     0,   127,     0,     0],
       [    0,     0,     0,     0,     0,     0,    19,     0],
       [  555,     0,     0,     0,     1,     0,     0, 86410]],
      dtype=int64)

## Classification Report for Decision Tree Classifier

### For Training Dataset

In [None]:
print(classification_report(y_train, y_dtc_train_pred, target_names=target_names))

### For Testing Dataset

In [None]:
print(classification_report(y_test, y_dtc_test_pred, target_names=target_names))

## Confusion Matrix for Decision Tree Classifier

### For Training dataset

In [65]:
confusion_matrix(y_train, y_dtc_train_pred)

array([[  1476,      0,      0,      0,      0,      0,      0,   1474],
       [     0,    189,      0,      0,      0,      0,      0,      0],
       [     0,      0,    672,      0,      0,      0,      0,      0],
       [     0,      0,      0,    497,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1150,      0,      0,      0],
       [     0,      0,      0,      0,      0,    405,      0,      0],
       [     0,      0,      0,      0,      0,      0,    103,      0],
       [     0,      0,      0,      0,      0,      0,      0, 260952]],
      dtype=int64)

### For Testing dataset

In [66]:
confusion_matrix(y_test, y_dtc_test_pred)

array([[  524,     0,     0,     0,     0,     0,     0,   526],
       [    0,    53,     0,     0,     0,     0,     0,     0],
       [    0,     0,   217,     0,     0,     0,     0,     0],
       [    0,     0,     0,   160,     0,     0,     0,     0],
       [    0,     0,     0,     0,   381,     0,     0,     0],
       [    0,     0,     0,     0,     0,   127,     0,     0],
       [    0,     0,     0,     0,     0,     0,    19,     0],
       [    0,     0,     0,     0,     0,     3,     0, 86963]],
      dtype=int64)

## Classification Report for Random Forest Classifier

### For Training Dataset

In [67]:
print(classification_report(y_train, y_rfc_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       1.00      0.50      0.67      2950
       dataProbing       1.00      1.00      1.00       189
  malitiousControl       1.00      1.00      1.00       672
malitiousOperation       1.00      1.00      1.00       497
              scan       1.00      1.00      1.00      1150
            spying       1.00      1.00      1.00       405
        wrongSetUp       1.00      1.00      1.00       103
            Normal       0.99      1.00      1.00    260952

          accuracy                           0.99    266918
         macro avg       1.00      0.94      0.96    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing Dataset

In [70]:
print(classification_report(y_test, y_rfc_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       1.00      0.50      0.67      1050
       dataProbing       1.00      1.00      1.00        53
  malitiousControl       1.00      1.00      1.00       217
malitiousOperation       1.00      1.00      1.00       160
              scan       1.00      1.00      1.00       381
            spying       1.00      1.00      1.00       127
        wrongSetUp       1.00      1.00      1.00        19
            Normal       0.99      1.00      1.00     86966

          accuracy                           0.99     88973
         macro avg       1.00      0.94      0.96     88973
      weighted avg       0.99      0.99      0.99     88973



## Confusion Matrix for Random Forest Classifier

### For Training dataset

In [68]:
confusion_matrix(y_train, y_rfc_train_pred)

array([[  1476,      0,      0,      0,      0,      0,      0,   1474],
       [     0,    189,      0,      0,      0,      0,      0,      0],
       [     0,      0,    672,      0,      0,      0,      0,      0],
       [     0,      0,      0,    497,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1150,      0,      0,      0],
       [     0,      0,      0,      0,      0,    405,      0,      0],
       [     0,      0,      0,      0,      0,      0,    103,      0],
       [     0,      0,      0,      0,      0,      0,      0, 260952]],
      dtype=int64)

### For Testing dataset

In [69]:
confusion_matrix(y_test, y_rfc_test_pred)

array([[  524,     0,     0,     0,     0,     0,     0,   526],
       [    0,    53,     0,     0,     0,     0,     0,     0],
       [    0,     0,   217,     0,     0,     0,     0,     0],
       [    0,     0,     0,   160,     0,     0,     0,     0],
       [    0,     0,     0,     0,   381,     0,     0,     0],
       [    0,     0,     0,     0,     0,   127,     0,     0],
       [    0,     0,     0,     0,     0,     0,    19,     0],
       [    0,     0,     0,     0,     1,     0,     0, 86965]],
      dtype=int64)

## Classification Report for Naive Bayes

### For Training Dataset

In [73]:
print(classification_report(y_train, y_nb_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.33      1.00      0.50      2950
       dataProbing       0.04      0.80      0.07       189
  malitiousControl       0.19      0.92      0.31       672
malitiousOperation       0.67      1.00      0.80       497
              scan       1.00      0.13      0.23      1150
            spying       0.01      0.93      0.01       405
        wrongSetUp       1.00      1.00      1.00       103
            Normal       1.00      0.76      0.86    260952

          accuracy                           0.76    266918
         macro avg       0.53      0.82      0.47    266918
      weighted avg       0.99      0.76      0.85    266918



### For Testing Dataset

In [74]:
print(classification_report(y_test, y_nb_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.35      1.00      0.52      1050
       dataProbing       0.04      0.91      0.07        53
  malitiousControl       0.19      0.93      0.31       217
malitiousOperation       0.64      1.00      0.78       160
              scan       1.00      0.15      0.27       381
            spying       0.01      0.95      0.01       127
        wrongSetUp       1.00      1.00      1.00        19
            Normal       1.00      0.76      0.86     86966

          accuracy                           0.76     88973
         macro avg       0.53      0.84      0.48     88973
      weighted avg       0.99      0.76      0.85     88973



## Confusion Matrix for Naive Bayes

### For Training dataset

In [71]:
confusion_matrix(y_train, y_nb_train_pred)

array([[  2950,      0,      0,      0,      0,      0,      0,      0],
       [     0,    152,      0,      0,      0,      0,      0,     37],
       [     0,      0,    619,      0,      0,      6,      0,     47],
       [     0,      0,      0,    497,      0,      0,      0,      0],
       [     7,      0,      0,      0,    151,    637,      0,    355],
       [     0,      0,     29,      0,      0,    376,      0,      0],
       [     0,      0,      0,      0,      0,      0,    103,      0],
       [  5871,   3912,   2629,    243,      0,  50286,      0, 198011]],
      dtype=int64)

### For Testing dataset

In [72]:
confusion_matrix(y_test, y_nb_test_pred)

array([[ 1050,     0,     0,     0,     0,     0,     0,     0],
       [    0,    48,     0,     0,     0,     0,     0,     5],
       [    0,     0,   201,     0,     0,     3,     0,    13],
       [    0,     0,     0,   160,     0,     0,     0,     0],
       [    4,     0,     0,     0,    59,   195,     0,   123],
       [    0,     0,     6,     0,     0,   121,     0,     0],
       [    0,     0,     0,     0,     0,     0,    19,     0],
       [ 1968,  1261,   865,    89,     0, 16683,     0, 66100]],
      dtype=int64)

## Classification Report for Support Vector Machine Classifier

### For Training Dataset

In [77]:
print(classification_report(y_train, y_svc_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.94      0.50      0.65      2950
       dataProbing       0.00      0.00      0.00       189
  malitiousControl       0.00      0.00      0.00       672
malitiousOperation       0.99      0.61      0.75       497
              scan       0.00      0.00      0.00      1150
            spying       0.00      0.00      0.00       405
        wrongSetUp       0.00      0.00      0.00       103
            Normal       0.98      1.00      0.99    260952

          accuracy                           0.98    266918
         macro avg       0.36      0.26      0.30    266918
      weighted avg       0.97      0.98      0.98    266918



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### For Testing Dataset

In [76]:
print(classification_report(y_test, y_svc_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

         DoSattack       0.95      0.50      0.65      1050
       dataProbing       0.00      0.00      0.00        53
  malitiousControl       0.00      0.00      0.00       217
malitiousOperation       1.00      0.54      0.70       160
              scan       0.00      0.00      0.00       381
            spying       0.00      0.00      0.00       127
        wrongSetUp       0.00      0.00      0.00        19
            Normal       0.98      1.00      0.99     86966

          accuracy                           0.98     88973
         macro avg       0.37      0.25      0.29     88973
      weighted avg       0.97      0.98      0.98     88973



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Confusion Matrix for Support Vector Machine Classifier

### For Training dataset

In [75]:
confusion_matrix(y_train, y_svc_train_pred)

array([[  1476,      0,      0,      0,      0,      0,      0,   1474],
       [     0,      0,      0,      0,      0,      0,      0,    189],
       [     0,      0,      0,      0,      0,      0,      0,    672],
       [     0,      0,      0,    301,      0,      0,      0,    196],
       [     9,      0,      9,      0,      0,      0,      0,   1132],
       [     0,      0,      0,      0,      0,      0,      0,    405],
       [     0,      0,      0,      0,      0,      0,      0,    103],
       [    80,      0,      0,      3,      0,     18,      0, 260851]],
      dtype=int64)

### For Testing dataset

In [78]:
confusion_matrix(y_test, y_svc_test_pred)

array([[  524,     0,     0,     0,     0,     0,     0,   526],
       [    0,     0,     0,     0,     0,     0,     0,    53],
       [    0,     0,     0,     0,     0,     0,     0,   217],
       [    0,     0,     0,    86,     0,     0,     0,    74],
       [    2,     0,     1,     0,     0,     0,     0,   378],
       [    0,     0,     0,     0,     0,     0,     0,   127],
       [    0,     0,     0,     0,     0,     0,     0,    19],
       [   27,     0,     0,     0,     0,     5,     0, 86934]],
      dtype=int64)

## Confusion Matrix for Artificial Neural Network

### For Training dataset

In [80]:
y_ann_train_pred = ann.predict(x_train_sc)
y_ann_train_pred

array([[7.2847986e-12, 1.5272352e-13, 4.2896353e-11, ..., 1.0045301e-31,
        0.0000000e+00, 8.7242723e-03],
       [2.3713056e-24, 4.8744302e-38, 1.7330922e-31, ..., 1.5927503e-14,
        0.0000000e+00, 7.9665965e-01],
       [2.2866093e-34, 3.7798688e-37, 1.6733315e-22, ..., 7.8293112e-34,
        0.0000000e+00, 6.1887595e-06],
       ...,
       [1.1497803e-25, 0.0000000e+00, 8.6730574e-22, ..., 0.0000000e+00,
        0.0000000e+00, 3.5658479e-04],
       [2.4099606e-07, 1.3548691e-12, 1.6811490e-04, ..., 5.2219775e-07,
        1.3782188e-25, 4.8303452e-01],
       [2.2625654e-27, 4.8740560e-24, 3.1362187e-28, ..., 0.0000000e+00,
        0.0000000e+00, 6.7496830e-01]], dtype=float32)

In [81]:
y_train.shape

(266918, 1)

In [82]:
y_ann_train_pred.shape

(266918, 8)

In [83]:
y_ann_train_pred_labels = [np.argmax(i) for i in y_ann_train_pred]
y_ann_train_pred_labels[:5]

[7, 7, 7, 7, 7]

In [85]:
print(tf.math.confusion_matrix(labels=y_train, predictions=y_ann_train_pred_labels))

tf.Tensor(
[[  1476      0      0      0      0      0      0   1474]
 [     0    189      0      0      0      0      0      0]
 [     0      0    672      0      0      0      0      0]
 [     0      0      0    497      0      0      0      0]
 [     0      0      8      0   1133      0      0      9]
 [     0      0      0      0      0    405      0      0]
 [     0      0      0      0      0      0    103      0]
 [     4      0      0      0      1      0      0 260947]], shape=(8, 8), dtype=int32)


### For Testing dataset

In [86]:
y_ann_test_pred = ann.predict(x_test_sc)
y_ann_test_pred

array([[2.9712998e-25, 0.0000000e+00, 3.5876079e-21, ..., 0.0000000e+00,
        0.0000000e+00, 8.8471174e-04],
       [4.8102618e-22, 0.0000000e+00, 5.3144629e-32, ..., 6.9545979e-31,
        0.0000000e+00, 9.9758792e-01],
       [2.3646934e-24, 4.8729055e-38, 1.7421201e-31, ..., 1.5627602e-14,
        0.0000000e+00, 7.9762280e-01],
       ...,
       [7.2847986e-12, 1.5272352e-13, 4.2896353e-11, ..., 1.0045301e-31,
        0.0000000e+00, 8.7242723e-03],
       [4.4785172e-01, 8.5244658e-08, 8.5576748e-06, ..., 8.2493934e-06,
        1.0657477e-08, 6.6548026e-01],
       [5.6315715e-25, 6.0918276e-33, 2.7661749e-19, ..., 0.0000000e+00,
        0.0000000e+00, 2.7843827e-01]], dtype=float32)

In [87]:
y_test.shape

(88973, 1)

In [88]:
y_ann_test_pred.shape

(88973, 8)

In [89]:
y_ann_test_pred_labels = [np.argmax(i) for i in y_ann_test_pred]
y_ann_test_pred_labels[:5]

[7, 7, 7, 7, 7]

In [90]:
print(tf.math.confusion_matrix(labels=y_test, predictions=y_ann_test_pred_labels))

tf.Tensor(
[[  524     0     0     0     0     0     0   526]
 [    0    53     0     0     0     0     0     0]
 [    0     0   217     0     0     0     0     0]
 [    0     0     0   160     0     0     0     0]
 [    0     0     0     0   374     0     0     7]
 [    0     0     0     0     0   127     0     0]
 [    0     0     0     0     0     0    19     0]
 [    0     0     0     0     0     0     0 86966]], shape=(8, 8), dtype=int32)


## Classification Report for Artificial Neural Network

### For Training dataset

In [None]:
print(classification_report(y_train, y_ann_train_pred_labels, target_names=target_names))

### For Testing dataset

In [None]:
print(classification_report(y_test, y_ann_test_pred_labels, target_names=target_names))

In [None]:
y_ann_train_labels = np.array(y_ann_train_pred_labels)
y_ann_test_labels = np.array(y_ann_test_pred_labels)
y_ann_train_labels, y_ann_test_labels

In [None]:
dataset = [[lr_time,lr_train_accuracy,lr_test_accuracy],
           [knn_time,knn_train_accuracy,knn_test_accuracy],
           [dtc_time,dtc_train_accuracy,dtc_test_accuracy],
           [rfc_time,rfc_train_accuracy,rfc_test_accuracy],
           [nb_time,nb_train_accuracy,nb_test_accuracy],
           [svc_time,svc_train_accuracy,svc_test_accuracy],
           [ann_time,ann_train_accuracy,ann_test_accuracy]
          ]

In [None]:
dataset

In [None]:
comparison = pd.DataFrame(dataset, columns = ['Time_Taken','Training_Accuracy','Testing_Accuracy'], 
                         index=['Logistic Regression','K Nearest Neighbour','Decision Tree','Random Forest','Naive Bayes','Support Vector Machine','Artificial Neural Network',])
comparison

In [None]:
comparison.to_csv('performance.csv')