## Importing Libraries

In [9]:
from time import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix

### Loading dataset

In [10]:
data = pd.read_csv('dataset.csv')
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,7,21,30,2,3,29,2,3,57,5,1,0
1,7,32,70,2,4,67,2,4,132,5,1,0
2,7,15,1,2,2,0,2,2,0,5,1,0
3,7,33,77,2,7,72,2,7,142,5,1,0
4,7,55,78,3,7,73,3,7,144,6,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
355886,7,25,46,2,15,45,4,15,90,7,0,8505
355887,7,82,14,7,10,8,0,10,16,0,0,22
355888,7,82,14,7,10,8,0,10,17,1,0,1
355889,7,30,62,2,20,60,3,20,120,3,0,0


### Data Preprocessing


In [11]:
y = data.iloc[:,0].values
x = data.iloc[:,1:].values
x,y

(array([[  21,   30,    2, ...,    5,    1,    0],
        [  32,   70,    2, ...,    5,    1,    0],
        [  15,    1,    2, ...,    5,    1,    0],
        ...,
        [  82,   14,    7, ...,    1,    0,    1],
        [  30,   62,    2, ...,    3,    0,    0],
        [  30,   62,    2, ...,    7,    0, 9156]], dtype=int64),
 array([7, 7, 7, ..., 7, 7, 7], dtype=int64))

### Splitting the dataset into training and testing 

In [12]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.25)

### Scaling the dataset 

In [13]:
sc = StandardScaler()
x_train_sc = sc.fit_transform(x_train)
x_test_sc = sc.transform(x_test)

# Appliyng Machine Learning Algorithms

## Applying logistic regression

In [14]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

In [15]:
s = time()
lr.fit(x_train_sc,y_train)
lr_time = time()-s
print("Time elapsed",lr_time)

Time elapsed 23.019588947296143


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### Training accuracy

In [16]:
lr.score(x_train_sc,y_train)

0.9891502259120779

### Testing accuracy

In [17]:
lr.score(x_test_sc,y_test)

0.988569566048127

### Accuracy of the model

In [18]:
y_lr_train_pred = lr.predict(x_train_sc)
lr_train_accuracy = accuracy_score(y_train,y_lr_train_pred)
print("Accuracy of Logistic Regression Training Dataset: ",lr_train_accuracy)

Accuracy of Logistic Regression Training Dataset:  0.9891502259120779


In [19]:
y_lr_test_pred = lr.predict(x_test_sc)
lr_test_accuracy = accuracy_score(y_test,y_lr_test_pred)
print("Accuracy of Logistic Regression Testing Dataset: ",lr_test_accuracy)

Accuracy of Logistic Regression Testing Dataset:  0.988569566048127


## Applying KNN

In [20]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier()

In [21]:
s = time()
knn.fit(x_train_sc,y_train)
knn_time = time()-s
print("Time elapsed",knn_time)

Time elapsed 1.7969074249267578


### Training accuracy

In [22]:
knn.score(x_train_sc,y_train)

0.9934961298975715

### Testing accuracy

In [23]:
knn.score(x_test_sc,y_test)

0.9937846312926394

### Accuracy of the model

In [24]:
y_knn_train_pred = knn.predict(x_train_sc)
knn_train_accuracy = accuracy_score(y_train,y_knn_train_pred)
print("Accuracy of K Nearest Neigbour Training Dataset: ",knn_train_accuracy)

Accuracy of K Nearest Neigbour Training Dataset:  0.9934961298975715


In [25]:
y_knn_test_pred = knn.predict(x_test_sc)
knn_test_accuracy = accuracy_score(y_test,y_knn_test_pred)
print("Accuracy of K Nearest Neigbour Testing Dataset: ",knn_test_accuracy)

Accuracy of K Nearest Neigbour Testing Dataset:  0.9937846312926394


## Applying Decision Tree Classifier

In [26]:
from sklearn.tree import DecisionTreeClassifier

dtc = DecisionTreeClassifier()

In [27]:
s = time()
dtc.fit(x_train_sc,y_train)
dtc_time = time()-s
print("Time elapsed",dtc_time)

Time elapsed 0.9068188667297363


### Training accuracy

In [28]:
dtc.score(x_train_sc,y_train)

0.9944589724184956

### Testing accuracy

In [29]:
dtc.score(x_test_sc,y_test)

0.9940993335056703

### Accuracy of the model

In [30]:
y_dtc_train_pred = dtc.predict(x_train_sc)
dtc_train_accuracy = accuracy_score(y_train,y_dtc_train_pred)
print("Accuracy of Decision Tree Classifier Training Dataset: ",dtc_train_accuracy)

Accuracy of Decision Tree Classifier Training Dataset:  0.9944589724184956


In [31]:
y_dtc_test_pred = dtc.predict(x_test_sc)
dtc_test_accuracy = accuracy_score(y_test,y_dtc_test_pred)
print("Accuracy of Decision Tree Classifier Testing Dataset: ",dtc_test_accuracy)

Accuracy of Decision Tree Classifier Testing Dataset:  0.9940993335056703


## Applying Random Forest Classifier

In [32]:
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier()

In [33]:
s = time()
rfc.fit(x_train_sc,y_train)
rfc_time = time()-s
print("Time elapsed",rfc_time)

Time elapsed 47.627418994903564


### Training accuracy

In [34]:
rfc.score(x_train_sc,y_train)

0.9944589724184956

### Testing accuracy

In [35]:
rfc.score(x_test_sc,y_test)

0.9941330515999236

### Accuracy of the model

In [36]:
y_rfc_train_pred = rfc.predict(x_train_sc)
rfc_train_accuracy = accuracy_score(y_train,y_rfc_train_pred)
print("Accuracy of Random Forest Classifier Training Dataset: ",rfc_train_accuracy)

Accuracy of Random Forest Classifier Training Dataset:  0.9944589724184956


In [37]:
y_rfc_test_pred = rfc.predict(x_test_sc)
rfc_test_accuracy = accuracy_score(y_test,y_rfc_test_pred)
print("Accuracy of Random Forest Classifier Testing Dataset: ",rfc_test_accuracy)

Accuracy of Random Forest Classifier Testing Dataset:  0.9941330515999236


## Applying Naive bayes 

In [38]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()

In [39]:
s = time()
nb.fit(x_train,y_train)
nb_time = time()-s
print("Time elapsed",nb_time)

Time elapsed 0.15570878982543945


### Training accuracy

In [40]:
nb.score(x_train,y_train)

0.7548835222802509

### Testing accuracy

In [41]:
nb.score(x_test,y_test)

0.7541501354343453

### Accuracy of the model

In [42]:
y_nb_train_pred = nb.predict(x_train)
nb_train_accuracy = accuracy_score(y_train,y_nb_train_pred)
print("Accuracy of Naive Bayes Training Dataset: ",nb_train_accuracy)

Accuracy of Naive Bayes Training Dataset:  0.7548835222802509


In [43]:
y_nb_test_pred = nb.predict(x_test)
nb_test_accuracy = accuracy_score(y_test,y_nb_test_pred)
print("Accuracy of Naive Bayes Testing Dataset: ",nb_test_accuracy)

Accuracy of Naive Bayes Testing Dataset:  0.7541501354343453


## Applying SVM

In [44]:
from sklearn.svm import LinearSVC
svc = LinearSVC()

In [103]:
s = time()
svc.fit(x_train_sc,y_train)
svc_time = time()-s
print("Time elapsed",svc_time)

  y = column_or_1d(y, warn=True)


Time elapsed 75.70233845710754


### Training accuracy

In [46]:
svc.score(x_train_sc,y_train)

0.984418435624424

### Testing accuracy

In [47]:
svc.score(x_test_sc,y_test)

0.9836467242871433

### Accuracy of the model

In [48]:
y_svc_train_pred = svc.predict(x_train_sc)
svc_train_accuracy = accuracy_score(y_train,y_svc_train_pred)
print("Accuracy of Support Vector Machine Training Dataset: ",svc_train_accuracy)

Accuracy of Support Vector Machine Training Dataset:  0.984418435624424


In [49]:
y_svc_test_pred = svc.predict(x_test_sc)
svc_test_accuracy = accuracy_score(y_test,y_svc_test_pred)
print("Accuracy of Support Vector Machine Testing Dataset: ",svc_test_accuracy)

Accuracy of Support Vector Machine Testing Dataset:  0.9836467242871433


## Applying Artificial Neural Network using Tensorflow

In [50]:
import tensorflow as tf
from tensorflow.keras import Sequential,layers

In [51]:
y_train.shape,y_test.shape

((266918,), (88973,))

In [52]:
y_train = y_train.reshape(-1,1)

In [53]:
y_test = y_test.reshape(-1,1)

In [54]:
y_train.shape,y_test.shape

((266918, 1), (88973, 1))

In [55]:
x_train.shape,x_train_sc.shape,x_test.shape,x_test_sc.shape

((266918, 11), (266918, 11), (88973, 11), (88973, 11))

In [102]:
ann = Sequential([
    layers.Dense(10,input_shape=(11,),activation='relu'),
    layers.Dense(20,activation='relu'),
    layers.Dense(20,activation='relu'),
    layers.Dense(8,activation='sigmoid'),
    
])

ann.compile(optimizer='adam',
             loss = 'sparse_categorical_crossentropy',
             metrics=['accuracy'])
s = time()
ann.fit(x_train_sc,y_train,epochs= 20)
ann_time = time()-s
print("Time elapsed",ann_time)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Time elapsed 310.154185295105


## Training Accuracy

In [105]:
loss_train, ann_train_accuracy = ann.evaluate(x_train_sc,y_train)
ann_train_accuracy



0.9944440126419067

In [106]:
loss_test, ann_test_accuracy = ann.evaluate(x_test_sc,y_test)
ann_test_accuracy



0.9941105842590332

## Evaluation Metrics

In [59]:
target_names = ['DoSattack','dataProbing','malitiousControl', 'malitiousOperation','scan','spying', 'wrongSetUp','Normal']

In [60]:
target_names

['Normal',
 'DoSattack',
 'scan',
 'malitiousControl',
 'malitiousOperation',
 'spying',
 'dataProbing',
 'wrongSetUp']

## Classification Report of Logistic Regression

### For Training Dataset

In [61]:
print(classification_report(y_train, y_lr_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       0.95      0.50      0.66      2986
         DoSattack       1.00      0.83      0.90       183
              scan       0.98      0.93      0.95       679
  malitiousControl       0.82      0.75      0.78       508
malitiousOperation       0.88      0.47      0.62      1129
            spying       0.00      0.00      0.00       378
       dataProbing       0.94      1.00      0.97        91
        wrongSetUp       0.99      1.00      0.99    260964

          accuracy                           0.99    266918
         macro avg       0.82      0.68      0.73    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing Dataset

In [62]:
print(classification_report(y_test, y_lr_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       0.94      0.49      0.64      1014
         DoSattack       1.00      0.83      0.91        59
              scan       1.00      0.91      0.95       210
  malitiousControl       0.80      0.68      0.74       149
malitiousOperation       0.90      0.50      0.64       402
            spying       0.00      0.00      0.00       154
       dataProbing       0.86      1.00      0.93        31
        wrongSetUp       0.99      1.00      0.99     86954

          accuracy                           0.99     88973
         macro avg       0.81      0.68      0.73     88973
      weighted avg       0.99      0.99      0.99     88973



## Confusion Matrix for Logistic Regression

### For Training dataset

In [63]:
confusion_matrix(y_train, y_lr_train_pred)

array([[  1507,      0,      0,      0,      0,      0,      0,   1479],
       [     0,    151,      0,      0,      0,      0,      0,     32],
       [     0,      0,    629,      9,     28,      0,      0,     13],
       [     0,      0,      0,    381,      0,      0,      0,    127],
       [     0,      0,     10,      0,    534,      0,      6,    579],
       [     0,      0,      0,      0,     26,      0,      0,    352],
       [     0,      0,      0,      0,      0,      0,     91,      0],
       [    86,      0,      1,     76,     16,     56,      0, 260729]],
      dtype=int64)

### For Testing dataset

In [64]:
confusion_matrix(y_test, y_lr_test_pred)

array([[  493,     0,     0,     0,     0,     0,     0,   521],
       [    0,    49,     0,     0,     0,     0,     0,    10],
       [    0,     0,   191,     4,    10,     0,     0,     5],
       [    0,     0,     0,   102,     0,     0,     0,    47],
       [    0,     0,     0,     0,   201,     0,     5,   196],
       [    0,     0,     0,     0,     9,     0,     0,   145],
       [    0,     0,     0,     0,     0,     0,    31,     0],
       [   29,     0,     0,    21,     4,    11,     0, 86889]],
      dtype=int64)

## Classification Report for K Nearest Neighbour

### For Training Dataset

In [65]:
print(classification_report(y_train, y_knn_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       0.63      1.00      0.78      2986
         DoSattack       1.00      1.00      1.00       183
              scan       1.00      1.00      1.00       679
  malitiousControl       1.00      1.00      1.00       508
malitiousOperation       1.00      1.00      1.00      1129
            spying       1.00      1.00      1.00       378
       dataProbing       1.00      1.00      1.00        91
        wrongSetUp       1.00      0.99      1.00    260964

          accuracy                           0.99    266918
         macro avg       0.95      1.00      0.97    266918
      weighted avg       1.00      0.99      0.99    266918



### For Testing Dataset

In [66]:
print(classification_report(y_test, y_knn_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       0.65      1.00      0.79      1014
         DoSattack       1.00      1.00      1.00        59
              scan       1.00      1.00      1.00       210
  malitiousControl       1.00      1.00      1.00       149
malitiousOperation       1.00      1.00      1.00       402
            spying       1.00      1.00      1.00       154
       dataProbing       1.00      1.00      1.00        31
        wrongSetUp       1.00      0.99      1.00     86954

          accuracy                           0.99     88973
         macro avg       0.96      1.00      0.97     88973
      weighted avg       1.00      0.99      0.99     88973



## Confusion Matrix for K Nearest Neighbour

### For Training dataset

In [67]:
confusion_matrix(y_train, y_knn_train_pred)

array([[  2986,      0,      0,      0,      0,      0,      0,      0],
       [     0,    183,      0,      0,      0,      0,      0,      0],
       [     0,      0,    679,      0,      0,      0,      0,      0],
       [     0,      0,      0,    508,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1129,      0,      0,      0],
       [     0,      0,      0,      0,      0,    378,      0,      0],
       [     0,      0,      0,      0,      0,      0,     91,      0],
       [  1733,      0,      0,      0,      2,      1,      0, 259228]],
      dtype=int64)

### For Testing dataset

In [68]:
confusion_matrix(y_test, y_knn_test_pred)

array([[ 1014,     0,     0,     0,     0,     0,     0,     0],
       [    0,    59,     0,     0,     0,     0,     0,     0],
       [    0,     0,   210,     0,     0,     0,     0,     0],
       [    0,     0,     0,   149,     0,     0,     0,     0],
       [    0,     0,     0,     0,   402,     0,     0,     0],
       [    0,     0,     0,     0,     0,   154,     0,     0],
       [    0,     0,     0,     0,     0,     0,    31,     0],
       [  551,     0,     0,     0,     2,     0,     0, 86401]],
      dtype=int64)

## Classification Report for Decision Tree Classifier

### For Training Dataset

In [69]:
print(classification_report(y_train, y_dtc_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       1.00      0.50      0.67      2986
         DoSattack       1.00      1.00      1.00       183
              scan       1.00      1.00      1.00       679
  malitiousControl       1.00      1.00      1.00       508
malitiousOperation       1.00      1.00      1.00      1129
            spying       1.00      1.00      1.00       378
       dataProbing       1.00      1.00      1.00        91
        wrongSetUp       0.99      1.00      1.00    260964

          accuracy                           0.99    266918
         macro avg       1.00      0.94      0.96    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing Dataset

In [70]:
print(classification_report(y_test, y_dtc_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       1.00      0.49      0.65      1014
         DoSattack       1.00      1.00      1.00        59
              scan       1.00      1.00      1.00       210
  malitiousControl       1.00      1.00      1.00       149
malitiousOperation       1.00      1.00      1.00       402
            spying       0.99      1.00      0.99       154
       dataProbing       1.00      1.00      1.00        31
        wrongSetUp       0.99      1.00      1.00     86954

          accuracy                           0.99     88973
         macro avg       1.00      0.94      0.96     88973
      weighted avg       0.99      0.99      0.99     88973



## Confusion Matrix for Decision Tree Classifier

### For Training dataset

In [71]:
confusion_matrix(y_train, y_dtc_train_pred)

array([[  1507,      0,      0,      0,      0,      0,      0,   1479],
       [     0,    183,      0,      0,      0,      0,      0,      0],
       [     0,      0,    679,      0,      0,      0,      0,      0],
       [     0,      0,      0,    508,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1129,      0,      0,      0],
       [     0,      0,      0,      0,      0,    378,      0,      0],
       [     0,      0,      0,      0,      0,      0,     91,      0],
       [     0,      0,      0,      0,      0,      0,      0, 260964]],
      dtype=int64)

### For Testing dataset

In [72]:
confusion_matrix(y_test, y_dtc_test_pred)

array([[  493,     0,     0,     0,     0,     0,     0,   521],
       [    0,    59,     0,     0,     0,     0,     0,     0],
       [    0,     0,   210,     0,     0,     0,     0,     0],
       [    0,     0,     0,   149,     0,     0,     0,     0],
       [    0,     0,     0,     0,   402,     0,     0,     0],
       [    0,     0,     0,     0,     0,   154,     0,     0],
       [    0,     0,     0,     0,     0,     0,    31,     0],
       [    0,     0,     0,     0,     2,     2,     0, 86950]],
      dtype=int64)

## Classification Report for Random Forest Classifier

### For Training Dataset

In [73]:
print(classification_report(y_train, y_rfc_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       1.00      0.50      0.67      2986
         DoSattack       1.00      1.00      1.00       183
              scan       1.00      1.00      1.00       679
  malitiousControl       1.00      1.00      1.00       508
malitiousOperation       1.00      1.00      1.00      1129
            spying       1.00      1.00      1.00       378
       dataProbing       1.00      1.00      1.00        91
        wrongSetUp       0.99      1.00      1.00    260964

          accuracy                           0.99    266918
         macro avg       1.00      0.94      0.96    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing Dataset

In [74]:
print(classification_report(y_test, y_rfc_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       1.00      0.49      0.65      1014
         DoSattack       1.00      1.00      1.00        59
              scan       1.00      1.00      1.00       210
  malitiousControl       1.00      1.00      1.00       149
malitiousOperation       1.00      1.00      1.00       402
            spying       1.00      1.00      1.00       154
       dataProbing       1.00      1.00      1.00        31
        wrongSetUp       0.99      1.00      1.00     86954

          accuracy                           0.99     88973
         macro avg       1.00      0.94      0.96     88973
      weighted avg       0.99      0.99      0.99     88973



## Confusion Matrix for Random Forest Classifier

### For Training dataset

In [75]:
confusion_matrix(y_train, y_rfc_train_pred)

array([[  1507,      0,      0,      0,      0,      0,      0,   1479],
       [     0,    183,      0,      0,      0,      0,      0,      0],
       [     0,      0,    679,      0,      0,      0,      0,      0],
       [     0,      0,      0,    508,      0,      0,      0,      0],
       [     0,      0,      0,      0,   1129,      0,      0,      0],
       [     0,      0,      0,      0,      0,    378,      0,      0],
       [     0,      0,      0,      0,      0,      0,     91,      0],
       [     0,      0,      0,      0,      0,      0,      0, 260964]],
      dtype=int64)

### For Testing dataset

In [76]:
confusion_matrix(y_test, y_rfc_test_pred)

array([[  493,     0,     0,     0,     0,     0,     0,   521],
       [    0,    59,     0,     0,     0,     0,     0,     0],
       [    0,     0,   210,     0,     0,     0,     0,     0],
       [    0,     0,     0,   149,     0,     0,     0,     0],
       [    0,     0,     0,     0,   402,     0,     0,     0],
       [    0,     0,     0,     0,     0,   154,     0,     0],
       [    0,     0,     0,     0,     0,     0,    31,     0],
       [    0,     0,     0,     0,     1,     0,     0, 86953]],
      dtype=int64)

## Classification Report for Naive Bayes

### For Training Dataset

In [77]:
print(classification_report(y_train, y_nb_train_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       0.34      1.00      0.50      2986
         DoSattack       0.04      0.83      0.07       183
              scan       0.19      0.93      0.32       679
  malitiousControl       0.66      1.00      0.80       508
malitiousOperation       1.00      0.12      0.22      1129
            spying       0.01      0.93      0.01       378
       dataProbing       1.00      1.00      1.00        91
        wrongSetUp       1.00      0.75      0.86    260964

          accuracy                           0.75    266918
         macro avg       0.53      0.82      0.47    266918
      weighted avg       0.99      0.75      0.85    266918



### For Testing Dataset

In [78]:
print(classification_report(y_test, y_nb_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       0.34      1.00      0.51      1014
         DoSattack       0.04      0.83      0.07        59
              scan       0.18      0.91      0.30       210
  malitiousControl       0.67      1.00      0.80       149
malitiousOperation       1.00      0.13      0.24       402
            spying       0.01      0.95      0.02       154
       dataProbing       1.00      1.00      1.00        31
        wrongSetUp       1.00      0.75      0.86     86954

          accuracy                           0.75     88973
         macro avg       0.53      0.82      0.47     88973
      weighted avg       0.99      0.75      0.85     88973



## Confusion Matrix for Naive Bayes

### For Training dataset

In [79]:
confusion_matrix(y_train, y_nb_train_pred)

array([[  2986,      0,      0,      0,      0,      0,      0,      0],
       [     0,    151,      0,      0,      0,      0,      0,     32],
       [     0,      0,    629,      0,      0,      7,      0,     43],
       [     0,      0,      0,    508,      0,      0,      0,      0],
       [    14,      0,      0,      0,    140,    624,      0,    351],
       [     0,      0,     27,      0,      0,    351,      0,      0],
       [     0,      0,      0,      0,      0,      0,     91,      0],
       [  5905,   3808,   2616,    258,      0,  51741,      0, 196636]],
      dtype=int64)

### For Testing dataset

In [80]:
confusion_matrix(y_test, y_nb_test_pred)

array([[ 1014,     0,     0,     0,     0,     0,     0,     0],
       [    0,    49,     0,     0,     0,     0,     0,    10],
       [    0,     0,   191,     0,     0,     2,     0,    17],
       [    0,     0,     0,   149,     0,     0,     0,     0],
       [    8,     0,     0,     0,    54,   224,     0,   116],
       [    0,     0,     8,     0,     0,   146,     0,     0],
       [    0,     0,     0,     0,     0,     0,    31,     0],
       [ 1927,  1267,   878,    74,     0, 17343,     0, 65465]],
      dtype=int64)

## Classification Report for Support Vector Machine Classifier

### For Training Dataset

In [81]:
confusion_matrix(y_test, y_nb_test_pred)

array([[ 1014,     0,     0,     0,     0,     0,     0,     0],
       [    0,    49,     0,     0,     0,     0,     0,    10],
       [    0,     0,   191,     0,     0,     2,     0,    17],
       [    0,     0,     0,   149,     0,     0,     0,     0],
       [    8,     0,     0,     0,    54,   224,     0,   116],
       [    0,     0,     8,     0,     0,   146,     0,     0],
       [    0,     0,     0,     0,     0,     0,    31,     0],
       [ 1927,  1267,   878,    74,     0, 17343,     0, 65465]],
      dtype=int64)

## Classification Report for Support Vector Machine Classifier

### For Training Dataset

In [82]:
print(classification_report(y_train, y_svc_train_pred, target_names=target_names))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


                    precision    recall  f1-score   support

            Normal       0.94      0.50      0.66      2986
         DoSattack       0.00      0.00      0.00       183
              scan       0.00      0.00      0.00       679
  malitiousControl       1.00      0.75      0.86       508
malitiousOperation       0.00      0.00      0.00      1129
            spying       0.00      0.00      0.00       378
       dataProbing       0.00      0.00      0.00        91
        wrongSetUp       0.98      1.00      0.99    260964

          accuracy                           0.98    266918
         macro avg       0.37      0.28      0.31    266918
      weighted avg       0.98      0.98      0.98    266918



  _warn_prf(average, modifier, msg_start, len(result))


### For Testing Dataset

In [83]:
print(classification_report(y_test, y_svc_test_pred, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       0.94      0.49      0.64      1014
         DoSattack       0.00      0.00      0.00        59
              scan       0.00      0.00      0.00       210
  malitiousControl       0.98      0.68      0.81       149
malitiousOperation       0.00      0.00      0.00       402
            spying       0.00      0.00      0.00       154
       dataProbing       0.00      0.00      0.00        31
        wrongSetUp       0.98      1.00      0.99     86954

          accuracy                           0.98     88973
         macro avg       0.36      0.27      0.30     88973
      weighted avg       0.97      0.98      0.98     88973



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Confusion Matrix for Support Vector Machine Classifier

### For Training dataset

In [84]:
confusion_matrix(y_train, y_svc_train_pred)

array([[  1507,      0,      0,      0,      0,      0,      0,   1479],
       [     0,      0,      0,      0,      0,      0,      0,    183],
       [     0,      0,      0,      0,      0,      0,      0,    679],
       [     0,      0,      0,    381,      0,      0,      0,    127],
       [     9,      0,     10,      0,      0,      0,      0,   1110],
       [     0,      0,      0,      0,      0,      0,      0,    378],
       [     0,      0,      0,      0,      0,      0,      0,     91],
       [    80,      0,      0,      1,      0,     12,      0, 260871]],
      dtype=int64)

### For Testing dataset

In [85]:
confusion_matrix(y_test, y_svc_test_pred)

array([[  493,     0,     0,     0,     0,     0,     0,   521],
       [    0,     0,     0,     0,     0,     0,     0,    59],
       [    0,     0,     0,     0,     0,     0,     0,   210],
       [    0,     0,     0,   102,     0,     0,     0,    47],
       [    2,     0,     0,     0,     0,     0,     0,   400],
       [    0,     0,     0,     0,     0,     0,     0,   154],
       [    0,     0,     0,     0,     0,     0,     0,    31],
       [   27,     0,     0,     2,     0,     2,     0, 86923]],
      dtype=int64)

## Confusion Matrix for Artificial Neural Network

### For Training dataset

In [108]:
y_ann_train_pred = ann.predict(x_train_sc)
y_ann_train_pred

array([[1.2271275e-18, 6.2446466e-18, 1.3138791e-07, ..., 1.2894417e-06,
        3.2677865e-22, 9.6070826e-01],
       [4.7563413e-30, 2.0728058e-18, 3.0238551e-31, ..., 4.2628978e-19,
        0.0000000e+00, 5.7525247e-02],
       [2.0524337e-27, 3.4451483e-19, 1.8556358e-18, ..., 4.5061130e-17,
        0.0000000e+00, 8.2059377e-01],
       ...,
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 7.1368957e-01],
       [0.0000000e+00, 3.7470693e-27, 1.0678927e-11, ..., 1.3656958e-16,
        0.0000000e+00, 6.4118016e-05],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 9.8896778e-01]], dtype=float32)

In [109]:
y_train.shape

(266918, 1)

In [110]:
y_ann_train_pred.shape

(266918, 8)

In [111]:
y_ann_train_pred_labels = [np.argmax(i) for i in y_ann_train_pred]
y_ann_train_pred_labels[:5]

[7, 7, 7, 7, 7]

In [112]:
print(tf.math.confusion_matrix(labels=y_train, predictions=y_ann_train_pred_labels))

tf.Tensor(
[[  1507      0      0      0      0      0      0   1479]
 [     0    183      0      0      0      0      0      0]
 [     0      0    679      0      0      0      0      0]
 [     0      0      0    508      0      0      0      0]
 [     0      0      0      0   1129      0      0      0]
 [     0      0      0      0      0    378      0      0]
 [     0      0      0      0      0      0     91      0]
 [     3      0      1      0      0      0      0 260960]], shape=(8, 8), dtype=int32)


### For Testing dataset

In [113]:
y_ann_test_pred = ann.predict(x_test_sc)
y_ann_test_pred

array([[1.0671364e-24, 7.6887861e-18, 2.7698234e-30, ..., 6.1102570e-19,
        0.0000000e+00, 5.9270155e-01],
       [3.0931572e-15, 2.1055540e-24, 5.0639678e-15, ..., 3.9616470e-14,
        0.0000000e+00, 9.9710220e-01],
       [7.9377699e-30, 4.3327612e-31, 1.0577173e-32, ..., 1.0247739e-36,
        0.0000000e+00, 2.3322988e-01],
       ...,
       [5.6175099e-07, 1.1239664e-10, 5.0504173e-07, ..., 9.5696964e-07,
        8.1079569e-15, 8.8947737e-01],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 5.2788258e-03],
       [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 9.8404014e-01]], dtype=float32)

In [114]:
y_test.shape

(88973, 1)

In [115]:
y_ann_test_pred.shape

(88973, 8)

In [116]:
y_ann_test_pred_labels = [np.argmax(i) for i in y_ann_test_pred]
y_ann_test_pred_labels[:5]

[7, 7, 7, 7, 7]

In [117]:
print(tf.math.confusion_matrix(labels=y_test, predictions=y_ann_test_pred_labels))

tf.Tensor(
[[  493     0     0     0     0     0     0   521]
 [    0    59     0     0     0     0     0     0]
 [    0     0   210     0     0     0     0     0]
 [    0     0     0   149     0     0     0     0]
 [    0     0     0     0   402     0     0     0]
 [    0     0     0     0     0   154     0     0]
 [    0     0     0     0     0     0    31     0]
 [    3     0     0     0     0     0     0 86951]], shape=(8, 8), dtype=int32)


## Classification Report for Artificial Neural Network

### For Training dataset

In [140]:
print(classification_report(y_train, y_ann_train_pred_labels, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       1.00      0.50      0.67      2986
         DoSattack       1.00      1.00      1.00       183
              scan       1.00      1.00      1.00       679
  malitiousControl       1.00      1.00      1.00       508
malitiousOperation       1.00      1.00      1.00      1129
            spying       1.00      1.00      1.00       378
       dataProbing       1.00      1.00      1.00        91
        wrongSetUp       0.99      1.00      1.00    260964

          accuracy                           0.99    266918
         macro avg       1.00      0.94      0.96    266918
      weighted avg       0.99      0.99      0.99    266918



### For Testing dataset

In [141]:
print(classification_report(y_test, y_ann_test_pred_labels, target_names=target_names))

                    precision    recall  f1-score   support

            Normal       0.99      0.49      0.65      1014
         DoSattack       1.00      1.00      1.00        59
              scan       1.00      1.00      1.00       210
  malitiousControl       1.00      1.00      1.00       149
malitiousOperation       1.00      1.00      1.00       402
            spying       1.00      1.00      1.00       154
       dataProbing       1.00      1.00      1.00        31
        wrongSetUp       0.99      1.00      1.00     86954

          accuracy                           0.99     88973
         macro avg       1.00      0.94      0.96     88973
      weighted avg       0.99      0.99      0.99     88973



In [125]:
y_ann_train_labels = np.array(y_ann_train_pred_labels)
y_ann_test_labels = np.array(y_ann_test_pred_labels)
y_ann_train_labels, y_ann_test_labels

(array([7, 7, 7, ..., 7, 7, 7], dtype=int64),
 array([7, 7, 7, ..., 7, 7, 7], dtype=int64))

In [142]:
dataset = [[lr_time,lr_train_accuracy,lr_test_accuracy],
           [knn_time,knn_train_accuracy,knn_test_accuracy],
           [dtc_time,dtc_train_accuracy,dtc_test_accuracy],
           [rfc_time,rfc_train_accuracy,rfc_test_accuracy],
           [nb_time,nb_train_accuracy,nb_test_accuracy],
           [svc_time,svc_train_accuracy,svc_test_accuracy],
           [ann_time,ann_train_accuracy,ann_test_accuracy]
          ]

In [143]:
dataset

[[23.019588947296143, 0.9891502259120779, 0.988569566048127],
 [1.7969074249267578, 0.9934961298975715, 0.9937846312926394],
 [0.9068188667297363, 0.9944589724184956, 0.9940993335056703],
 [47.627418994903564, 0.9944589724184956, 0.9941330515999236],
 [0.15570878982543945, 0.7548835222802509, 0.7541501354343453],
 [75.70233845710754, 0.984418435624424, 0.9836467242871433],
 [310.154185295105, 0.9944440126419067, 0.9941105842590332]]

In [145]:
comparison = pd.DataFrame(dataset, columns = ['Time_Taken','Training_Accuracy','Testing_Accuracy'], 
                         index=['Logistic Regression','K Nearest Neighbour','Decision Tree','Random Forest','Naive Bayes','Support Vector Machine','Artificial Neural Network',])
comparison

Unnamed: 0,Time_Taken,Training_Accuracy,Testing_Accuracy
Logistic Regression,23.019589,0.98915,0.98857
K Nearest Neighbour,1.796907,0.993496,0.993785
Decision Tree,0.906819,0.994459,0.994099
Random Forest,47.627419,0.994459,0.994133
Naive Bayes,0.155709,0.754884,0.75415
Support Vector Machine,75.702338,0.984418,0.983647
Artificial Neural Network,310.154185,0.994444,0.994111


In [146]:
comparison.to_csv('performance.csv')