In [1]:
# Importing Libraries
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sklearn

# Preprocessing purpose
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Splitting Data
from sklearn.model_selection import train_test_split

# For Decision tree, accuracy, Classification Report, Confusion Matrix
from sklearn import metrics
from sklearn import tree

# Mail Notification
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Uploading datasets for training,testing and prediction
def readData():
    dataset = pd.read_csv('./Datasets/UNSW_NB15_training-set.csv')
    return dataset

In [3]:
def standardScaling(dataset):
    SS = StandardScaler()
    # extract numerical attributes and scale it to have unit standard deviation 
    cols = dataset.select_dtypes(include=['float64','int64']).columns
    intCols = SS.fit_transform(dataset.select_dtypes(include=['float64','int64']))
    # turn the result back to a dataframe
    intColsDf = pd.DataFrame(intCols, columns = cols)
    return intColsDf

In [4]:
def LabelEncoding(dataset):
    LE = LabelEncoder()
    # extract categorical attributes from both training and test sets
    objCols = dataset.select_dtypes(include=['object']).copy()
    # encode the categorical attributes
    LEobjCols = objCols.apply(LE.fit_transform)
    # separate target column from encoded data 
    encObjCols = LEobjCols.drop(['attack_cat'], axis=1)
    return encObjCols

In [5]:
def mlTraining(train_x, train_y):
    X_train,X_test,Y_train,Y_test = train_test_split(train_x,train_y,test_size=0.80, random_state=10)
    DTC_Classifier = tree.DecisionTreeClassifier(criterion='entropy', random_state=0) #Decision Tree Classifier
    DTC_Classifier.fit(X_train, Y_train)
    return [DTC_Classifier, X_train, X_test, Y_train, Y_test]

In [6]:
dataset = readData()
intCols = standardScaling(dataset)
objCols = LabelEncoding(dataset)
train_x = pd.concat([intCols, objCols],axis=1)
train_y = dataset['attack_cat']
trainingOutput = mlTraining(train_x, train_y)
model = trainingOutput[0]
X_train = trainingOutput[1]
X_test = trainingOutput[2]
Y_train = trainingOutput[3]
Y_test = trainingOutput[4]

In [8]:
accuracy = metrics.accuracy_score(Y_train, model.predict(X_train))
confusion_matrix = metrics.confusion_matrix(Y_train, model.predict(X_train))
classification = metrics.classification_report(Y_train, model.predict(X_train))
print()
print('============================== Decision Tree Classifier Model Evaluation ==============================')
print ("Model Accuracy:" "\n", accuracy)
print()
print("Confusion matrix:" "\n", confusion_matrix)
print()
print("Classification report:" "\n", classification) 
print()


Model Accuracy:
 1.0

Confusion matrix:
 [[ 128    0    0    0    0    0    0    0    0    0]
 [   0  116    0    0    0    0    0    0    0    0]
 [   0    0  767    0    0    0    0    0    0    0]
 [   0    0    0 2231    0    0    0    0    0    0]
 [   0    0    0    0 1248    0    0    0    0    0]
 [   0    0    0    0    0 3867    0    0    0    0]
 [   0    0    0    0    0    0 7305    0    0    0]
 [   0    0    0    0    0    0    0  713    0    0]
 [   0    0    0    0    0    0    0    0   82    0]
 [   0    0    0    0    0    0    0    0    0    9]]

Classification report:
                 precision    recall  f1-score   support

      Analysis       1.00      1.00      1.00       128
      Backdoor       1.00      1.00      1.00       116
           DoS       1.00      1.00      1.00       767
      Exploits       1.00      1.00      1.00      2231
       Fuzzers       1.00      1.00      1.00      1248
       Generic       1.00      1.00      1.00      3867
        N

In [9]:
test_accuracy = metrics.accuracy_score(Y_test, model.predict(X_test))
test_confusion_matrix = metrics.confusion_matrix(Y_test, model.predict(X_test))
classification = metrics.classification_report(Y_test, model.predict(X_test))
print()
print('============================== Decision Tree Classifier Model Test Results ==============================')
print()
print ("Model Accuracy:" "\n", accuracy)
print()
print("Confusion matrix:" "\n", confusion_matrix)
print()
print("Classification report:" "\n", classification) 
print()        



Model Accuracy:
 0.8758236419396958

Confusion matrix:
 [[   56    96    98   182   111     0     0     6     0     0]
 [   81    29    70   175    94     7     0    10     1     0]
 [  137    88  1086  1472   255    86     0   164    33     1]
 [  190   170  1422  5988   519   174     0   332    83    23]
 [  112   106   255   480  3718    37     0    44    60     2]
 [    0     4    67   153    52 14712     0     5    10     1]
 [    0     0     0     0     0     0 29695     0     0     0]
 [    3     5   153   277    42     3     0  2275    25     0]
 [    0     2    47    50    30    18     0    21   127     1]
 [    0     0     7    22     3     1     0     0     1     1]]

Classification report:
                 precision    recall  f1-score   support

      Analysis       0.10      0.10      0.10       549
      Backdoor       0.06      0.06      0.06       467
           DoS       0.34      0.33      0.33      3322
      Exploits       0.68      0.67      0.68      8901
     