# Intrusion Detection System

* Objective: Prediction of intrusion/no intrusion using data generated from:
    * Spider 3D
    * POWDER experiments


# Imports

In [1]:
import pandas as pd
from datetime import date
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 

import os
import sys
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import joblib


from sklearn.metrics import classification_report

from sklearn import metrics as metrics


pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 500)
pd.options.display.float_format = '{:20,.4f}'.format
pd.options.display.max_colwidth=500
np.set_printoptions(threshold=sys.maxsize)

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-7k8c3e1f because the default path (/home/qs/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


# Get Data

## SPIDER 3D

In [2]:
# read in SPIDER 3D data and aggregate into one csv file 
# distinguish run with model column
traffic_predictions_SPIDER=pd.DataFrame()
for i in np.arange(0,23,1):
    data=pd.read_csv('model'+str(i)+'.csv')
    data['model']=i
    traffic_predictions_SPIDER=pd.concat([traffic_predictions_SPIDER,data])
traffic_predictions_SPIDER=traffic_predictions_SPIDER.reset_index(drop=True)

# clean up column names and drop null column
traffic_predictions_SPIDER.columns = traffic_predictions_SPIDER.columns.str.replace(' ', '')
traffic_predictions_SPIDER=traffic_predictions_SPIDER.drop(columns=['SubflowFwdBytes'])
traffic_predictions_SPIDER=traffic_predictions_SPIDER.rename(columns={'SubflowFwdByutes':'SubflowFwdBytes'})

# rearrange for test
traffic_predictions_SPIDER=traffic_predictions_SPIDER[['model',
                                                         'FlowBytes/s',
                                                        'BWDPacketLengthStd',
                                                        'DestinationPort',
                                                        'SubflowFwdBytes',
                                                        'TotalLengthofFwdPackets',
                                                        'Init_Win_bytes_forward',
                                                        'act_data_pkt_fwd',
                                                        'FwdIATMin',
                                                        'BwdPackets/s',
                                                        'AveragePacketSize' ]]
# pull out traffic features for prediction
traffic_SPIDER=np.array(traffic_predictions_SPIDER.iloc[:,1:])


# set up dataframe to place predicted value
traffic_predictions_SPIDER['RF_prediction_value']=np.nan
traffic_predictions_SPIDER['CLF_prediction_value']=np.nan

traffic_predictions_SPIDER['RF_prediction']=np.nan
traffic_predictions_SPIDER['CLF_prediction']=np.nan

## POWDER

In [3]:
traffic_predictions_POWDER=pd.DataFrame()
for i in np.arange(23,31,1):
    data=pd.read_csv('model'+str(i)+'.csv')
    data['model']=i
    traffic_predictions_POWDER=pd.concat([traffic_predictions_POWDER,data])
traffic_predictions_POWDER=traffic_predictions_POWDER.reset_index(drop=True)

traffic_predictions_POWDER.columns = traffic_predictions_POWDER.columns.str.replace(' ', '')

traffic_predictions_POWDER=traffic_predictions_POWDER.fillna(0)
traffic_predictions_POWDER=traffic_predictions_POWDER[['model',
                                                        'FlowBytes/s',
                                                        'BWDPacketLengthStd',
                                                        'DestinationPort',
                                                        'SubflowFwdBytes',
                                                        'TotalLengthofFwdPackets',
                                                        'Init_Win_bytes_forward',
                                                        'act_data_pkt_fwd',
                                                        'FwdIATMin',
                                                        'BwdPackets/s',
                                                        'AveragePacketSize' ]]


# pull out traffic features for prediction
traffic_POWDER=np.array(traffic_predictions_POWDER.iloc[:,1:])
scaler =joblib.load('scaler.pkl')
scaler = StandardScaler()
traffic_POWDER = scaler.fit_transform(traffic_POWDER)



# set up dataframe to place predicted value
traffic_predictions_POWDER['RF_prediction_value']=np.nan
traffic_predictions_POWDER['CLF_prediction_value']=np.nan

traffic_predictions_POWDER['RF_prediction']=np.nan
traffic_predictions_POWDER['CLF_prediction']=np.nan

# Load Models and Predict

In [4]:
rf_model = joblib.load('RF_intrusiondetection.pkl')
clf_model = joblib.load('clf_DecisionTree.pkl')

## SPIDER 3D PREDICTIONS

In [5]:
rf_predictions_SPIDER=rf_model.predict(traffic_SPIDER)
clf_predictions_SPIDER=clf_model.predict(traffic_SPIDER)

In [6]:
# insert predictions into df
for i in np.arange(0,len(traffic_SPIDER),1):
    traffic_predictions_SPIDER.loc[traffic_predictions_SPIDER.index == i, 'RF_prediction_value']=rf_predictions_SPIDER[i]
    traffic_predictions_SPIDER.loc[traffic_predictions_SPIDER.index == i, 'CLF_prediction_value']=clf_predictions_SPIDER[i]
    
    
traffic_predictions_SPIDER.loc[traffic_predictions_SPIDER.RF_prediction_value >=0.5, 'RF_prediction']='Intrusion Detected'
traffic_predictions_SPIDER.loc[traffic_predictions_SPIDER.RF_prediction_value <0.5, 'RF_prediction']='No Intrusion Detected'

traffic_predictions_SPIDER.loc[traffic_predictions_SPIDER.CLF_prediction_value >=0.5, 'CLF_prediction']='Intrusion Detected'
traffic_predictions_SPIDER.loc[traffic_predictions_SPIDER.CLF_prediction_value <0.5, 'CLF_prediction']='No Intrusion Detected'

In [7]:
# Look at ten predictions
traffic_predictions_SPIDER[[ 'FlowBytes/s',
                            'BWDPacketLengthStd',
                            'DestinationPort',
                            'SubflowFwdBytes',
                            'TotalLengthofFwdPackets',
                            'Init_Win_bytes_forward',
                            'act_data_pkt_fwd',
                            'FwdIATMin',
                            'BwdPackets/s',
                            'AveragePacketSize',
                            'RF_prediction_value',
                            'RF_prediction',
                            'CLF_prediction_value',
                            'CLF_prediction']].head(10)

Unnamed: 0,FlowBytes/s,BWDPacketLengthStd,DestinationPort,SubflowFwdBytes,TotalLengthofFwdPackets,Init_Win_bytes_forward,act_data_pkt_fwd,FwdIATMin,BwdPackets/s,AveragePacketSize,RF_prediction_value,RF_prediction,CLF_prediction_value,CLF_prediction
0,-125.0,-1.0,80,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,No Intrusion Detected,0.0,No Intrusion Detected
1,-125.0,-1.0,80,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,No Intrusion Detected,0.0,No Intrusion Detected
2,-125.0,-1.0,80,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,No Intrusion Detected,0.0,No Intrusion Detected
3,-125.0,-1.0,80,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,No Intrusion Detected,0.0,No Intrusion Detected
4,996.0,0.1683,80,0.1683,0.1683,0.0112,15.0,0.0,695.0,0.0112,0.0,No Intrusion Detected,0.0,No Intrusion Detected
5,672.0,0.037,80,0.037,0.037,0.0046,8.0,0.0,1155.0,0.0046,0.0,No Intrusion Detected,0.0,No Intrusion Detected
6,756.0,0.0462,80,0.0462,0.0462,0.0051,9.0,0.0,1169.0,0.0051,0.0,No Intrusion Detected,0.0,No Intrusion Detected
7,840.0,0.0567,80,0.0567,0.0567,0.0057,10.0,0.0,1173.0,0.0057,0.0,No Intrusion Detected,0.0,No Intrusion Detected
8,588.0,0.0281,80,0.0281,0.0281,0.004,7.0,0.0,1164.0,0.004,0.0,No Intrusion Detected,0.0,No Intrusion Detected
9,588.0,0.0281,80,0.0281,0.0281,0.004,7.0,0.0,1160.0,0.004,0.0,No Intrusion Detected,0.0,No Intrusion Detected


In [10]:
SPIDER_MODEL_PREDICTIONS=traffic_predictions_SPIDER.groupby('model')['RF_prediction'].unique().reset_index().merge(traffic_predictions_SPIDER.groupby('model')['CLF_prediction'].unique().reset_index())
SPIDER_MODEL_PREDICTIONS



Unnamed: 0,model,RF_prediction,CLF_prediction
0,0,[No Intrusion Detected],[No Intrusion Detected]
1,1,[No Intrusion Detected],[No Intrusion Detected]
2,2,[No Intrusion Detected],[No Intrusion Detected]
3,3,[No Intrusion Detected],[No Intrusion Detected]
4,4,[No Intrusion Detected],[No Intrusion Detected]
5,5,[No Intrusion Detected],[No Intrusion Detected]
6,6,[No Intrusion Detected],"[No Intrusion Detected, Intrusion Detected]"
7,7,[No Intrusion Detected],[No Intrusion Detected]
8,8,[No Intrusion Detected],[No Intrusion Detected]
9,9,[No Intrusion Detected],[No Intrusion Detected]


In [11]:
traffic_predictions_SPIDER.groupby(['model','CLF_prediction'])['CLF_prediction'].count()

model  CLF_prediction       
0      No Intrusion Detected    304
1      No Intrusion Detected    304
2      No Intrusion Detected    304
3      No Intrusion Detected    304
4      No Intrusion Detected    304
5      No Intrusion Detected    304
6      Intrusion Detected         1
       No Intrusion Detected    303
7      No Intrusion Detected    304
8      No Intrusion Detected    304
9      No Intrusion Detected    304
10     No Intrusion Detected    304
11     Intrusion Detected         1
       No Intrusion Detected    303
12     No Intrusion Detected    304
13     Intrusion Detected         1
       No Intrusion Detected    303
14     Intrusion Detected         1
       No Intrusion Detected    303
15     No Intrusion Detected    304
16     No Intrusion Detected    304
17     No Intrusion Detected    304
18     No Intrusion Detected    304
19     Intrusion Detected         1
       No Intrusion Detected    303
20     No Intrusion Detected    304
21     No Intrusion Detected    304

In [12]:
traffic_predictions_SPIDER.to_csv('SPIDER_PREDICTIONS_all.csv')

## POWDER PREDICTIONS

In [26]:
traffic_POWDER[0].reshape(1, -1)

array([[-0.29717268, -0.28972531,  0.        , -0.28972531, -0.28972531,
        -0.28644092, -0.24824588,  3.48510343, -0.28644092, -0.28644092]])

In [27]:
rf_model.predict(traffic_POWDER[0].reshape(1, -1))

array([0.9])

In [8]:
rf_predictions_POWDER=rf_model.predict(traffic_POWDER)
clf_predictions_POWDER=clf_model.predict(traffic_POWDER)

In [9]:
rf_predictions_POWDER

array([0.9, 0.9, 0.9, 0.1, 0. , 0. , 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0. , 0. , 0. , 0. , 0. , 0. , 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
       0.1, 0.1, 0.1, 0.1, 0. , 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9,
       0.9, 0.9, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ,
       0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.

In [10]:
# insert predictions into df
for i in np.arange(0,len(traffic_POWDER),1):
    traffic_predictions_POWDER.loc[traffic_predictions_POWDER.index == i, 'RF_prediction_value']=rf_predictions_POWDER[i]
    traffic_predictions_POWDER.loc[traffic_predictions_POWDER.index == i, 'CLF_prediction_value']=clf_predictions_POWDER[i]
    
    
traffic_predictions_POWDER.loc[traffic_predictions_POWDER.RF_prediction_value >=0.5, 'RF_prediction']='Intrusion Detected'
traffic_predictions_POWDER.loc[traffic_predictions_POWDER.RF_prediction_value <0.5, 'RF_prediction']='No Intrusion Detected'

traffic_predictions_POWDER.loc[traffic_predictions_POWDER.CLF_prediction_value >=0.5, 'CLF_prediction']='Intrusion Detected'
traffic_predictions_POWDER.loc[traffic_predictions_POWDER.CLF_prediction_value <0.5, 'CLF_prediction']='No Intrusion Detected'

In [11]:
# Look at ten predictions
traffic_predictions_POWDER[[ 'FlowBytes/s',
                            'BWDPacketLengthStd',
                            'DestinationPort',
                            'SubflowFwdBytes',
                            'TotalLengthofFwdPackets',
                            'Init_Win_bytes_forward',
                            'act_data_pkt_fwd',
                            'FwdIATMin',
                            'BwdPackets/s',
                            'AveragePacketSize',
                            'RF_prediction_value',
                            'RF_prediction',
                            'CLF_prediction_value',
                            'CLF_prediction']].head(10)

Unnamed: 0,FlowBytes/s,BWDPacketLengthStd,DestinationPort,SubflowFwdBytes,TotalLengthofFwdPackets,Init_Win_bytes_forward,act_data_pkt_fwd,FwdIATMin,BwdPackets/s,AveragePacketSize,RF_prediction_value,RF_prediction,CLF_prediction_value,CLF_prediction
0,0.0,0.0,80,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.9,Intrusion Detected,1.0,Intrusion Detected
1,156.0,0.624,80,0.624,0.624,0.624,1.0,1.0,0.624,0.624,0.9,Intrusion Detected,1.0,Intrusion Detected
2,52.0,0.416,80,0.416,0.416,0.416,1.0,1.0,0.416,0.416,0.9,Intrusion Detected,1.0,Intrusion Detected
3,122.0,0.6507,80,0.6507,0.6507,0.3253,2.0,0.0,0.3253,0.3253,0.1,No Intrusion Detected,0.0,No Intrusion Detected
4,514452.0,2072.093,80,2072.093,2072.093,0.3861,5367.0,0.0,0.3861,0.3861,0.0,No Intrusion Detected,0.0,No Intrusion Detected
5,588518.0,2353.8831,80,2353.8831,2353.8831,0.3777,6232.0,0.0,0.3777,0.3777,0.0,No Intrusion Detected,0.0,No Intrusion Detected
6,2128782.0,9354.21,80,9354.21,9354.21,1.2431,7525.0,0.0,1.2431,1.2431,0.1,No Intrusion Detected,1.0,Intrusion Detected
7,2255188.0,10018.4238,80,10018.4238,10018.4238,1.3033,7687.0,0.0,1.3033,1.3033,0.1,No Intrusion Detected,1.0,Intrusion Detected
8,2350163.0,10484.4692,80,10484.4692,10484.4692,1.3763,7618.0,0.0,1.3763,1.3763,0.1,No Intrusion Detected,1.0,Intrusion Detected
9,2145422.0,9488.6027,80,9488.6027,9488.6027,1.2711,7465.0,0.0,1.2711,1.2711,0.1,No Intrusion Detected,1.0,Intrusion Detected


In [85]:
traffic_predictions_POWDER.to_csv('POWDER_PREDICTIONS.csv')