In [1]:
import tensorflow as tf
import time

In [2]:
import pandas as pd
import seaborn as sns
import pickle
from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder,StandardScaler,MinMaxScaler,LabelEncoder
import numpy as np

In [3]:
# Names of the numeric connection columns.
numeric_features = [
    "duration",
    "orig_bytes",
    "resp_bytes",
    "missed_bytes",
    "local_orig",
    "local_resp",
    "orig_pkts",
    "orig_ip_bytes",
    "resp_pkts",
    "resp_ip_bytes",
]

# Names of the categorical columns; presumably the ones one-hot encoded
# inside the pickled pipeline — TODO confirm.
onehotlists = [
    "proto",
    "service",
    "conn_state",
    "history",
    "tunnel_parents",
]

In [4]:
# Restore the pickled preprocessing object from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file if it comes from a trusted source.
with open(r"f17_pipeline.pkl", "rb") as input_file:
    # The with-block closes the file on exit, so no explicit close() is needed.
    F17_preprocessor = pickle.load(input_file)

In [5]:
def detailed_label_converter(x):
    """Map a detailed-label value to an integer class code.

    '-' -> 0, "PartOfAHorizontalPortScan" -> 1, "DDoS" -> 2,
    anything else -> 3.
    """
    # Position in this tuple is the class code for that label.
    known_labels = ('-', "PartOfAHorizontalPortScan", "DDoS")
    for code, name in enumerate(known_labels):
        if x == name:
            return code
    return 3
    
def binary_label_converter(x):
    """Return 0 when the label reads 'benign' (case-insensitive), otherwise 1."""
    is_benign = str(x).lower() == 'benign'
    return 0 if is_benign else 1
    
def insertnumbers(x):
    """Swap the placeholder strings '-' and '(empty)' for the sentinel 99.

    Any other value is returned unchanged.
    """
    is_placeholder = (x == '-') | (x == '(empty)')
    return 99 if is_placeholder else x

In [6]:
def matrix_to3D(X_train, X_test):
    """Reshape two 2-D feature matrices into (samples, height, width, 1) arrays.

    The column count of ``X_train`` is factored into height x width using the
    middle entry of its sorted divisor list as the height. When the column
    count has exactly two divisors (a prime), one all-zero column is appended
    to both matrices first and the divisors are recomputed.

    Returns the reshaped ``X_train`` and ``X_test`` as a tuple.
    """
    def _divisors(n):
        # All positive divisors of n, in ascending order.
        return [d for d in range(1, n + 1) if n % d == 0]

    n_features = X_train.shape[1]
    factors = _divisors(n_features)

    if len(factors) == 2:  # only 1 and itself divide it: prime column count
        pad_train = np.zeros((X_train.shape[0], 1))
        pad_test = np.zeros((X_test.shape[0], 1))
        X_train = np.concatenate((X_train, pad_train), axis=1)
        X_test = np.concatenate((X_test, pad_test), axis=1)
        n_features = X_train.shape[1]
        factors = _divisors(n_features)

    height = factors[len(factors) // 2]
    width = int(n_features / height)
    target_shape = (-1, height, width, 1)

    return X_train.reshape(target_shape), X_test.reshape(target_shape)

In [7]:
# Load the saved Keras model stored at 'ANN_Binary_3' (path relative to the working directory).
ANN_model = tf.keras.models.load_model('ANN_Binary_3')

In [8]:
# Load the saved Keras model stored at 'CNN_Binary_3' (path relative to the working directory).
CNN_model = tf.keras.models.load_model('CNN_Binary_3')

In [9]:
# Load the saved Keras model stored at 'CNN2D_Binary_3' (path relative to the working directory).
CNN2D_model = tf.keras.models.load_model('CNN2D_Binary_3')

In [10]:
# Load the saved Keras model stored at 'LSTM_Binary_3' (path relative to the working directory).
LSTM_model = tf.keras.models.load_model('LSTM_Binary_3')

In [11]:
# Load the saved Keras model stored at 'CNN_LSTM_Binary_3' (path relative to the working directory).
CNN_LSTM_model = tf.keras.models.load_model('CNN_LSTM_Binary_3')

In [12]:
# Load the data used for inference. Only the IoT0 sample is read;
# the other inputs below are alternatives that are currently disabled.
#inference_df = pd.read_csv("./IoT23_Dataset/original/datasets/3_data_v2/S04_R_5_000_000.csv")
df1 = pd.read_csv('./IoT23_Dataset/original/datasets/3_data_v2/S04_R_1_000_IoT0.csv')
#df2 = pd.read_csv('./IoT23_Dataset/original/datasets/3_data_v2/S04_R_1_000_IoT1.csv')
#df3 = pd.read_csv('./IoT23_Dataset/original/datasets/3_data_v2/S04_R_1_000_IoT2.csv')

In [13]:
# Discard the timestamp, connection id, address/port and detailed-label columns.
df1 = df1.drop(
    columns=['ts', 'uid', 'id.orig_h', 'id.orig_p', 'id.resp_h', 'id.resp_p', 'detailed-label']
)

In [14]:
# Display the frame as it looks after the column drop.
df1

Unnamed: 0,proto,service,duration,orig_bytes,resp_bytes,conn_state,local_orig,local_resp,missed_bytes,history,orig_pkts,orig_ip_bytes,resp_pkts,resp_ip_bytes,tunnel_parents,label
0,tcp,-,-,-,-,S0,-,-,0,S,1,40,0,0,-,Malicious
1,tcp,-,-,-,-,S0,-,-,0,S,1,40,0,0,-,Malicious
2,tcp,-,0.000002,0,0,S0,-,-,0,S,2,80,0,0,-,Malicious
3,tcp,-,0.000005,0,0,S0,-,-,0,S,2,80,0,0,-,Malicious
4,tcp,-,-,-,-,OTH,-,-,0,C,0,0,0,0,-,Malicious
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,tcp,-,-,-,-,S0,-,-,0,S,1,40,0,0,-,Malicious
3996,tcp,-,-,-,-,S0,-,-,0,S,1,60,0,0,-,Benign
3997,tcp,-,-,-,-,S0,-,-,0,S,1,40,0,0,-,Benign
3998,tcp,-,-,-,-,S0,-,-,0,S,1,40,0,0,-,Benign


In [15]:
#df1, df2 = train_test_split(inference_df, test_size=0.6)

In [16]:
def do_preprocessing(data):
    """Turn a raw connection-log frame into model features and binary labels.

    Placeholder strings in the ``numeric_features`` columns are replaced via
    ``insertnumbers``, the ``label`` column is mapped to 0/1 via
    ``binary_label_converter``, and the remaining columns are passed to
    ``F17_preprocessor.transform``.

    The input is copied first, so ``data`` is left untouched and the function
    can be called repeatedly on the same frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column named in ``numeric_features`` plus
        ``label``, along with whatever else ``F17_preprocessor`` expects.

    Returns
    -------
    X_test
        Whatever ``F17_preprocessor.transform`` returns for the feature columns.
    labels : numpy.ndarray
        0 where the label reads 'benign' (case-insensitive), 1 otherwise.
    """
    # Work on a copy: assigning columns and popping 'label' on the caller's
    # frame would make a second call on the same frame fail with KeyError.
    temp = data.copy()

    numeric_block = temp[numeric_features]
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use whichever one this pandas version provides.
    elementwise = getattr(numeric_block, "map", None) or numeric_block.applymap
    temp[numeric_features] = elementwise(insertnumbers)

    labels = temp.pop('label').map(binary_label_converter).values
    X_test = F17_preprocessor.transform(temp)

    return X_test, labels

In [None]:
# Benchmark do_preprocessing: mean duration in milliseconds over n_runs calls.
n_runs = 30  # single source of truth for both the loop and the average
total_time = 0
for i in range(n_runs):
    # A fresh frame is loaded for every run so each timed call starts from
    # identical raw input; loading and dropping stay outside the timed region.
    df1 = pd.read_csv('./IoT23_Dataset/original/datasets/3_data_v2/S04_R_1_000_IoT0.csv')
    df1 = df1.drop(columns=['ts', 'uid', 'id.orig_h', 'id.orig_p','id.resp_h','id.resp_p', 'detailed-label'])
    # perf_counter_ns is monotonic and high-resolution; time_ns is the wall
    # clock and can jump when the system time is adjusted.
    start_time = time.perf_counter_ns()
    X_test, y_test = do_preprocessing(df1)
    end_time = time.perf_counter_ns()
    total_time += (end_time - start_time)/1000000  # ns -> ms
print(total_time/n_runs)

Average preprocessing time: ~27 ms for 4000 rows

In [18]:
# Number of entries in the first preprocessed row, i.e. the feature count.
len(X_test[0])

206

In [19]:
# Number of preprocessed rows.
len(X_test)

4000

In [None]:
# ANN
# Mean ANN_model.predict duration in milliseconds over n_runs calls.
n_runs = 50  # single source of truth for both the loop and the average
total_time = 0

# Append a trailing axis of length 1 when X_test has fewer than 3 dimensions.
if len(X_test.shape) < 3:
    X_test_1D = X_test.reshape(-1, X_test.shape[1], 1)
else:
    X_test_1D = X_test

for i in range(n_runs):
    # perf_counter_ns is monotonic and high-resolution; time_ns is the wall
    # clock and can jump when the system time is adjusted.
    start_time = time.perf_counter_ns()
    ypred = ANN_model.predict(X_test_1D)
    end_time = time.perf_counter_ns()
    total_time += (end_time - start_time)/1000000  # ns -> ms

avg_time = total_time/n_runs
print("Average prediction time: "+str(avg_time)+"ms")

In [26]:
# Accuracy: share of rows whose highest-scoring output index equals the label.
# Assumes ypred is 2-D (samples x outputs) — TODO confirm.
# NOTE(review): with a single-column output argmax is always 0; verify the
# model's output layer really has one unit per class.
predicted_labels = np.argmax(ypred, axis=1)
count = int(np.sum(predicted_labels == y_test))

acc = float(count)/len(ypred)
print(acc)

0.591


In [None]:
# CNN 1D
# Mean CNN_model.predict duration in milliseconds over n_runs calls.
n_runs = 50  # single source of truth for both the loop and the average
total_time = 0

# Append a trailing axis of length 1 when X_test has fewer than 3 dimensions.
if len(X_test.shape) < 3:
    X_test_1D = X_test.reshape(-1, X_test.shape[1], 1)
else:
    X_test_1D = X_test

for i in range(n_runs):
    # perf_counter_ns is monotonic and high-resolution; time_ns is the wall
    # clock and can jump when the system time is adjusted.
    start_time = time.perf_counter_ns()
    ypred = CNN_model.predict(X_test_1D)
    end_time = time.perf_counter_ns()
    total_time += (end_time - start_time)/1000000  # ns -> ms

avg_time = total_time/n_runs
print("Average prediction time: "+str(avg_time)+"ms")

In [30]:
# Accuracy: share of rows whose highest-scoring output index equals the label.
# Assumes ypred is 2-D (samples x outputs) — TODO confirm.
predicted_labels = np.argmax(ypred, axis=1)
count = int(np.sum(predicted_labels == y_test))

acc = float(count)/len(ypred)
print(acc)

0.595


In [32]:
# Second reference to X_test (an alias, not a copy); later cells pass it as
# the first argument of matrix_to3D.
X_temp = X_test

In [None]:
# CNN 2D
# Mean CNN2D_model.predict duration in milliseconds over n_runs calls.
n_runs = 50  # single source of truth for both the loop and the average
total_time = 0

# matrix_to3D reshapes both inputs to (samples, height, width, 1); only the
# second result is fed to the model here.
X_temp_2D, X_test_2D = matrix_to3D(X_temp, X_test)

for i in range(n_runs):
    # perf_counter_ns is monotonic and high-resolution; time_ns is the wall
    # clock and can jump when the system time is adjusted.
    start_time = time.perf_counter_ns()
    ypred = CNN2D_model.predict(X_test_2D)
    end_time = time.perf_counter_ns()
    total_time += (end_time - start_time)/1000000  # ns -> ms

avg_time = total_time/n_runs
print("Average prediction time: "+str(avg_time)+"ms")

In [34]:
# Accuracy: share of rows whose highest-scoring output index equals the label.
# Assumes ypred is 2-D (samples x outputs) — TODO confirm.
predicted_labels = np.argmax(ypred, axis=1)
count = int(np.sum(predicted_labels == y_test))

acc = float(count)/len(ypred)
print(acc)

0.59575


In [None]:
# LSTM
# Mean LSTM_model.predict duration in milliseconds over n_runs calls.
n_runs = 50  # single source of truth for both the loop and the average
total_time = 0

# Reshape to (samples, height, width, 1), then drop the trailing axis so the
# model receives (samples, height, width).
X_temp_2D, X_test_2D = matrix_to3D(X_temp, X_test)
X_test_LSTM = X_test_2D.reshape(-1, X_test_2D.shape[1], X_test_2D.shape[2])

for i in range(n_runs):
    # perf_counter_ns is monotonic and high-resolution; time_ns is the wall
    # clock and can jump when the system time is adjusted.
    start_time = time.perf_counter_ns()
    ypred = LSTM_model.predict(X_test_LSTM)
    end_time = time.perf_counter_ns()
    total_time += (end_time - start_time)/1000000  # ns -> ms

avg_time = total_time/n_runs
print("Average prediction time: "+str(avg_time)+"ms")

In [39]:
# Accuracy: share of rows whose highest-scoring output index equals the label.
# Assumes ypred is 2-D (samples x outputs) — TODO confirm.
predicted_labels = np.argmax(ypred, axis=1)
count = int(np.sum(predicted_labels == y_test))

acc = float(count)/len(ypred)
print(acc)

0.75


In [None]:
# CNN+LSTM
# Mean CNN_LSTM_model.predict duration in milliseconds over n_runs calls.
n_runs = 50  # single source of truth for both the loop and the average
total_time = 0

# Append a trailing axis of length 1 when X_test has fewer than 3 dimensions.
if len(X_test.shape) < 3:
    X_test_1D = X_test.reshape(-1,X_test.shape[1],1)
else:
    X_test_1D = X_test

for i in range(n_runs):
    # perf_counter_ns is monotonic and high-resolution; time_ns is the wall
    # clock and can jump when the system time is adjusted.
    start_time = time.perf_counter_ns()
    # Predict on the reshaped array, as the ANN and CNN 1D cells do; this
    # cell previously built X_test_1D but then passed the raw X_test.
    ypred = CNN_LSTM_model.predict(X_test_1D)
    end_time = time.perf_counter_ns()
    total_time += (end_time - start_time)/1000000  # ns -> ms

avg_time = total_time/n_runs
print("Average prediction time: "+str(avg_time)+"ms")

In [243]:
# Accuracy: share of rows whose highest-scoring output index equals the label.
# Assumes ypred is 2-D (samples x outputs) — TODO confirm.
predicted_labels = np.argmax(ypred, axis=1)
count = int(np.sum(predicted_labels == y_test))

acc = float(count)/len(ypred)
print(acc)

0.47925
