In [1]:
import pandas as pd

import numpy as np

import random as rng

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

from tqdm.keras import TqdmCallback

from keras import models
from keras.utils import to_categorical
from keras import backend as K

from keract import get_activations

from sklearn.preprocessing import MinMaxScaler, LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.operators.sampling.rnd import FloatRandomSampling
from pymoo.operators.mutation.pm import PolynomialMutation
from pymoo.operators.crossover.sbx import SBX
from pymoo.operators.crossover.pntx import TwoPointCrossover
from pymoo.core.problem import Problem, ElementwiseProblem

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the ToN_IoT dataset from CSV and preview its first rows.
# Nothing is shuffled in this cell; rows keep their on-disk order.

csv_path = "Data/ToN_IoT.csv"
ton_iot = pd.read_csv(csv_path)
ton_iot.head()

Unnamed: 0,ts,src_ip,src_port,dst_ip,dst_port,proto,service,duration,src_bytes,dst_bytes,...,http_response_body_len,http_status_code,http_user_agent,http_orig_mime_types,http_resp_mime_types,weird_name,weird_addl,weird_notice,label,type
0,1554198358,3.122.49.24,1883,192.168.1.152,52976,tcp,-,80549.53026,1762852,41933215,...,0,0,-,-,-,bad_TCP_checksum,-,F,0,normal
1,1554198358,192.168.1.79,47260,192.168.1.255,15600,udp,-,0.0,0,0,...,0,0,-,-,-,-,-,-,0,normal
2,1554198359,192.168.1.152,1880,192.168.1.152,51782,tcp,-,0.0,0,0,...,0,0,-,-,-,bad_TCP_checksum,-,F,0,normal
3,1554198359,192.168.1.152,34296,192.168.1.152,10502,tcp,-,0.0,0,0,...,0,0,-,-,-,-,-,-,0,normal
4,1554198362,192.168.1.152,46608,192.168.1.190,53,udp,dns,0.000549,0,298,...,0,0,-,-,-,bad_UDP_checksum,-,F,0,normal


In [3]:
# Drop every column that is not used as a model feature. The binary `label`
# column is dropped as well; the multi-class `type` column is kept.

dropped_columns = [
    'ts', 'src_ip', 'src_port', 'dst_ip', 'dst_port', 'proto', 'service', 'dns_query',
    'http_user_agent', 'http_orig_mime_types', 'http_resp_mime_types', 'weird_name',
    'weird_addl', 'weird_notice', 'conn_state', 'http_trans_depth', 'http_method',
    'http_uri', 'http_version', 'ssl_cipher', 'ssl_resumed', 'ssl_established',
    'ssl_subject', 'ssl_issuer', 'dns_AA', 'dns_RD', 'dns_RA', 'dns_rejected', 'ssl_version',
    'label',
]
ton_iot = ton_iot.drop(columns=dropped_columns)
ton_iot.head()

Unnamed: 0,duration,src_bytes,dst_bytes,missed_bytes,src_pkts,src_ip_bytes,dst_pkts,dst_ip_bytes,dns_qclass,dns_qtype,dns_rcode,http_request_body_len,http_response_body_len,http_status_code,type
0,80549.53026,1762852,41933215,0,252181,14911156,2,236,0,0,0,0,0,0,normal
1,0.0,0,0,0,1,63,0,0,0,0,0,0,0,0,normal
2,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,normal
3,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,normal
4,0.000549,0,298,0,0,0,2,354,0,0,0,0,0,0,normal


In [4]:
# Encode the `type` column as an integer class id (the position of the name in
# this list), one-hot encode it, and split features/labels into train and test.
#
# The cell does not modify `ton_iot`, so it can be re-run safely; the previous
# in-place drop of `type` made a second run fail with a KeyError.
attack_types = ['normal', 'scanning', 'dos', 'injection', 'ddos', 'password',
                'xss', 'ransomware', 'backdoor', 'mitm']
type_to_class = {name: class_id for class_id, name in enumerate(attack_types)}

class_ids = ton_iot['type'].map(type_to_class)

# `map` yields NaN for names missing from the mapping; fail loudly instead of
# handing malformed labels to `to_categorical`.
if class_ids.isna().any():
    unknown_types = sorted(ton_iot.loc[class_ids.isna(), 'type'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'type' column: {unknown_types}")

y = class_ids.to_numpy().astype(int)

# Feature matrix: every remaining column except the target.
x = ton_iot.drop(columns="type").values

# Feature scaling is currently disabled:
# scaler = MinMaxScaler()

# x = scaler.fit_transform(x)

y = to_categorical(y)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=20)

In [5]:
# Early stopping on validation loss; the best weights seen are restored when
# training stops. With patience=2500 it only triggers after 2500 epochs
# without an improvement of at least min_delta.
es = EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0.0001,
    patience=2500,
    restore_best_weights=True,
    verbose=1,
)

# Fully connected classifier: 14 input features -> 10-way softmax.
model = keras.Sequential([
    layers.Dense(12, input_shape=(14,), activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(50, activation='relu'),
    layers.Dense(100, activation='relu'),
    layers.Dense(50, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Keras' own logging is silenced (verbose=0); progress is reported through the
# tqdm callback. 33% of the training data is held out for validation.
model.fit(
    x_train,
    y_train,
    validation_split=0.33,
    epochs=20000,
    batch_size=128,
    verbose=0,
    callbacks=[TqdmCallback(verbose=1), es],
)

  9%|▊         | 1731/20000 [1:25:05<20:10:27,  3.98s/epoch, loss=0.179, accuracy=0.946, val_loss=0.213, val_accuracy=0.956]

In [5]:
# Persisting the trained network is disabled; uncomment to overwrite the save:
# model.save("final_model")

# Replace `model` with the network previously saved under "final_model".
saved_model_path = "final_model"
model = keras.models.load_model(saved_model_path)

In [27]:
# Confusion matrix on the test split. Both the one-hot targets and the
# predicted probability rows are reduced to class ids with argmax first.
y_pred = model.predict(x_test)

true_classes = y_test.argmax(axis=1)
predicted_classes = y_pred.argmax(axis=1)
cm = confusion_matrix(true_classes, predicted_classes)

print(cm)

[[58808     2     5    24    10    47   468   553     0     3]
 [    8  3918     5     2     0     0     5     1     0     9]
 [   23    55  3934     9     0     0    18     1     0    21]
 [   78    11     1  3451    26     9   308     2     0     4]
 [  102     0     9    47  3733     9   162     7     0     3]
 [    7     0     0    10     2  3857   189     0     0    33]
 [   61     0    15    12     9     1  3851    37     1     1]
 [  718     5     0     0     0     0   660  2596     0     0]
 [    8     0     0     0     0     0    23     0  4035     0]
 [   66     0     1    13     1     0    32     0     0    74]]


In [28]:
# Evaluate the model on the test split; the result is kept in `score`.

score = model.evaluate(x=x_test, y=y_test, verbose=1)



In [31]:
# Create and test the function for neuron activation coverage
def Neuron_Activation_Coverage(model, input):
    """Return the fraction of layer-output values that are non-zero for ``input``.

    Activations are collected with keract's ``get_activations``. The entries
    that belong to the model's own inputs are discarded, so only values
    produced by layers are counted.

    Parameters
    ----------
    model
        Keras model to probe. Its input names are read from
        ``model.input_names`` when available, otherwise from the tensors in
        ``model.inputs`` (assumed to follow Keras' ``name[:index]`` convention).
    input
        NumPy array holding one sample (1-D) or a batch. A 1-D array gets a
        leading batch axis before being passed to the model.

    Returns
    -------
    float
        ``non_zero_values / total_values`` over all remaining activations, or
        ``0.0`` when no layer activations were returned.
    """
    if len(input.shape) < 2:
        input = np.expand_dims(input, axis=0)

    activations = get_activations(model, input, auto_compile=True)

    # Work out which keys are input placeholders instead of relying on one
    # auto-generated layer name such as "dense_6_input", which changes whenever
    # the model is rebuilt.
    input_names = getattr(model, "input_names", None)
    if input_names is None:
        input_names = [
            str(tensor.name).split(":")[0]
            for tensor in (getattr(model, "inputs", None) or [])
        ]
    input_names = set(input_names)

    layer_outputs = [
        value for name, value in activations.items() if name not in input_names
    ]

    total_nodes = sum(value.size for value in layer_outputs)
    if total_nodes == 0:
        # Nothing to measure; avoid a ZeroDivisionError.
        return 0.0

    non_zeros = sum(int(np.count_nonzero(value)) for value in layer_outputs)
    return non_zeros / total_nodes

# Smoke test: coverage of the first test sample.
first_test_sample = x_test[0]
nac = Neuron_Activation_Coverage(model, first_test_sample)

print("NAC: ", nac)

NAC:  0.4032258064516129


In [None]:
###### NAC alternative (disabled): computes each layer's output with keras.backend.function instead of keract

# inp = x_test[1]
# inp = np.expand_dims(inp, axis=0)

# from tensorflow.keras import backend as K
# nodes = 0
# non_zero_nodes = 0
# for layerIndex, layer in enumerate(model.layers):
#     func = K.function([model.get_layer(index=0).input], layer.output)
#     layerOutput = func([[inp]])  # input_data is a numpy array
#     nodes += layerOutput.size
#     non_zero_nodes += np.count_nonzero(layerOutput)

# neuronac = non_zero_nodes / nodes

# print(neuronac)

In [None]:
###### NAC alternative 2 (disabled): same backend-function approach, iterating over model.layers by index

# test_input = x_test[1]
# test_input = np.expand_dims(test_input, axis=0)


# nodes = 0
# non_zero_nodes = 0

# for n in range(0, len(model.layers)):
#     get_nth_layer_output = K.function([model.layers[0].input], [model.layers[n].output])
#     layer_output = get_nth_layer_output([test_input])[0]
#     nodes += layer_output.size

#     non_zero_nodes += np.count_nonzero(layer_output)


# neuron_coverage = non_zero_nodes / nodes

# print("Neuron coverage:", neuron_coverage)

In [32]:
# Problem class for NSGA
class NCMax(ElementwiseProblem):
    """Single-objective problem: find noise that raises one sample's NAC.

    The four decision variables, each bounded to [-1000, 1000], are added to
    features 1, 4, 5 and 11 of the stored sample. The objective is the
    reciprocal of the resulting neuron activation coverage, so minimising it
    maximises coverage. The network is read from the notebook-level ``model``.
    """

    def __init__(self, input):
        super().__init__(n_var=4, n_obj=1, n_constr=0, xl=-1000, xu=1000)
        self.input = input

    def _evaluate(self, x, out, *args, **kwargs):
        # Perturb a copy so the stored sample stays untouched between calls.
        candidate = np.copy(self.input)
        for feature_idx, delta in zip((1, 4, 5, 11), x):
            candidate[feature_idx] += delta

        coverage = Neuron_Activation_Coverage(model, candidate)
        # constr = x.max() - 0.01
        out["F"] = 1.0 / coverage
        # out["G"] = constr

# NSGA-II configuration passed to every `minimize` call in this notebook.
# pymoo's keyword names are `n_offsprings` and `crossover`. The previous
# spellings (`offsprings`, `crossovers`) are not NSGA2 parameters and were
# presumably swallowed by **kwargs, so 30 offspring per generation and the
# explicit SBX operator never took effect.
# NOTE(review): with n_offsprings now honoured, each generation evaluates 30
# candidates instead of pop_size (100); confirm this is the intended budget.
alg = NSGA2(pop_size=100, n_offsprings=30,
            sampling=FloatRandomSampling(),
            crossover=SBX(),
            mutation=PolynomialMutation())


In [33]:
# Feature positions that receive the optimised noise. These must stay in sync
# with the positions NCMax._evaluate perturbs.
perturbed_features = (1, 4, 5, 11)

# Sizes are taken from x_test instead of being hard-coded (previously 92209 rows
# and 14 columns), so the cell keeps working if the split or feature set changes.
n_test_samples, n_features = x_test.shape
challenging_inputs = np.zeros(shape=(n_test_samples, n_features))

# NOTE(review): neither this sampling nor the NSGA-II runs are seeded, so the
# selected inputs and the resulting numbers differ between executions.
random_indexes = rng.sample(range(n_test_samples), min(1000, n_test_samples))

# Only the first few sampled inputs are optimised; the rest of
# challenging_inputs stays zero-filled.
n_optimised = min(10, len(random_indexes))

# Applying NSGA to input(s)
for i in range(n_optimised):
    problem_input = x_test[random_indexes[i]]
    problem = NCMax(problem_input)

    res = minimize(problem, alg, ("n_gen", 50))

    # res.X is either one solution vector or a stack of several; in the latter
    # case simply take the first one.
    noise = np.atleast_2d(res.X)[0]

    challenging_input = np.copy(problem_input)
    for feature_idx, delta in zip(perturbed_features, noise):
        challenging_input[feature_idx] += delta

    challenging_inputs[i] = challenging_input

x_test_nacs = []
challenging_inputs_nacs = []
absolute_increase = []
relative_increase = []

# Calculates NAC for input(s) pre and post NSGA
for i in range(n_optimised):
    pre_NSGA = Neuron_Activation_Coverage(model, x_test[random_indexes[i]])
    post_NSGA = Neuron_Activation_Coverage(model, challenging_inputs[i])
    x_test_nacs.append(pre_NSGA)
    challenging_inputs_nacs.append(post_NSGA)
    absolute_increase.append(post_NSGA - pre_NSGA)
    relative_increase.append((post_NSGA - pre_NSGA) / pre_NSGA)

In [34]:
# Collect the per-input NAC values into one table: coverage before and after
# the NSGA perturbation, plus the absolute and relative change.
column_names = ["Normal Inputs", "Challenging Inputs", "Absolute Increase", "Relative Increase"]
column_values = [x_test_nacs, challenging_inputs_nacs, absolute_increase, relative_increase]
dataset = dict(zip(column_names, column_values))

table = pd.DataFrame(dataset)
print(table)
# table.index += 1

# table.to_excel("temp.xlsx")

   Normal Inputs  Challenging Inputs  Absolute Increase  Relative Increase
0       0.362903            0.575269           0.212366           0.585185
1       0.362903            0.577957           0.215054           0.592593
2       0.255376            0.577957           0.322581           1.263158
3       0.190860            0.537634           0.346774           1.816901
4       0.333333            0.577957           0.244624           0.733871
5       0.276882            0.564516           0.287634           1.038835
6       0.317204            0.580645           0.263441           0.830508
7       0.330645            0.575269           0.244624           0.739837
8       0.403226            0.569892           0.166667           0.413333
9       0.362903            0.564516           0.201613           0.555556
