In [1]:
import pandas as pd

import numpy as np

import random as rng

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping

from tqdm.keras import TqdmCallback

from keras import models
from keras.utils import to_categorical
from keras import backend as K

from keract import get_activations

from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize
from pymoo.operators.sampling.rnd import FloatRandomSampling
from pymoo.operators.mutation.pm import PolynomialMutation
from pymoo.operators.crossover.sbx import SBX
from pymoo.core.problem import Problem, ElementwiseProblem

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the TON_IoT network CSV into a DataFrame (rows stay in file order) and preview it.

ton_iot = pd.read_csv(filepath_or_buffer="Train_Test_Network.csv")
ton_iot.head(n=5)

Unnamed: 0,ts,src_ip,src_port,dst_ip,dst_port,proto,service,duration,src_bytes,dst_bytes,...,http_response_body_len,http_status_code,http_user_agent,http_orig_mime_types,http_resp_mime_types,weird_name,weird_addl,weird_notice,label,type
0,1554198358,3.122.49.24,1883,192.168.1.152,52976,tcp,-,80549.53026,1762852,41933215,...,0,0,-,-,-,bad_TCP_checksum,-,F,0,normal
1,1554198358,192.168.1.79,47260,192.168.1.255,15600,udp,-,0.0,0,0,...,0,0,-,-,-,-,-,-,0,normal
2,1554198359,192.168.1.152,1880,192.168.1.152,51782,tcp,-,0.0,0,0,...,0,0,-,-,-,bad_TCP_checksum,-,F,0,normal
3,1554198359,192.168.1.152,34296,192.168.1.152,10502,tcp,-,0.0,0,0,...,0,0,-,-,-,-,-,-,0,normal
4,1554198362,192.168.1.152,46608,192.168.1.190,53,udp,dns,0.000549,0,298,...,0,0,-,-,-,bad_UDP_checksum,-,F,0,normal


In [3]:
# Discard the columns that are not used as model features, including the binary
# 'label' column; the multi-class 'type' column is kept as the target.

columns_to_drop = [
    'ts', 'src_ip', 'src_port', 'dst_ip', 'dst_port', 'proto', 'service', 'dns_query',
    'http_user_agent', 'http_orig_mime_types', 'http_resp_mime_types', 'weird_name',
    'weird_addl', 'weird_notice', 'conn_state', 'http_trans_depth', 'http_method',
    'http_uri', 'http_version', 'ssl_cipher', 'ssl_resumed', 'ssl_established',
    'ssl_subject', 'ssl_issuer', 'dns_AA', 'dns_RD', 'dns_RA', 'dns_rejected', 'ssl_version',
    'label',
]
ton_iot = ton_iot.drop(columns=columns_to_drop)
ton_iot.head()

Unnamed: 0,duration,src_bytes,dst_bytes,missed_bytes,src_pkts,src_ip_bytes,dst_pkts,dst_ip_bytes,dns_qclass,dns_qtype,dns_rcode,http_request_body_len,http_response_body_len,http_status_code,type
0,80549.53026,1762852,41933215,0,252181,14911156,2,236,0,0,0,0,0,0,normal
1,0.0,0,0,0,1,63,0,0,0,0,0,0,0,0,normal
2,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,normal
3,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,normal
4,0.000549,0,298,0,0,0,2,354,0,0,0,0,0,0,normal


In [4]:
# Map every traffic category name in 'type' to its integer class id.
type_to_id = {
    'normal': 0, 'scanning': 1, 'dos': 2, 'injection': 3, 'ddos': 4,
    'password': 5, 'xss': 6, 'ransomware': 7, 'backdoor': 8, 'mitm': 9,
}
ton_iot['type'] = ton_iot['type'].replace(type_to_id)

# Pull the target out of the frame; everything that remains is the feature matrix.
y = ton_iot['type'].values
ton_iot.drop(columns="type", inplace=True)
x = ton_iot.values

# Feature scaling is currently switched off:
# scaler = MinMaxScaler()
# x = scaler.fit_transform(x)

# One-hot encode the class ids so they match a softmax / categorical_crossentropy head.
y = to_categorical(y)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=16)

In [6]:
# Stop training once val_loss has failed to improve by at least min_delta for
# `patience` consecutive epochs, and restore the best weights seen so far.
es = EarlyStopping(monitor='val_loss',
                   patience=50,
                   min_delta=0.00001,
                   mode='min',
                   verbose=1,
                   restore_best_weights=True)

# Take the layer widths from the data instead of hard-coding them, so the network
# keeps matching the feature matrix and the one-hot targets if preprocessing changes.
n_features = x_train.shape[1]
n_classes = y_train.shape[1]

# Fully connected classifier: alternating relu/tanh hidden layers, dropout before
# the softmax output that yields one probability per class.
model = keras.Sequential()
model.add(layers.Dense(12, input_shape=(n_features,), activation='relu'))
model.add(layers.Dense(48, activation='tanh'))
model.add(layers.Dense(48, activation='relu'))
model.add(layers.Dense(48, activation='tanh'))
model.add(layers.Dense(48, activation='relu'))
model.add(layers.Dropout(0.2))
model.add(layers.Dense(n_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# `epochs` is only an upper bound; the EarlyStopping callback decides when to stop.
# A third of the training rows is held back as the validation set it monitors.
model.fit(x_train, y_train, epochs=20000, batch_size=1000, verbose=0,
          callbacks=[TqdmCallback(verbose=1), es], validation_split=0.33)

  0%|          | 68/20000 [02:13<10:31:47,  1.90s/epoch, loss=0.283, accuracy=0.894, val_loss=0.257, val_accuracy=0.898]

KeyboardInterrupt: 

In [8]:
# Predicted class probabilities for every held-out sample.
y_pred = model.predict(x_test)

# Reduce the one-hot targets and the probability rows to class ids, then tabulate
# true classes (first argument) against predicted classes (second argument).
true_classes = np.argmax(y_test, axis=1)
predicted_classes = np.argmax(y_pred, axis=1)
cm = confusion_matrix(true_classes, predicted_classes)

print(cm)

[[57702    17    14   465     6   601   494   816    13    15]
 [    5  3975    14     7     0     1     0     0     0     5]
 [   38    38  3851    22     0     1     4     0     0    18]
 [ 1281    16     1  2487    27    13    66     2     2     4]
 [  202     2     7   111  3536    22    88     6     0     0]
 [  364     0     0   177     1  3330    41     0     0    18]
 [  193     0    20   193     7     1  3558    31     5     0]
 [  509     8     0     0     0     0   704  2830     0     0]
 [    9     0     0     0     0     0     0     0  4004     0]
 [   76     0     7    62     6     8     9     0     0    43]]


In [None]:
# Min-max scale the numeric columns and label-encode the non-numeric (object) columns.

# Partition the column names by dtype: object columns are treated as non-numeric.
numeric_inputs = []
non_numeric_inputs = []

for name, column in ton_iot.items():
    bucket = non_numeric_inputs if column.dtype == object else numeric_inputs
    bucket.append(name)

scaler = MinMaxScaler()

# fit_transform rejects an empty column selection, so skip scaling when the frame
# has no numeric columns at all.
if numeric_inputs:
    ton_iot[numeric_inputs] = scaler.fit_transform(ton_iot[numeric_inputs])

# The same encoder object is refit for each column, so after the loop it only
# remembers the classes of the last column it processed.
# The loop variable is deliberately not called `input`: at notebook top level that
# would rebind the builtin `input` for every later cell.
encoder = LabelEncoder()
for column_name in non_numeric_inputs:
    ton_iot[column_name] = encoder.fit_transform(ton_iot[column_name])

# One-hot alternative (disabled):
# ton_iot = pd.get_dummies(ton_iot, columns=non_numeric_inputs)

In [None]:
# Split features and labels into train (80%) and test (20%) parts, then convert
# each part to a NumPy array.
# NOTE(review): `labels` is not assigned in any cell visible above this one —
# confirm where it is defined before running this cell.

split_parts = train_test_split(ton_iot, labels, test_size=0.2)
x_train, x_test, y_train, y_test = (part.to_numpy() for part in split_parts)

len(x_train)

In [None]:
# Build the model: embedding -> two bidirectional LSTM layers -> dense head that
# emits a single probability for binary classification.

model = keras.Sequential(
    [
        # NOTE(review): Embedding treats every input value as an integer index below
        # input_dim (100) — confirm the features fed to this model are encoded that way.
        layers.Embedding(output_dim=100, input_dim=100, mask_zero=True),
        layers.Bidirectional(layers.LSTM(50, return_sequences=True)),
        layers.Dropout(0.2),
        layers.Bidirectional(layers.LSTM(50, activation="relu", return_sequences=False)),
        layers.Dropout(0.2),
        layers.Dense(50, activation="linear"),
        layers.Dropout(0.2),
        # Softmax over a single unit normalises one value against itself and therefore
        # always outputs 1.0, so nothing can be learned. Sigmoid gives the per-sample
        # probability that binary_crossentropy expects.
        layers.Dense(1, activation="sigmoid")
    ]
)

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model

# model.fit(x=x_train, y=y_train, epochs=1)

In [None]:
# Save the model (currently disabled)
# model.save("temp_model")

# Load model from save.
# NOTE(review): this rebinds `model`, replacing whichever model the earlier cells
# built, and it needs a "temp_model" artifact to exist already because the save
# call above is commented out.
model = keras.models.load_model("temp_model")

In [None]:
# Evaluate the model

# score = model.evaluate(x_test, y_test, verbose=1)

In [None]:
# Create and test the function for neuron activation coverage
def Neuron_Activation_Coverage(model, input):
    """Return the fraction of recorded activation values that are non-zero.

    The model is run on `input` through keract's `get_activations`, and every
    value in every returned activation array is counted.

    Args:
        model: Keras model to inspect.
        input: A single sample (1-D) or a batch (2-D or more). A 1-D sample is
            promoted to a batch of one before it is passed to the model.

    Returns:
        float: non-zero activation values divided by all activation values, or
        0.0 when no activation values were recorded at all.
    """
    input = np.asarray(input)
    if input.ndim < 2:
        input = np.expand_dims(input, axis=0)

    activations = get_activations(model, input, auto_compile=True)
    # Presumably the entry for the embedding's input layer, which would only echo the
    # raw sample. Drop it when present; models without that key (for example one with
    # no embedding layer) no longer fail with a KeyError here.
    activations.pop("embedding_input", None)

    total_nodes = 0
    non_zeros = 0
    for value in activations.values():
        total_nodes += value.size
        non_zeros += np.count_nonzero(value)

    # Nothing recorded: report zero coverage instead of dividing by zero.
    if total_nodes == 0:
        return 0.0

    return non_zeros / total_nodes

# Try the coverage function on the first test sample and show the result.
nac = Neuron_Activation_Coverage(model, input=x_test[0])

print("NAC: ", nac)

In [None]:
###### NAC Alternative

# inp = x_test[1]
# inp = np.expand_dims(inp, axis=0)

# from tensorflow.keras import backend as K
# nodes = 0
# non_zero_nodes = 0
# for layerIndex, layer in enumerate(model.layers):
#     func = K.function([model.get_layer(index=0).input], layer.output)
#     layerOutput = func([[inp]])  # input_data is a numpy array
#     nodes += layerOutput.size
#     non_zero_nodes += np.count_nonzero(layerOutput)

# neuronac = non_zero_nodes / nodes

# print(neuronac)

In [None]:
###### NAC Alternative 2

# test_input = x_test[1]
# test_input = np.expand_dims(test_input, axis=0)


# nodes = 0
# non_zero_nodes = 0

# for n in range(0, len(model.layers)):
#     get_nth_layer_output = K.function([model.layers[0].input], [model.layers[n].output])
#     layer_output = get_nth_layer_output([test_input])[0]
#     nodes += layer_output.size

#     non_zero_nodes += np.count_nonzero(layer_output)


# neuron_coverage = non_zero_nodes / nodes

# print("Neuron coverage:", neuron_coverage)

In [None]:
# Problem class for NSGA
class NCMax(ElementwiseProblem):
    """pymoo problem that searches for an additive perturbation of one input.

    The single objective is the reciprocal of the neuron activation coverage of
    the perturbed input, so minimising it maximises coverage. One inequality
    constraint is reported through out["G"].

    Args:
        input: The sample to perturb. One decision variable is created per
            element of this sample, each bounded to [-1.0, 1.0].
    """

    def __init__(self, input):
        base_input = np.asarray(input)
        # Derive the number of decision variables from the sample itself instead of
        # assuming a fixed feature count.
        super().__init__(
            n_var=base_input.size, n_obj=1, n_constr=1, xl=-1.0, xu=1.0)
        self.input = base_input

    def _evaluate(self, x, out, *args, **kwargs):
        """Score one candidate perturbation `x` (one value per input element)."""
        perturbed_input = self.input + np.reshape(x, self.input.shape)
        # `model` is read from the notebook's global namespace.
        nc = Neuron_Activation_Coverage(model, perturbed_input)
        # Minimising 1/coverage is equivalent to maximising coverage.
        ret_val = 1.0 / nc
        # Feasible when the largest perturbation component is at most 0.2.
        # NOTE(review): this bounds only the positive side; negative components may
        # still reach -1.0. Confirm whether a bound on abs(x) was intended.
        constr = x.max() - 0.2
        out["F"] = ret_val
        out["G"] = constr

# NSGA-II configuration: population of 100, 30 offspring per generation, random
# float initialisation, simulated binary crossover and polynomial mutation.
# The keyword names must be `n_offsprings` and `crossover`; the previous spellings
# (`offsprings`, `crossovers`) were not recognised by NSGA2 and had no effect.
alg = NSGA2(pop_size=100, n_offsprings=30,
            sampling=FloatRandomSampling(),
            crossover=SBX(),
            mutation=PolynomialMutation(),)


In [None]:
# How many test samples to perturb, and how many random candidate rows to draw.
N_INPUTS = 10
N_CANDIDATES = 100

# Take the sizes from the data instead of hard-coding the row and feature counts.
n_samples, n_features = x_test.shape

challenging_inputs = np.zeros(shape=(n_samples, n_features))
random_indexes = rng.sample(range(n_samples), min(N_CANDIDATES, n_samples))

x_test_nacs = []
challenging_inputs_nacs = []
absolute_increase = []
relative_increase = []

# For each selected sample: search for a perturbation with NSGA-II, then record the
# neuron activation coverage (NAC) before and after applying it.
for i in range(min(N_INPUTS, len(random_indexes))):
    original_input = x_test[random_indexes[i]]

    problem = NCMax(original_input)
    res = minimize(problem, alg, ("n_gen", 5))

    if res.X is None:
        # No feasible solution was found; leave this sample unperturbed.
        noise = np.zeros(n_features)
    else:
        # Sometimes the result holds more than one solution (one per row);
        # if that's the case we simply pick the first of them.
        noise = np.reshape(np.atleast_2d(res.X)[0], n_features)

    challenging_inputs[i] = original_input + noise

    pre_NSGA = Neuron_Activation_Coverage(model, original_input)
    post_NSGA = Neuron_Activation_Coverage(model, challenging_inputs[i])
    x_test_nacs.append(pre_NSGA)
    challenging_inputs_nacs.append(post_NSGA)
    absolute_increase.append(post_NSGA - pre_NSGA)
    # A baseline of zero has no meaningful relative change; record NaN instead of
    # dividing by zero.
    relative_increase.append((post_NSGA - pre_NSGA) / pre_NSGA if pre_NSGA else float("nan"))

In [None]:
# Collect the per-sample coverage figures into one table, one column per measure.
column_names = ("Normal Inputs", "Challenging Inputs", "Absolute Increase", "Relative Increase")
column_values = (x_test_nacs, challenging_inputs_nacs, absolute_increase, relative_increase)
dataset = dict(zip(column_names, column_values))

table = pd.DataFrame(data=dataset)
print(table)
# table.index += 1

# table.to_excel("temp.xlsx")