# Statistical Test

<img src = "https://drive.google.com/uc?id=1Cw2E09wgzQdFxPph8KrSB8WCZn71u_XC" height = "500" >


In [None]:
from __future__ import division
import numpy as np
from sys import stdout
from sklearn.metrics import pairwise_kernels
import tensorflow as tf

#
#taken from https://github.com/emanuele/kernel_two_sample_test
#

def MMD2u(K, m, n):
    """Return the unbiased MMD^2_u statistic computed from a kernel matrix.

    K is sliced as a joint kernel matrix: rows/columns ``[:m]`` belong to the
    first sample and rows/columns ``[m:]`` to the second one. ``n`` is the size
    used to normalise the second-sample and cross terms.
    """
    within_x = K[:m, :m]
    within_y = K[m:, m:]
    across = K[:m, m:]
    # Diagonal entries (each point against itself) are removed from the
    # within-sample sums before normalising.
    term_x = 1.0 / (m * (m - 1.0)) * (within_x.sum() - within_x.diagonal().sum())
    term_y = 1.0 / (n * (n - 1.0)) * (within_y.sum() - within_y.diagonal().sum())
    term_xy = 2.0 / (m * n) * across.sum()
    return term_x + term_y - term_xy


def compute_null_distribution(K, m, n, iterations=10000, verbose=False,
                              random_state=None, marker_interval=1000):
    """Compute the bootstrap null-distribution of MMD2u.

    For each iteration the rows/columns of ``K`` are permuted with a random
    permutation of ``m + n`` indices and MMD2u is recomputed on the result.

    Parameters
    ----------
    K : square array indexed with ``m + n`` positions (the joint kernel matrix).
    m, n : sizes of the two samples, forwarded to ``MMD2u``.
    iterations : number of permutations to draw.
    verbose : when true, print a progress marker every ``marker_interval``
        iterations.
    random_state : an ``np.random.RandomState`` instance (used as is) or any
        seed accepted by ``np.random.RandomState``.
    marker_interval : spacing between progress markers.

    Returns
    -------
    1-D array of length ``iterations`` holding the permuted statistics.
    """
    # isinstance also accepts RandomState subclasses and avoids building a
    # throwaway generator just to compare types.
    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    mmd2u_null = np.zeros(iterations)
    for i in range(iterations):
        if verbose and (i % marker_interval) == 0:
            # The original `print(i),` was the Python 2 idiom for "stay on the
            # same line"; end=" " restores that behaviour under Python 3.
            print(i, end=" ")
            stdout.flush()
        idx = rng.permutation(m + n)
        # Apply the same permutation to both axes of the kernel matrix.
        K_i = K[idx, idx[:, None]]
        mmd2u_null[i] = MMD2u(K_i, m, n)

    if verbose:
        # Terminate the line of progress markers.
        print("")

    return mmd2u_null


def compute_null_distribution_given_permutations(K, m, n, permutation,
                                                 iterations=None):
    """Evaluate MMD2u on ``K`` for a caller-supplied list of permutations.

    ``permutation[i]`` is the index array applied to both axes of ``K`` for
    draw ``i``. When ``iterations`` is None, every entry of ``permutation`` is
    used. No progress output is produced, to keep the loop fast.
    """
    n_draws = len(permutation) if iterations is None else iterations

    null_stats = np.zeros(n_draws)
    for draw in range(n_draws):
        order = permutation[draw]
        null_stats[draw] = MMD2u(K[order, order[:, None]], m, n)

    return null_stats

def makeScaleMatrix(num_gen, num_orig):
    """Build the column vector of per-row scaling constants.

    The first ``num_gen`` entries are ``1 / num_gen`` and the following
    ``num_orig`` entries are ``-1 / num_orig``.

    Returns a tensor of shape ``[num_gen + num_orig, 1]``.
    """
    # first 'N' entries have '1/N', next 'M' entries have '-1/M'
    s1 = tf.constant(1.0 / num_gen, shape=[num_gen, 1])
    s2 = -tf.constant(1.0 / num_orig, shape=[num_orig, 1])

    # tf.concat takes (values, axis); the original call used the pre-1.0
    # (axis, values) order, which fails on current TensorFlow.
    return tf.concat([s1, s2], axis=0)

def MMD(x1, x2, sess, sigma = [80]):
    """Return the scaled RBF kernel matrix of the stacked samples.

    Parameters
    ----------
    x1, x2 : array-likes / tensors whose rows are data points; both are cast
        to float32 and stacked (x1 rows first).
    sess : session used to evaluate tensors via ``sess.run``. When None, the
        static shapes and ``.numpy()`` are used instead (eager execution).
    sigma : sequence of bandwidths; the scaled kernels of all bandwidths are
        added together. The default list is never mutated here.

    Returns
    -------
    A NumPy array with one row and one column per stacked data point.

    Raises
    ------
    ValueError if ``sigma`` is empty.
    """
    if len(sigma) == 0:
        raise ValueError("sigma must contain at least one bandwidth")

    x1 = tf.cast(x1, tf.float32)
    x2 = tf.cast(x2, tf.float32)
    # concatenation of the two datas
    # first 'N' rows are the generated ones, next 'M' are from the data
    # (tf.concat takes values first, then the axis)
    X = tf.concat([x1, x2], axis=0)
    # dot product between all combinations of rows in 'X'
    XX = tf.matmul(X, tf.transpose(X))

    # dot product of rows with themselves
    # (`keepdims` replaces the removed `keep_dims` keyword)
    X2 = tf.reduce_sum(X * X, 1, keepdims=True)

    # exponent entries of the RBF kernel (without the sigma) for each
    # combination of the rows in 'X'
    # -0.5 * (x^Tx - 2*x^Ty + y^Ty)
    exponent = XX - 0.5 * X2 - 0.5 * tf.transpose(X2)

    # Number of rows of each sample: evaluated through the session when one
    # is given (as before), otherwise read from the static tensor shape.
    if sess is not None:
        num_gen = np.shape(sess.run(x1))[0]
        num_orig = np.shape(sess.run(x2))[0]
    else:
        num_gen = int(x1.shape[0])
        num_orig = int(x2.shape[0])

    # scaling constants for each of the rows in 'X'
    s = makeScaleMatrix(num_gen, num_orig)
    # scaling factors of each of the kernel values, corresponding to the
    # exponent values
    S = tf.matmul(s, tf.transpose(s))
    S = tf.cast(S, tf.float32)

    # for each bandwidth parameter, compute the scaled kernel and add them
    # all (the original loop kept only the last bandwidth's kernel)
    kernel_sum = None
    for bandwidth in sigma:
        # kernel values for each combination of the rows in 'X'
        kernel_val = tf.exp(1.0 / bandwidth * exponent)
        kernel_val = S * tf.cast(kernel_val, tf.float32)
        kernel_sum = kernel_val if kernel_sum is None else kernel_sum + kernel_val

    if sess is not None:
        return sess.run(kernel_sum)
    return kernel_sum.numpy()

def kernel_two_sample_test(X, Y, kernel_function='rbf', iterations=10000,
                           verbose=False, random_state=None, myKernel=False, sess=None, **kwargs):
    """Run the kernel two-sample test on samples X and Y.

    Returns the tuple ``(mmd2u, mmd2u_null, p_value)``: the MMD^2_u statistic,
    its permutation null distribution and the resulting p-value.

    The kernel matrix comes from ``MMD(X, Y, sess)`` when ``myKernel`` is true,
    otherwise from ``pairwise_kernels`` on the stacked samples. Extra keyword
    arguments are forwarded to ``pairwise_kernels`` as kernel parameters, e.g.
    ``kernel_two_sample_test(..., kernel_function='rbf', gamma=0.1)`` calls
    ``pairwise_kernels(XY, metric='rbf', gamma=0.1)``.
    """
    m, n = len(X), len(Y)
    XY = np.vstack([X, Y])
    K = MMD(X, Y, sess) if myKernel else pairwise_kernels(
        XY, metric=kernel_function, **kwargs)

    mmd2u = MMD2u(K, m, n)
    if verbose:
        print("MMD^2_u = %s" % mmd2u)
        print("Computing the null distribution.")

    mmd2u_null = compute_null_distribution(
        K, m, n, iterations, verbose=verbose, random_state=random_state)

    # Share of permuted statistics strictly above the observed one, floored at
    # the resolution of the permutation test.
    exceed_fraction = (mmd2u_null > mmd2u).sum() / float(iterations)
    p_value = max(1.0/iterations, exceed_fraction)
    if verbose:
        print("p-value ~= %s \t (resolution : %s)" % (p_value, 1.0/iterations))
    return mmd2u, mmd2u_null, p_value
def get_classes(x,y):
   """Split a dataset into one array per class.

   @param x: data, one row per sample
   @param y: labels, either one-hot rows (2-D) or integer class ids (1-D)
   @return: list where entry ``c`` is a float array with the rows of ``x``
            whose class is ``c`` (in their original order)

   For one-hot labels the class of a row is the argmax of its label row and
   the number of classes is the number of label columns. For 1-D labels the
   number of classes is ``max(y) + 1`` and that count is printed.
   """
   x = np.asarray(x)
   y = np.asarray(y)
   #deal with one hot
   if y.ndim > 1:
      num_classes = y.shape[1]
      labels = np.argmax(y, axis=1)
   #deal with other
   else:
      num_classes = int(np.max(y)) + 1
      print (num_classes)
      labels = y.astype(np.int64)
   # Boolean-mask selection replaces the row-by-row fill; the old one-hot
   # branch indexed with a float counter, which raises IndexError in NumPy.
   return [x[labels == c].astype(np.float64) for c in range(num_classes)]
def getSamples(x1,y1,x2,y2,size=200):
       """Draw a random subset of up to ``size`` rows from each dataset.

       @param x1: 1. data
       @param y1: 1. labels
       @param x2: 2. data
       @param y2: 2. labels
       @param size: size of resulting samples
       @return: (x1 subset, y1 subset, x2 subset, y2 subset); data and labels
                of the same dataset are taken at the same shuffled positions
       """
       order1 = np.arange(np.shape(x1)[0])
       np.random.shuffle(order1)
       order2 = np.arange(np.shape(x2)[0])
       np.random.shuffle(order2)
       picked1 = order1[:size]
       picked2 = order2[:size]
       return x1[picked1], y1[picked1], x2[picked2], y2[picked2]
def samplestats(x1,x2,size1,size2,ratio):
    """Compose a random sample that mixes rows of x2 with rows of x1.

    @param x1: first data
    @param x2: second data
    @param size1: number of datapoints budgeted for the first data
    @param size2: number of datapoints budgeted for the second data
    @param ratio: share of x2 in the result, in (0, 1]
    @return: for ``ratio == 1.0`` up to ``size2`` random rows of x2; for
             ``0 < ratio < 1`` an array holding ``int(size2*ratio)`` random
             rows of x2 followed by ``int(size1*(1-ratio))`` random rows of
             x1; otherwise a message is printed and None is returned
    """
    if ratio == 1.0:
        # Only x2 contributes, so only x2 needs shuffling (the original also
        # shuffled an unused index array for x1).
        order2 = np.arange(np.shape(x2)[0])
        np.random.shuffle(order2)
        return x2[order2[0:size2]]

    if 0.0 < ratio < 1.0:
        n_from_x2 = int(size2 * ratio)
        n_from_x1 = int(size1 * (1.0 - ratio))
        order1 = np.arange(np.shape(x1)[0])
        np.random.shuffle(order1)
        order2 = np.arange(np.shape(x2)[0])
        np.random.shuffle(order2)
        # Result keeps x2's trailing dimensions; only the row count changes.
        mixed_shape = (n_from_x2 + n_from_x1,) + tuple(np.shape(x2)[1:])
        mixed = np.zeros(mixed_shape)
        mixed[0:n_from_x2] = x2[order2[0:n_from_x2]]
        mixed[n_from_x2:] = x1[order1[0:n_from_x1]]
        return mixed

    # Ratios outside (0, 1] are reported, not raised, as before.
    print('no value between 0 and 1.0 given!')
    return None
def compute_stats(x1,x2,size1, size2,ratio, runs):
    """Run the kernel two-sample test ``runs`` times and collect the p-values.

    @param x1: first sample
    @param x2: second sample
    @param size1: size of sample for first data for test (not used by the body)
    @param size2: size of sample for second data for test (not used by the body)
    @param ratio: blending ratio (not used by the body)
    @param runs: number of times the test is repeated
    @return: 1-D array with one p-value per run

    Every run calls ``kernel_two_sample_test(x1, x2)`` on the same inputs.
    """
    p_values = np.zeros((runs))
    for run in range(runs):
        p_values[run] = kernel_two_sample_test(x1, x2)[2]
    return p_values

# Outlier Class

## Concept
<img src = "https://drive.google.com/uc?id=1xg9jBacagaN8DpYjt0z4M-7vFGkVLyXS" height = "400">
<br>
<br>
<h2>Données</h2>
<img src = "https://drive.google.com/uc?id=13w1ipz9MLWy3C1k2Xwrf7tza5CnF_Fl4" width = "1000">


In [None]:
# Mount Google Drive and load the pre-crafted adversarial samples.
import numpy as np
from google.colab import drive
drive.mount('/content/drive')
# Training-side adversarial samples, one file per attack.
path_df = '/content/drive/My Drive/****/Adversarial samples/Crafted train/train_DeepFool.npy'
path_fgsm = '/content/drive/My Drive/****/Adversarial samples/Crafted train/train_FGSM0.0001.npy'
path_jsma = '/content/drive/My Drive/****/Adversarial samples/Crafted train/train_JSMA.npy'
path_cw = '/content/drive/My Drive/****/Adversarial samples/Crafted train/train_carliniL2.npy'
path_pgd = '/content/drive/My Drive/****/Adversarial samples/Crafted train/train_pgd.npy'

# NOTE: `df` here is the DeepFool array, not a pandas DataFrame.
df = np.load(path_df)
fgsm = np.load(path_fgsm)
jsma = np.load(path_jsma)
cw = np.load(path_cw)
pgd = np.load(path_pgd)
from numpy import load
# Test-side adversarial samples. Each upper-case name first holds the file
# path and is then rebound to the loaded array.
PGD = '/content/drive/My Drive/****/Adversarial samples/test_pgd.npy'
FGSM = '/content/drive/My Drive/****/Adversarial samples/FGSM0.0001.npy'
JSMA = '/content/drive/My Drive/****/Adversarial samples/JSMA.npy'
DeepFool = '/content/drive/My Drive/****/Adversarial samples/DeepFool.npy'
CWL2 = '/content/drive/My Drive/****/Adversarial samples/carliniL2.npy'
BIM = '/content/drive/My Drive/****//Adversarial samples/BIM.npy'
# Every loaded array is reshaped to (number of rows, 113).
FGSM = load(FGSM)
fgsm_test = np.reshape(FGSM, (FGSM.shape[0], 113))
JSMA = load(JSMA)
jsma_test = np.reshape(JSMA, (JSMA.shape[0], 113))
BIM = load(BIM)
bim_test = np.reshape(BIM, (BIM.shape[0], 113))
CWL2 = load(CWL2)
cw_test = np.reshape(CWL2, (CWL2.shape[0], 113))
DeepFool = load(DeepFool)
df_test = np.reshape(DeepFool, (DeepFool.shape[0], 113))
PGD = load(PGD)
pgd_test = np.reshape(PGD, (PGD.shape[0], 113))
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import copy
import time as time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.feature_selection import chi2

%matplotlib inline

pd.options.display.max_columns = 200
pd.options.display.max_rows = 200
#Downloading and extracting the dataset if it doesn't exist
!if [ ! -d "./NSL-KDD" ]; then wget http://205.174.165.80/CICDataset/NSL-KDD/Dataset/NSL-KDD.zip; mkdir NSL-KDD; unzip NSL-KDD.zip -d NSL-KDD; fi
    
#Importing the training and testing datasets from .CSV to Pandas DataFrames
features = ['1 Duration', '2 Protocol-type : ', '3 Service : ', '4 Flag : ', '5 Src-bytes', '6 Dst-bytes', '7 Land', '8 Wrong-fragment', '9 Urgent', '10 Hot', '11 Num-failed-logins', '12 Logged-in', '13 Num-compromised', '14 Root-shell', '15 Su-attempted', '16 Num-root', '17 Num-file-creations', '18 Num-shells', '19 Num-access-files', '20 Num-outbound-cmds', '21 Is-host-login', '22 Is-guest-login', '23 Count', '24 Srv-count', '25 Serror-rate', '26 Srv-serror-rate', '27 Rerror-rate', '28 Srv-rerror-rate', '29 Same-srv-rate', '30 Diff-srv-rate', '31 Srv-diff-host-rate', '32 Dst-host-count', '33 Dst-host-srv-count', '34 Dst-host-same-srv-rate', '35 Dst-host-diff-srv-rate', '36 Dst-host-same-src-port-rate', '37 Dst-host-srv-diff-host-rate', '38 Dst-host-serror-rate', '39 Dst-host-srv-serror-rate', '40 Dst-host-rerror-rate', '41 Dst-host-srv-rerror-rate', '42 Attack_type', '43 Difficulty']
df_training = pd.read_csv('./NSL-KDD/KDDTrain+.txt', names=features)
df_testing = pd.read_csv('./NSL-KDD/KDDTest+.txt', names=features)
# Stack the training and test sets
data = pd.concat([df_training, df_testing], axis=0)
# Dropping features
colToDrop = ['43 Difficulty', '20 Num-outbound-cmds','1 Duration', '5 Src-bytes', '6 Dst-bytes','15 Su-attempted','16 Num-root', '17 Num-file-creations', '18 Num-shells', '19 Num-access-files']
for col in colToDrop:
  data.drop(col, inplace=True, axis=1)
  # Transform the nominal attribute "Attack type" into binary (0 : normal / 1 : attack)
labels = (data['42 Attack_type'] != 'normal').astype('int64')
data['42 Labels'] = labels
data.drop('42 Attack_type', inplace=True, axis=1)
# One Hot Encode the 3 first nominal attributes and drop them
for i in ['4 Flag : ', '3 Service : ', '2 Protocol-type : ']:
    # Create the One Hot Encode DataFrame
    dum = pd.get_dummies(data[i])
    # Insert into the dataset DataFrame by Series
    for column_name in list(dum.columns):
        data.insert(1, str(i)+column_name, dum[column_name])
        data[str(i)+column_name] = data[str(i)+column_name].astype('int64')
    # Drop the old attribute's column
    data.drop(i, inplace=True, axis=1)
# Split training and test sets
df_training = data[:df_training.shape[0]]    
df_testing = data[df_training.shape[0]:]
# Min-Max normalization on the non binary features.
# (An earlier computation of `attributes` was immediately overwritten by this
# literal list and has been removed as dead code.)
attributes = ['8 Wrong-fragment','9 Urgent','10 Hot','11 Num-failed-logins','13 Num-compromised','23 Count','24 Srv-count','25 Serror-rate','26 Srv-serror-rate',
'27 Rerror-rate','28 Srv-rerror-rate','29 Same-srv-rate','30 Diff-srv-rate','31 Srv-diff-host-rate', '32 Dst-host-count','33 Dst-host-srv-count',
 '34 Dst-host-same-srv-rate','35 Dst-host-diff-srv-rate','36 Dst-host-same-src-port-rate','37 Dst-host-srv-diff-host-rate','38 Dst-host-serror-rate','39 Dst-host-srv-serror-rate','40 Dst-host-rerror-rate',
 '41 Dst-host-srv-rerror-rate']
for i in attributes:
    # The min and max are only computed from the training set
    minval = df_training[i].min()
    maxval = df_training[i].max()
    value_range = maxval - minval
    if value_range == 0:
        # Constant training column: divide by 1 so the result is a shifted
        # column instead of NaN/inf from a division by zero.
        value_range = 1
    df_training[i] = ((df_training[i] - minval) / value_range)
    df_testing[i] = ((df_testing[i] - minval) / value_range)
# Raw NumPy views of the two DataFrames
nd_training, nd_testing = df_training.values, df_testing.values
# The last column holds the label (y); everything before it is the input (x)
x_train, y_train = nd_training[:, :-1], nd_training[:, -1]
x_test, y_test = nd_testing[:, :-1], nd_testing[:, -1]
# Keep NumPy copies before the names are rebound to torch tensors
x_train_np, y_train_np = x_train.copy(), y_train.copy()
x_test_np, y_test_np = x_test.copy(), y_test.copy()
# Torch versions: float inputs, long labels
x_train = torch.from_numpy(x_train).float()
x_test = torch.from_numpy(x_test).float()
y_train = torch.from_numpy(y_train).long()
y_test = torch.from_numpy(y_test).long()

# Feature rows (label column removed) of each class, per split
attack_label = df_training.loc[df_training['42 Labels'] == 1].values[:, :-1]
normal_label = df_training.loc[df_training['42 Labels'] == 0].values[:, :-1]

attack_label_test = df_testing.loc[df_testing['42 Labels'] == 1].values[:, :-1]
normal_label_test = df_testing.loc[df_testing['42 Labels'] == 0].values[:, :-1]

# Training pool of adversarial rows: five consecutive chunks of 10000 rows,
# each copied from the same row range of one crafted-attack array.
adv_label = np.zeros((50000, 113), dtype=float)
for chunk, crafted in enumerate((df, jsma, pgd, cw, fgsm)):
  rows = slice(chunk * 10000, (chunk + 1) * 10000)
  adv_label[rows] = crafted[rows]

# Test pool: same layout with chunks of 1000 rows.
adv_label_test = np.zeros((5000, 113), dtype=float)
for chunk, crafted in enumerate((df_test, jsma_test, pgd_test, cw_test, fgsm_test)):
  rows = slice(chunk * 1000, (chunk + 1) * 1000)
  adv_label_test[rows] = crafted[rows]

# Three-class training set: 0 = normal, 1 = attack, 2 = adversarial,
# 50000 rows each, written to shuffled row positions.
normal_label = normal_label[: 50000]
attack_label = attack_label[: 50000]
x_train_ds = np.zeros((150000, 113), dtype=float)
y_train_ds = np.zeros((150000, 1), dtype=float)
indices = np.arange(150000)
np.random.shuffle(indices)
# Each class owns one contiguous range of the shuffled positions. Half-open
# ranges fix the previous `i>50000` test, which sent i == 50000 to the
# adversarial branch and left attack_label[0] unused.
positions = np.arange(150000)
for lo, hi, rows, label in ((0, 50000, normal_label, 0),
                            (50000, 100000, attack_label, 1),
                            (100000, 150000, adv_label, 2)):
  span = positions[lo:hi]
  # Same source-row mapping as before: position modulo the class size.
  x_train_ds[indices[span]] = rows[span % (hi - lo)]
  y_train_ds[indices[span]] = label

# Three-class test set: 12000 attack (1), 9000 normal (0) and 5000
# adversarial (2) rows, written to shuffled row positions.
normal_label_test = normal_label_test[: 9000]
attack_label_test = attack_label_test[: 12000]
x_test_ds = np.zeros((26000, 113), dtype=float)
y_test_ds = np.zeros((26000, 1), dtype=float)
indices = np.arange(26000)
np.random.shuffle(indices)
# Half-open ranges fix the previous `i>12000` test, which sent i == 12000 to
# the adversarial branch instead of the normal one.
positions = np.arange(26000)
for lo, hi, rows, label in ((0, 12000, attack_label_test, 1),
                            (12000, 21000, normal_label_test, 0),
                            (21000, 26000, adv_label_test, 2)):
  span = positions[lo:hi]
  # Same source-row mapping as before: position modulo the class size.
  x_test_ds[indices[span]] = rows[span % (hi - lo)]
  y_test_ds[indices[span]] = label

# Install TabNet and train the three-class classifier `clf` on the data set
# built above.
!pip install pytorch-tabnet
import torch
# NOTE(review): resnet50 and summary are imported but not used in the visible cells.
from torchvision.models.resnet import resnet50
from torchsummary import summary
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using {device} for inference')
from pytorch_tabnet.tab_model import TabNetClassifier
clf = TabNetClassifier()  


# Labels were built with shape (N, 1); flatten them to 1-D before fitting.
y_train_ds = np.reshape(y_train_ds, (150000))
y_test_ds = np.reshape(y_test_ds, (26000))


# The test split is passed as the evaluation set reported during training.
clf.fit(
  x_train_ds, y_train_ds,
  eval_set=[(x_test_ds, y_test_ds)]
)

def prediction(model, data):
  """Return the most probable class according to ``model.predict_proba``.

  @param model: object exposing ``predict_proba(data)``
  @param data: rows to classify
  @return: for a single row, the index of the largest probability (scalar);
           for several rows, an array with one class index per row
  """
  # One predict_proba call is enough; the two extra model.predict calls of
  # the original produced values that were never used.
  proba = np.asarray(model.predict_proba(data))
  if proba.ndim == 2 and proba.shape[0] > 1:
    # A flat argmax over several rows would mix row and class indices.
    return np.argmax(proba, axis=1)
  return np.argmax(proba)

Mounted at /content/drive
--2022-06-30 10:43:17--  http://205.174.165.80/CICDataset/NSL-KDD/Dataset/NSL-KDD.zip
Connecting to 205.174.165.80:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6598776 (6.3M) [application/zip]
Saving to: ‘NSL-KDD.zip’


2022-06-30 10:43:23 (1.61 MB/s) - ‘NSL-KDD.zip’ saved [6598776/6598776]

Archive:  NSL-KDD.zip
  inflating: NSL-KDD/index.html      
  inflating: NSL-KDD/KDDTest1.jpg    
  inflating: NSL-KDD/KDDTest-21.arff  
  inflating: NSL-KDD/KDDTest-21.txt  
  inflating: NSL-KDD/KDDTest+.arff   
  inflating: NSL-KDD/KDDTest+.txt    
  inflating: NSL-KDD/KDDTrain1.jpg   
  inflating: NSL-KDD/KDDTrain+.arff  
  inflating: NSL-KDD/KDDTrain+.txt   
  inflating: NSL-KDD/KDDTrain+_20Percent.arff  
  inflating: NSL-KDD/KDDTrain+_20Percent.txt  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch-tabnet
  Downloading pytorch_tabnet-3.1.1-py3-none-any.whl (39 kB)
Installing collected packages: pytorch-tabnet
Successfully installed pytorch-tabnet-3.1.1
Using cpu for inference
Device used : cpu
epoch 0  | loss: 0.48782 | val_0_accuracy: 0.44562 |  0:00:10s
epoch 1  | loss: 0.31465 | val_0_accuracy: 0.47142 |  0:00:21s
epoch 2  | loss: 0.30327 | val_0_accuracy: 0.50219 |  0:00:32s
epoch 3  | loss: 0.2815  | val_0_accuracy: 0.726   |  0:00:43s
epoch 4  | loss: 0.26539 | val_0_accuracy: 0.73535 |  0:00:53s
epoch 5  | loss: 0.25717 | val_0_accuracy: 0.73115 |  0:01:05s
epoch 6  | loss: 0.25352 | val_0_accuracy: 0.73277 |  0:01:15s
epoch 7  | loss: 0.25111 | val_0_accuracy: 0.74677 |  0:01:26s
epoch 8  | loss: 0.25517 | val_0_accuracy: 0.72538 |  0:01:36s
epoch 9  | loss: 0.2449  | val_0_accuracy: 0.72946 |  0:01:46s
epoch 10 | loss: 0.2421  | val_0_accuracy: 0.71381 | 

# Loading The Data

In [None]:
# HopSkipJump adversarial samples; reshaped to (rows, 113). The name first
# holds the path and is then rebound to the array.
hsja = "/content/drive/My Drive/****/Adversarial samples/hsjaL2Attack.npy"
hsja = load(hsja)
hsja = np.reshape(hsja, (hsja.shape[0], 113))

# Rows of the attack class, reshaped to (rows, 113).
Attacks = '/content/drive/My Drive/****/Adversarial samples/Attack_Class.npy'
Attacks = np.load(Attacks)
Attacks = np.reshape(Attacks, (Attacks.shape[0], 113))


# NOTE(review): read with np.loadtxt although the file name ends in .npy —
# presumably a text dump; confirm the file format.
path = '/content/drive/My Drive/****/Adversarial samples/Boundary/boundary_full.npy'
boundary = np.loadtxt(path)

# NOTE(review): same np.loadtxt / .npy combination as above — confirm.
Normal = '/content/drive/My Drive/****/Adversarial samples/Benign_Class.npy'
Normal = np.loadtxt(Normal)

# Loading The NIDS

In [None]:
# Location of the saved NIDS model on the mounted Drive.
model500Path = "/content/drive/My Drive/****/content/LSTMNIDS500/content/LSTM-NIDS-500"
# Restore it; `model` is called directly on tensors by predict().
model = tf.saved_model.load(model500Path)
def predict(x):
  """Classify one sample with the loaded NIDS model.

  ``x`` is converted to a float32 tensor, reshaped to (1, 1, 113) and passed
  to ``model``. The return value is the argmax of the model output, not a
  boolean.
  """
  features = tf.cast(tf.convert_to_tensor(x), tf.float32)
  scores = model(tf.reshape(features, ( 1, 1, 113)))
  return np.argmax(scores)

# Creating Adversarial Mix Of Data
<img src = "https://drive.google.com/uc?id=1DbbnVcYLC2NCyMjmeaSXV96UV7zidg4n" height = "400">

In [None]:
import random
# Shuffle the rows of the four sources into one array.
# Labels : 0 Normal, 1 attack, 2 boundary, 3 hsja
sources = (Normal, Attacks, boundary, hsja)
# Sizes come from the arrays themselves instead of the hard-coded 9594 /
# 12833 / 48093, so the cell stays correct if a source file changes.
total_rows = sum(part.shape[0] for part in sources)
indices = random.sample(range(total_rows), total_rows)
Labels = np.zeros((total_rows, 1), int)
Mix = np.zeros((total_rows, 113), float)
offset = 0
slot_groups = []
for label, part in enumerate(sources):
  # Each source gets its own consecutive run of the shuffled positions.
  slots = indices[offset: offset + part.shape[0]]
  Mix[slots] = part
  Labels[slots] = label
  slot_groups.append(slots)
  offset += part.shape[0]
# Keep the per-source position lists under their original names.
indices1, indices2, indices3, indices4 = slot_groups

# Creating Mix Of Benign Data
<img src = "https://drive.google.com/uc?id=109u40oGbUw43kJF7dLjOK_ItZTIADDgQ" height = "400">


In [None]:
# Shuffled mix of the two original classes (no adversarial rows):
# label 0 = Normal, label 1 = Attacks.
indices = random.sample(range(22427),22427)
from_dataset_data = np.zeros((22427, 113), float)
from_dataset_labels = np.zeros((22427, 1), int)
indices1 = indices[0: 9594]
indices2 = indices[9594 : 22427]
for i in range(Normal.shape[0]):
  from_dataset_data[indices1[i]] = Normal[i]
  from_dataset_labels[indices1[i]] = 0
for i in range(Attacks.shape[0]):
  from_dataset_data[indices2[i]] = Attacks[i]
  # The label must go to the same shuffled position as the row; the original
  # wrote it to position i, so labels did not match the data.
  from_dataset_labels[indices2[i]] = 1

# Creating The Test

In [None]:
from statistics import mean
def test_stat(sample):
  """Statistical test of ``sample`` against 10 random rows of ``Normal``.

  Returns True when the mean p-value over 10 runs of the kernel two-sample
  test is below 0.05, False otherwise.
  """
  # With ratio 1.0 samplestats returns shuffled rows of Normal only.
  shuffled_normal = samplestats(Normal, Normal, Normal.shape[0], Normal.shape[0], 1.0)
  reference = shuffled_normal[: 10]
  p_values = compute_stats(reference, sample, 10, 10, ratio=1.0, runs=10)
  return bool(mean(p_values) < 0.05)

# Test 1
<img src = "https://drive.google.com/uc?id=1rhqP3_jpFfvfPTotOqyqXPl8XJ5q57tT" height = "400">


In [None]:
# Test 1: Boundary
# Run the statistical test on the first 100 boundary rows in batches of 10;
# batches that are not flagged are passed row by row to the NIDS.
import time
start_time = time.time()
preds = []
detected_sam = 0
for i in range(0, 100, 10):
  sam = boundary[i:i+10]
  ret = test_stat(sam)
  if ret:
    detected_sam += 1
  else:
    # Batch not flagged: collect the NIDS prediction of each row.
    for j in range(10):
      pred = predict(sam[j])
      preds.append(pred)

# 10 batches are tested, so each detected batch counts for 10 %.
print('Detection rate of Boudnary sample is ', detected_sam*10, "%")
print("--- %s seconds ---" % (time.time() - start_time))




Detection rate of Boudnary sample is  100 %
--- 21.20804452896118 seconds ---


In [None]:
# Test 1: HSJA
# Same procedure as the boundary test, on the first 100 HopSkipJump rows.
import time
start_time = time.time()
preds = []
detected_sam = 0
for i in range(0, 100, 10):
  sam = hsja[i:i+10]
  ret = test_stat(sam)
  if ret:
    detected_sam += 1
  else:
    # Batch not flagged: collect the NIDS prediction of each row.
    for j in range(10):
      pred = predict(sam[j])
      preds.append(pred)

# 10 batches are tested, so each detected batch counts for 10 %.
print('Detection rate of HopSkipJump sample is ', detected_sam*10, "%")
print("--- %s seconds ---" % (time.time() - start_time))


Detection rate of HopSkipJump sample is  100 %
--- 21.777459859848022 seconds ---


# Test 2
<img src = "https://drive.google.com/uc?id=1TqQntxEdlb-2JHz9ceoE9nBWajkSHr6R" height = "400">


In [None]:
# Test 2: Boundary
# Classify the first 100 boundary rows one at a time with the TabNet
# classifier `clf`: class 2 counts as detected, class 1 as recovered and
# class 0 as missed. Missed rows are additionally passed to the NIDS.
import time
start_time = time.time()
preds = []
detected_sam = 0
recovered_sam = 0
missed_sam = 0
NIDS_det = 0
NIDS_mis = 0
for i in range(0, 100, 1):
  sam = boundary[i]
  sam = np.reshape(sam, (1, 113))
  pred = prediction(clf, sam)
  if pred == 0:
    missed_sam += 1
    predn = predict(sam)
    preds.append(predn)
    # NIDS_det / NIDS_mis are counted but not printed by this cell.
    if predn == 1:
      NIDS_det += 1
    else:
      NIDS_mis += 1
  elif pred == 1:
    recovered_sam += 1
  else:
    detected_sam += 1

# Exactly 100 rows are tested, so the raw counts are printed as percentages.
print("Attack Boundary")
print("detectection rate by outlier ", detected_sam, "%")
print("recovery rate by outlier ", recovered_sam, "%")
print("error rate by outlier ", missed_sam, "%")
print("--- %s seconds ---" % (time.time() - start_time))  


  

Attack Boundary
detectection rate by outlier  32 %
recovery rate by outlier  55 %
error rate by outlier  13 %
--- 2.033456563949585 seconds ---


In [None]:
# Test 2: HSJA
# Same procedure as the boundary variant, on the first 100 HopSkipJump rows.
import time
start_time = time.time()
preds = []
detected_sam = 0
recovered_sam = 0
missed_sam = 0
NIDS_det = 0
NIDS_mis = 0
for i in range(0, 100, 1):
  sam = hsja[i]
  sam = np.reshape(sam, (1, 113))
  pred = prediction(clf, sam)
  if pred == 0:
    missed_sam += 1
    predn = predict(sam)
    preds.append(predn)
    # NIDS_det / NIDS_mis are counted but not printed by this cell.
    if predn == 1:
      NIDS_det += 1
    else:
      NIDS_mis += 1
  elif pred == 1:
    recovered_sam += 1
  else:
    detected_sam += 1

# Exactly 100 rows are tested, so the raw counts are printed as percentages.
print("Attack HopSKipJump")
print("detectection rate by outlier ", detected_sam, "%")
print("recovery rate by outlier ", recovered_sam, "%")
print("error rate by outlier ", missed_sam, "%")



Attack HopSKipJump
detectection rate by outlier  82 %
recovery rate by outlier  12 %
error rate by outlier  6 %


In [None]:
class Sample:
    """A batch of rows together with its adversarial flag and row labels."""

    def __init__(self, sample, isAdv, labels):
        # sample: the rows of the batch; isAdv: flag stored as given;
        # labels: the labels of the rows.
        self.sample, self.isAdv, self.labels = sample, isAdv, labels

In [None]:
# Getting real predictions of samples
# Cut Mix/Labels into consecutive batches of 10 rows and wrap each one in a
# Sample; a batch is flagged adversarial as soon as it holds one row with
# label 2 or 3.
start = 0
areAdv = [False] * 4809
indice = 0
samples = []
# `start` trails `i` by one step, so the batches begin at 0, 10, ..., 48070
# (4808 batches; the last slot of areAdv is never written).
for i in range(10, 48090, 10):
  sample = Mix[start: start + 10]
  sample_lab = Labels[start: start + 10]
  start = i
  isAdv = False
  for j in range(10):
    if sample_lab[j] == 2 or sample_lab[j] == 3:
      isAdv = True
  if isAdv:
    areAdv[indice] = True
  else:
    areAdv[indice] = False
  obj = Sample(sample, areAdv[indice] , sample_lab)
  samples.append(obj)
  indice += 1

In [None]:
# Creating benign samples
# Cut from_dataset_data/from_dataset_labels into 2242 consecutive batches of
# 10 rows. Every batch is stored with isAdv=False; note that the source
# arrays hold both Normal and Attacks rows.
start = 0
benn_samples = [] 
for i in range(0, 22420, 10):
  portion = from_dataset_data[start: start + 10]
  lab = from_dataset_labels[start: start + 10]
  #print(i,portion.shape, lab.shape )
  start += 10
  s = Sample(portion, False, lab )
  #print(s)
  benn_samples.append(s)

In [None]:
# Merging the two samples
# Place the 2242 dataset batches and the 4808 mixed batches at random
# positions of one list of 7050 entries.
merged_samples = [None] * 7050
indices = random.sample(range(7050), 7050)
indicesben = indices[0:2242 ]
indicesadv = indices[2242: 7050]
for i in range(len(indicesben)):
  #print(i, indicesben[i])
  merged_samples[indicesben[i]] = benn_samples[i]
for i in range(len(indicesadv)):
  merged_samples[indicesadv[i]] = samples[i]


# Test 3

<img src = "https://drive.google.com/uc?id=1u5L5sEio6NvDiqfneW9anHMGSoxvWEDa" height = "400">


In [None]:
# Test 3: full pipeline on the first `number` merged batches.
# Stage 1 (optional): statistical test per batch.
# Stage 2 (optional): TabNet outlier classifier per row of flagged batches.
# Stage 3: NIDS on rows the outlier classifier labels 0, or on every row of a
# batch that skipped stage 2.
import time
start_time = time.time()
# Confusion counts of the statistical test (vp/fp/fn/vn = true positives,
# false positives, false negatives, true negatives).
vp = 0
fp = 0
fn = 0
vn = 0
detect_rate = 0
rec_rate = 0
error_rate = 0
detection_nids = 0
error_nids = 0
number =  100 #len(merged_samples)
adv_num = 0
adv_num_nids = 0
activ_test_stat = True
activ_outlier = True
for i in range(number):
  passs = False
  # Reset per batch: without this, a disabled statistical test would read a
  # stale `ret` from a previous cell (or raise NameError in a fresh kernel).
  ret = False
  data = merged_samples[i]
  sam = data.sample
  if activ_test_stat:
    ret = test_stat(sam)
    if ret and data.isAdv :
      vp += 1
    elif ret and not data.isAdv:
      fp += 1
    elif not ret and data.isAdv :
      fn += 1
    elif not ret and not data.isAdv:
      vn += 1

  # Count the adversarial rows (labels 2 and 3) of the batch.
  for j in range(10):
    if data.labels[j] == 2 or data.labels[j] == 3:
      adv_num += 1
  if (ret and activ_outlier) or (not activ_test_stat and activ_outlier):
    passs = True
    # `k` instead of `i`: the inner loops no longer shadow the batch index.
    for k in range(10):
      reshaped_data = np.reshape(sam[k], (1, 113))
      pred = prediction(clf, reshaped_data)
      row_is_adv = data.labels[k] == 2 or data.labels[k] == 3
      if pred == 2 and row_is_adv:
        detect_rate += 1
      elif pred == 1 and row_is_adv:
        rec_rate += 1
      elif pred == 0 and row_is_adv:
        error_rate += 1

      # Rows the outlier classifier considers normal go on to the NIDS.
      if pred == 0: 
        pred_nids = predict( sam[k])
        if data.labels[k] != 0:
          adv_num_nids += 1
        if pred_nids == 1 and data.labels[k] != 0:
          detection_nids += 1
        elif pred_nids == 0 and data.labels[k] != 0:
          error_nids += 1
  # The original three-part condition is true exactly when the outlier stage
  # was skipped, i.e. when `passs` is False.
  if not passs:
    for k in range(10):
      pred_nids = predict(sam[k])
      if data.labels[k] != 0:
          adv_num_nids += 1
      if pred_nids == 1 and data.labels[k] != 0:
        detection_nids += 1
      elif pred_nids == 0 and data.labels[k] != 0:
        error_nids += 1

# Avoid a ZeroDivisionError when no adversarial row was seen.
adv_total = adv_num if adv_num else 1
print("True positives  Stat Test ",(vp/number) * 100)
print("False positives Stat Test ", (fp/number) * 100)
print("True negatives Stat Test", (vn/number) * 100)
print("False negatives Stat Test ", (fn/number) * 100)
print("outlier detection rate ", (detect_rate/adv_total) * 100 )
print("outlier recovery rate ", (rec_rate/adv_total) * 100 )
print("outlier error rate ", (error_rate/adv_total) * 100 )

print("--- %s seconds ---" % (time.time() - start_time))  


True positives  Stat Test  48.0
False positives Stat Test  25.0
True negatives Stat Test 13.0
False negatives Stat Test  14.000000000000002
outlier detection rate  43.93939393939394
outlier recovery rate  26.36363636363636
outlier error rate  8.787878787878787
--- 225.1943597793579 seconds ---


# END

In [None]:
'''
Results for 10 samples
test_stat = True, outlier_class = True

True positives  Stat Test  0.7
False positives Stat Test  0.1
True negatives Stat Test 0.1
False negatives Stat Test  0.1
outlier detection rate  0.38461538461538464
outlier recovery rate  0.38461538461538464
outlier error rate  0.1794871794871795
The nids detection  0.4827586206896552
The nids error  0.5172413793103449
--- 35.840468645095825 seconds ---

test_stat = False, outlier_class = True
True positives  Stat Test  0.0
False positives Stat Test  0.0
True negatives Stat Test 0.0
False negatives Stat Test  0.0
outlier detection rate  43.58974358974359
outlier recovery rate  38.46153846153847
outlier error rate  17.94871794871795
The nids detection  57.89473684210527
The nids error  42.10526315789473
--- 3.3734238147735596 seconds ---

test_stat = True, outlier_class = False
True positives  Stat Test  60.0
False positives Stat Test  10.0
True negatives Stat Test 10.0
False negatives Stat Test  20.0
outlier detection rate  0.0
outlier recovery rate  0.0
outlier error rate  0.0
The nids detection  34.5679012345679
The nids error  65.4320987654321
--- 28.26953148841858 seconds ---

'''

'''


error_nids = 0
number = 1000
adv_num = 0
adv_num_nids = 0
activ_test_stat = True
activ_outlier = True
for i in range(number):
  passs = False
  data = merged_samples[i]
  sam = data.sample

True positives  Stat Test  65.5
False positives Stat Test  18.7
True negatives Stat Test 11.899999999999999
False negatives Stat Test  3.9
outlier detection rate  32.11875843454791
outlier recovery rate  41.75438596491228
outlier error rate  22.53711201079622
The nids detection  36.69603524229075
The nids error  63.30396475770925
--- 2342.1765415668488 seconds ---

'''

1.0