In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import Normalizer
from scipy.special import psi, polygamma
import random
import tensorflow as tf

tf.random.set_seed(0)

In [None]:
from keras.models import Sequential
from keras.models import Model
from keras import regularizers
from keras.layers import Input, Dense, Dropout, Activation, Lambda
from keras.layers import Convolution1D,MaxPooling1D, Flatten
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Convolution1D, Dense, Dropout, Flatten, MaxPooling1D
from keras.layers import LSTM, GRU, SimpleRNN

In [None]:
def encoding(y):
  """Binary-encode a vector of connection labels.

  Every element equal to the string 'normal' becomes 1; any other value becomes 0.

  Args:
    y: iterable of labels (e.g. a pandas Series, a list or a 1-D array).

  Returns:
    A single-column pandas DataFrame of 0/1 integers, one row per label,
    carrying a fresh default index.
  """
  # Iterate over the values themselves: `y[i]` is a label-based lookup on a
  # Series and raises KeyError when its index is not exactly 0..n-1.
  enc = [1 if label == 'normal' else 0 for label in y]
  return pd.DataFrame(enc)

##**Load the Data**

In [None]:
def _take_label_balanced_head(frame, nbr_normal, nbr_attack):
  """Return the first `nbr_normal` 'normal' rows followed by the first `nbr_attack` other rows.

  Rows are selected on the 'connection_type' column and the result gets a fresh 0..n-1 index.
  """
  is_normal = frame['connection_type'] == 'normal'
  subset = pd.concat([frame[is_normal][:nbr_normal], frame[~is_normal][:nbr_attack]])
  return subset.reset_index(drop = True)


def load_data(path_train, path_test,
              nbr_of_normal_train_samples=10000, nbr_of_attack_train_samples=10000,
              nbr_of_normal_test_samples=2500, nbr_of_attack_test_samples=2500,
              nbr_of_features=41):
  """Load the train/test CSV files and turn them into normalised arrays.

  Args:
    path_train: path of the training CSV file.
    path_test: path of the test CSV file.
    nbr_of_normal_train_samples: number of leading 'normal' rows kept from the train file.
    nbr_of_attack_train_samples: number of leading non-'normal' rows kept from the train file.
    nbr_of_normal_test_samples: number of leading 'normal' rows kept from the test file.
    nbr_of_attack_test_samples: number of leading non-'normal' rows kept from the test file.
    nbr_of_features: number of leading columns used as features; the column
      right after them is used as the label column.

  Returns:
    ((x_train, y_train), (x_test, y_test)) where the x arrays hold the
    one-hot-encoded, row-normalised features and the y arrays are (n, 1)
    arrays with 1 for 'normal' and 0 otherwise.
  """
  train = pd.read_csv(path_train)  #load the train data
  test = pd.read_csv(path_test)  #load the test data

  # Kept from the original implementation: resets TensorFlow's global seed.
  # The row selection below is a plain head-slice and does not depend on it.
  tf.random.set_seed(0)

  #keep the desired number of normal samples and attack samples for both train and test
  train = _take_label_balanced_head(train, nbr_of_normal_train_samples, nbr_of_attack_train_samples)
  test = _take_label_balanced_head(test, nbr_of_normal_test_samples, nbr_of_attack_test_samples)

  #split the train and test into features data and labels
  x_train = train.iloc[:, :nbr_of_features]
  y_train = train.iloc[:, nbr_of_features]
  x_test = test.iloc[:, :nbr_of_features]
  y_test = test.iloc[:, nbr_of_features]

  #use the command 'get_dummies' to one-hot encode the categorical features
  x_train = pd.get_dummies(x_train)
  x_test = pd.get_dummies(x_test)

  #keep only the columns present in both frames: after 'get_dummies' a category
  #seen in just one of the two files would otherwise give them different shapes
  x_train , x_test = x_train.align(x_test, join = 'inner', axis = 1)

  #encode both y_train and y_test, then transform them into arrays
  y_train = np.array(encoding(y_train))
  y_test = np.array(encoding(y_test))

  #Normalize the data. Normalizer rescales every row on its own and learns
  #nothing in fit(), so one instance can serve both train and test.
  normalizer = Normalizer()
  x_train = normalizer.fit_transform(x_train)
  x_test = normalizer.fit_transform(x_test)

  return (x_train , y_train) , (x_test , y_test)

##**Transformations**

In [None]:
def transformation(input_dim, h_dim,):
  """Build one small dense network used as a learned transformation.

  The network maps an `input_dim`-wide vector through a bias-free ReLU layer
  of width `h_dim` and back to `input_dim` values through a bias-free sigmoid
  layer. The returned Keras Model is not compiled.
  """
  inputs = Input(shape=(input_dim,))
  hidden = Dense(h_dim, use_bias = False, activation='relu')(inputs)
  outputs = Dense(input_dim, use_bias = False, activation='sigmoid')(hidden)
  return Model(inputs, outputs)

In [None]:
def create_transformations_list(input_dim, h_dim, num_trans):
  """Create `num_trans` transformation networks.

  The i-th network is built by `transformation(input_dim, h_dim[i])`, so
  `h_dim` must provide at least `num_trans` hidden widths.

  Returns:
    A list holding the `num_trans` models, in index order.
  """
  return [transformation(input_dim, h_dim[idx]) for idx in range(num_trans)]

##**Encoding**

In [None]:
from keras.layers import Input, Dense
from keras.models import Model

def AutoEncoder(input_shape):
  """Build an (uncompiled) dense autoencoder for `input_shape`-wide vectors.

  The stack is made of bias-free ReLU layers of widths 128, 64, 32 (the
  bottleneck), 64 and 128, followed by a bias-free ReLU output layer that
  restores the `input_shape` width.
  """
  inputs = Input(shape=(input_shape,))

  hidden = inputs
  # Hidden widths in creation order: down to the 32-unit bottleneck, then back up.
  for width in (128, 64, 32, 64, 128):
    hidden = Dense(width, use_bias = False, activation='relu')(hidden)

  reconstruction = Dense(input_shape, use_bias = False, activation='relu')(hidden)
  return Model(inputs, reconstruction)

In [None]:
def classifier(input_shape,lstm_output_size,nbr_of_classes):
  """Build the (uncompiled) Conv1D + LSTM classifier.

  Args:
    input_shape: length of each input sequence; inputs are declared as (input_shape, 1).
    lstm_output_size: number of units of the LSTM layer.
    nbr_of_classes: width of the final softmax layer.

  Returns:
    A Keras Sequential model: Conv1D(128 filters, kernel 3, same padding, ReLU)
    -> MaxPooling1D(3) -> LSTM -> Dropout(0.1) -> Dense softmax.
  """
  stack = [
    Convolution1D(128, 3, padding="same",activation="relu",input_shape=(input_shape, 1)),
    MaxPooling1D(pool_size=3),
    LSTM(lstm_output_size),
    Dropout(0.1),
    Dense(nbr_of_classes, activation="softmax"),
  ]
  return Sequential(stack)

##**Experiments**

In [None]:
#Estimating the parameters
def inv_psi(y, iters=5):
  """Numerically invert the digamma function, i.e. find x with psi(x) == y.

  An initial guess is chosen from one of two closed-form expressions
  (depending on whether y >= -2.22) and then refined with `iters` Newton
  steps x <- x - (psi(x) - y) / psi'(x).

  Args:
    y: scalar or array-like of target digamma values.
    iters: number of Newton refinement steps.

  Returns:
    The estimate(s) of x, with the same shape as `y` (a scalar for scalar input).
  """
  y = np.asarray(y, dtype=float)

  # initial estimate
  cond = y >= -2.22
  # np.where evaluates both branches for every element, so feed the small-y
  # branch a harmless denominator wherever it is not selected. Without this,
  # y == psi(1) produced 0/0 = nan even though the other branch was the one used.
  safe_denom = np.where(cond, 1.0, y - psi(1))
  x = np.where(cond, np.exp(y) + 0.5, -1.0 / safe_denom)

  for _ in range(iters):
    x = x - (psi(x) - y) / polygamma(1, x)

  # Hand scalars back as scalars rather than 0-d arrays.
  return x[()] if np.ndim(x) == 0 else x

def fixed_point_ln_beta_louiville_mle(vect_alpha_0, alpha_0, beta_0,lamda_0, y, max_iter=50):
  """Estimate (vect_alpha, alpha, beta, lamda) with a fixed-point iteration.

  Every iteration recomputes all four parameters from the values of the
  previous iteration (none of the updates sees another update of the same
  round), clips them to be at least 1e-10 and replaces nan/inf values by
  large finite constants.

  Args:
    vect_alpha_0: initial vector parameter, one entry per column of `y`.
    alpha_0, beta_0, lamda_0: initial scalar parameters.
    y: 2-D array-like of observations, one row per sample.
    max_iter: number of fixed-point iterations to run.

  Returns:
    Tuple (vect_alpha, alpha, beta, lamda) after `max_iter` iterations; with
    max_iter == 0 the initial values are returned unchanged.
  """
  y = np.asarray(y)
  N = y.shape[0]
  alpha_old = alpha_0
  beta_old = beta_0
  lamda_old = lamda_0
  vect_alpha_old = vect_alpha_0

  # Statistics of the data that do not change between iterations.
  u = np.mean(y, axis=1)                     # per-sample mean
  mean_log_u = (1/N)*np.sum(np.log(u))
  mean_log_one_minus_u = (1/N)*np.sum(np.log(np.clip(1-u,10**(-10),None)))
  mean_u = (1/N)*np.sum(u)
  mean_log_y = (1/N)*np.sum(np.log(y), axis=0)   # one value per column

  for _ in range(max_iter):
    scale = 1 - (1-lamda_old)*u
    mean_log_scale = (1/N)*np.sum(np.log(np.abs(scale)))

    #calculate alpha
    # The abs() guards the argument of the log, exactly as in the beta update
    # and in lnbl_normality_score; taking abs() of the log instead would flip
    # the sign of this term whenever its argument is below 1.
    aph = psi(alpha_old + beta_old) + np.log(lamda_old) + mean_log_u - mean_log_scale
    aph = np.nan_to_num(aph, copy=True, nan=10**7, posinf=10**10, neginf=-(10**10))
    alpha_new = inv_psi(np.clip(aph,None,10), iters=5)
    alpha_new = np.clip(alpha_new,10**(-10),None)

    #calculate beta
    bet = psi(alpha_old + beta_old) + mean_log_one_minus_u - mean_log_scale
    bet = np.nan_to_num(bet, copy=True, nan=10**7, posinf=10**10, neginf=-(10**10))
    beta_new = inv_psi(np.clip(bet,None,10), iters=5)
    beta_new = np.clip(beta_new,10**(-10),None)

    #calculate lamda
    lamda_new = (N*alpha_old) / np.sum((alpha_old+beta_old)*u/scale)
    lamda_new = np.nan_to_num(lamda_new, copy=True, nan=10**7, posinf=10**10, neginf=-(10**10))
    lamda_new = np.clip(lamda_new,10**(-10),None)

    #calculate the vector alpha (all components at once)
    # NOTE(review): this subtracts the mean of u, whereas lnbl_normality_score
    # uses log(u) in the matching term - confirm whether log(u) was intended.
    vect_alpha_new = np.asarray(inv_psi(psi(np.sum(vect_alpha_old)) + mean_log_y - mean_u, iters=5))
    vect_alpha_new = np.nan_to_num((np.clip(vect_alpha_new,10**(-10),None)), copy=True, nan=10**7, posinf=10**10, neginf=-(10**10))

    #Update the parameters
    alpha_old = alpha_new
    beta_old = beta_new
    lamda_old = lamda_new
    vect_alpha_old = vect_alpha_new

  # The *_old names always hold the latest estimates, including the untouched
  # initial values when max_iter == 0 (which used to raise UnboundLocalError).
  return vect_alpha_old, alpha_old, beta_old, lamda_old

#Normality Score
def lnbl_normality_score(vect_alpha,alpha,beta,lamda,y):
  """Score every row of `y` under the given (vect_alpha, alpha, beta, lamda).

  With u the mean of a row, its score is
    (alpha - sum(vect_alpha)) * log(u)
    + (beta - 1) * log(max(1 - u, 1e-10))
    - (alpha + beta) * log(|1 - (1 - lamda) * u|)
    + sum((vect_alpha - 1) * log(row)).

  Args:
    vect_alpha: vector parameter, one entry per column of `y`.
    alpha, beta, lamda: scalar parameters.
    y: 2-D array, one row per sample.

  Returns:
    1-D array with one score per row; nan is replaced by 1e7 and +/-inf by +/-1e10.
  """
  n_rows, _ = y.shape[0], y.shape[1]

  per_row = []
  for j in range(n_rows):
    row = y[j]
    u = np.mean(row)
    mean_term = (alpha-np.sum(vect_alpha))*np.log(u)
    complement_term = (beta-1)*np.log(np.clip(1-u,10**(-10),None))
    scale_term = (alpha+beta)*np.log(np.abs(1-(1-lamda)*u))
    component_term = np.sum((vect_alpha-1)*np.log(row))
    per_row.append(mean_term + complement_term - scale_term + component_term)

  return np.nan_to_num(np.asarray(per_row), copy=True, nan=10**7, posinf=10**10, neginf=-(10**10))

This cell computes our evaluation metrics (ROC AUC and the two precision-recall AUCs).

In [None]:
from sklearn.metrics import roc_curve, precision_recall_curve, auc
def roc_pr_curve_data(scores, labels):
    """Print and return the ROC AUC and both precision-recall AUCs of `scores`.

    Args:
        scores: array of scores, higher meaning "more normal".
        labels: array of the same size; entries equal to 1 (or True) mark the
            normal samples, everything else is treated as an anomaly.

    Returns:
        Tuple (roc_auc, pr_auc_norm, pr_auc_anom): the area under the ROC curve,
        the PR AUC with "normal" as the positive class and the PR AUC with
        "anomaly" as the positive class.
    """
    scores = scores.flatten()
    labels = labels.flatten()

    scores_pos = scores[labels == 1]
    scores_neg = scores[labels != 1]

    # Ground truth: 0 for every anomaly score, then 1 for every normal score.
    truth = np.concatenate((np.zeros_like(scores_neg), np.ones_like(scores_pos)))
    preds = np.concatenate((scores_neg, scores_pos))
    fpr, tpr, _ = roc_curve(truth, preds)
    roc_auc = auc(fpr, tpr)
    print("roc_auc",roc_auc)

    # pr curve where "normal" is the positive class
    precision_norm, recall_norm, _ = precision_recall_curve(truth, preds)
    pr_auc_norm = auc(recall_norm, precision_norm)
    print("pr_auc_norm where normal is the positive class",pr_auc_norm)

    # pr curve where "anomaly" is the positive class: the scores are negated so
    # that a higher value points towards the positive (anomaly) class
    precision_anom, recall_anom, _ = precision_recall_curve(truth, -preds, pos_label=0)
    pr_auc_anom = auc(recall_anom, precision_anom)
    print("pr_auc_anom where anomaly is the positive class",pr_auc_anom)

    return roc_auc, pr_auc_norm, pr_auc_anom

In [None]:
#load the data
# NOTE(review): both paths are absolute "/content/..." locations - presumably a
# Colab upload directory; adjust them when running elsewhere.
path_train = "/content/train_kdd_nsl.csv"
path_test = "/content/test_kdd_nsl.csv"
# x_train, y_train, x_test and y_test become module-level names that
# experiment() reads directly.
(x_train, y_train) , (x_test, y_test) = load_data(path_train, path_test)

In [None]:
def experiment(single_class_ind):
  """Run the one-class experiment that treats label `single_class_ind` as the reference class.

  Reads the module-level x_train, y_train, x_test and y_test. Steps:
    1. train 10 transformation networks on the training rows of the class;
    2. train the autoencoder on all transformed rows and pass them through it;
    3. train the classifier to recognise which transformation produced a row;
    4. for every transformation, fit the distribution parameters on the
       classifier outputs of the training rows and score the test rows;
    5. average the scores over the transformations and print the ROC / PR AUCs.

  Args:
    single_class_ind: label value (0 or 1) of the reference class.
  """
  print("Class ",single_class_ind)
  tf.random.set_seed(0)

  #Transform the data
  print('Start of transformations for class',single_class_ind)
  x_train_task = x_train[y_train.flatten() == single_class_ind]
  num_trans = 10
  h_dim = [10,20,30,40,50,60,70,80,90,100]
  # Target of the classifier: 0..num_trans-1 repeated once per training row.
  transformations_inds = np.tile(np.arange(num_trans), len(x_train_task))
  transformations_list = create_transformations_list(x_train_task.shape[1], h_dim, num_trans)
  data_each_transformation = []
  for t_ind, trans_model in enumerate(transformations_list):
    print('transformation ', t_ind)
    trans_model.compile(loss="mse", optimizer="adamax",metrics=['accuracy'])
    trans_model.fit(x_train_task, x_train_task, batch_size=64, epochs= 5)
    data_each_transformation.append(trans_model.predict(x_train_task))
  # Interleave the outputs so that the num_trans versions of a sample sit on
  # consecutive rows (sample 0 / trans 0..9, sample 1 / trans 0..9, ...), the
  # same order as transformations_inds.
  x_train_task_transformed = pd.DataFrame(
      np.stack(data_each_transformation, axis=1).reshape(-1, x_train_task.shape[1]))
  print('End of transformations for class',single_class_ind)


  #Encode the data
  print('Start of Encoding for class',single_class_ind)
  autoencoder = AutoEncoder(x_train_task_transformed.shape[1])
  autoencoder.compile(optimizer='adam', loss='mse')
  autoencoder.fit(x_train_task_transformed, x_train_task_transformed, epochs=3, batch_size=64)
  # Note: this is the full autoencoder output, not the bottleneck activations.
  x_train_task_encoded = autoencoder.predict(x_train_task_transformed)
  print('End of Encoding for class',single_class_ind)

  #Create and train the classifier
  clf = classifier(x_train_task_transformed.shape[1],70,len(h_dim))
  clf.compile(loss="categorical_crossentropy", optimizer="adam",metrics=['accuracy'])
  clf.fit(x=x_train_task_encoded, y=to_categorical(transformations_inds),
            batch_size=64, epochs= 75)    #N_EPOCHS

  scores = np.zeros((len(x_test),))
  for t_ind in range(num_trans):
    observed_data_transformed = data_each_transformation[t_ind]
    observed_data_autoencoded = autoencoder.predict(observed_data_transformed)
    observed_dirichlet = clf.predict(observed_data_autoencoded)

    vect_alpha_0 = 0.05*np.ones(observed_dirichlet.shape[1])
    alpha_0 = 0.5
    beta_0 = 0.4
    lamda_0 = 0.5

    mle_vect_alpha_t,mle_alpha_t,mle_beta_t,mle_lamda_t = fixed_point_ln_beta_louiville_mle(vect_alpha_0, alpha_0, beta_0,lamda_0,observed_dirichlet,max_iter=50)
    # neginf is -10**10 on all four lines (two of them used to read -10*10, i.e. -100).
    mle_vect_alpha_t=np.nan_to_num(mle_vect_alpha_t, copy=True, nan=1, posinf=10**10, neginf=-10**10)
    mle_alpha_t= np.nan_to_num(mle_alpha_t, copy=True, nan=10**3, posinf=10**10, neginf=-10**10)
    mle_beta_t= np.nan_to_num(mle_beta_t, copy=True, nan=10**3, posinf=10**10, neginf=-10**10)
    mle_lamda_t= np.nan_to_num(mle_lamda_t, copy=True, nan=10**3, posinf=10**10, neginf=-10**10)

    # Push the test set through the same transformation -> autoencoder -> classifier chain.
    x_test_p_transformed = transformations_list[t_ind].predict(x_test)
    x_test_p_autoencoded = autoencoder.predict(x_test_p_transformed)
    x_test_p = clf.predict(x_test_p_autoencoded)
    x_test_p = np.nan_to_num(x_test_p, copy=True, nan=0.0001, posinf=None, neginf=None)
    scores += lnbl_normality_score(mle_vect_alpha_t,mle_alpha_t,mle_beta_t,mle_lamda_t, x_test_p)

  scores /= num_trans
  labels = y_test.flatten() == single_class_ind
  roc_pr_curve_data(scores, labels)

In [None]:
# Run the experiment once per label value produced by encoding()
# (0 = not 'normal', 1 = 'normal'), using each in turn as the reference class.
for class_idx in range(2): #We have only two classes
  experiment(class_idx)

Class  0
Start of transformations for class 0
transformation  0
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  1
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  2
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  3
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  4
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  6
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  7
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  8
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
transformation  9
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
End of transformations for class 0
Start of Encoding for class 0
Epoch 1/3
Epoch 2/3
Epoch 3/3
End of Encoding for class 0
Epoch 1/75
Epoch 2/75
Epoch 3/75
Epoch 4/75
Epoch 5/75
Epoch 6/75
Epoch 7/75
Epoch 8/75
Epoch 9/75
Epoch 10/75
Epoch 11/75
Epoch 12/75
Epoch 13/75
Epoc