In [None]:
# Mount Google Drive into the Colab filesystem so the dataset CSV can be read below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras import layers, losses



### An attempt at multiclass classification using autoencoders

The general principle is the following: There are several classes in my dataset, and I want to train autoencoder models to classify them. An autoencoder model is trained to reconstruct data from a specific class; at the end of the training, it will be able to do so with a small reconstruction error. However, it should have a larger error when reconstructing data from other classes. Therefore, when presented with a new data sample, the classification is done based on the class for which the reconstruction error is the smallest.

### Loading the dataset:

In [None]:
# Read the feature table and discard the leftover 'Unnamed: 0' column,
# then display how many rows each attack class has.
df = pd.read_csv("drive/MyDrive/Datasets/Final_Features.csv").drop(columns='Unnamed: 0')
df['Attack_type'].value_counts()

7     1363998
4      121567
2       67939
11      50826
3       50062
13      50026
8       49933
1       48544
12      36807
0       24026
9       19977
14      15066
10       9689
5         853
6         358
Name: Attack_type, dtype: int64

There are 15 classes in this dataset: class 0 represents normal network traffic and the 14 others are classes of attacks

In [None]:
# (rows, columns) of the full dataset before any splitting.
df.shape

(1909671, 17)

In [None]:
# Hold out 10% of the rows as the final test set.
# A fixed random_state makes the split (and everything downstream) reproducible;
# stratifying on the label keeps the very rare classes represented in both parts.
df_train, df_test = train_test_split(
    df,
    test_size=0.1,
    random_state=42,
    stratify=df['Attack_type'],
)
# From here on `df` refers to the training part only.
df = df_train
df.shape

(1718703, 17)

In [None]:
# Class distribution of the training part after the hold-out split.
df['Attack_type'].value_counts()

7     1227512
4      109535
2       61217
11      45744
3       45117
13      45063
8       44943
1       43675
12      33126
0       21546
9       17961
14      13490
10       8709
5         759
6         306
Name: Attack_type, dtype: int64

In this notebook, we experiment only with classes 0 to 5.

### Class balancing using SMOTE

In [None]:
from imblearn.over_sampling import SMOTE
from collections import Counter

# Separate the label from the predictors, hold out 5% of the rows, then run SMOTE
# on the remaining part with a requested size of 1500 samples for class 2.
y = df["Attack_type"]
X = df.drop(columns="Attack_type")
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05)
smote = SMOTE(sampling_strategy={2: 1500}, random_state=10)
X2, y2 = smote.fit_resample(X_train, y_train)
# Show the per-class sample counts after resampling.
print(Counter(y2))

Counter({3: 141541, 1: 111323, 0: 12347, 4: 9353, 5: 7845, 2: 1500})


### Data Scaling:

In [None]:
# Take the target column out of the frame (it must not be scaled) and keep it
# aside in `labels`; `df` is left holding only the predictors.
labels = df.pop('Attack_type')

In [None]:
# function to harmonize (scale) the values
def harmonize(data):
    """Scale every row of `data` by that row's maximum value.

    Each value is divided by the largest value found in its own row, so a row
    with a positive maximum ends up with a maximum of exactly 1. Rows whose
    maximum is not strictly positive (all zeros, all negative, or containing
    NaN) are returned unchanged, only converted to float.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric feature table. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Float frame of the same shape, carrying the index and column labels of
        `data` so that it can be re-joined with other columns row by row.
    """
    # Work on a float copy: writing ratios back into an integer array would
    # truncate them, and writing into `data.values` could alter the caller's frame.
    values = data.to_numpy(dtype=float, copy=True)
    if values.size == 0:
        return pd.DataFrame(values, index=data.index, columns=data.columns)
    row_max = values.max(axis=1, keepdims=True)
    # Use a divisor of 1 where scaling does not apply, which leaves the row as is.
    divisor = np.where(row_max > 0, row_max, 1.0)
    return pd.DataFrame(values / divisor, index=data.index, columns=data.columns)

In [None]:
# Scale the predictors, then put the label column back.
# The labels are attached by position rather than with an index join: `labels`
# still carries the shuffled row labels produced by train_test_split, so joining
# it against a frame with a different index would pair rows with the wrong
# labels and leave the unmatched rows without any label.
df = harmonize(df)
df['Attack_type'] = labels.to_numpy()
df.shape

(1718703, 17)

### Splitting data for each class:

In [None]:
# Re-attach the labels to the resampled part and to the held-out part, then keep
# only the rows labelled as class 2 in each of them.
X2 = X2.join(y2)  # y2 is the target column after oversampling
X2 = X2.loc[X2['Attack_type'] == 2]
X_test = X_test.join(y_test)
X_test = X_test.loc[X_test['Attack_type'] == 2]
print(X2.shape)
print(X_test.shape)
X2.head()

(1500, 10)
(25, 10)


Unnamed: 0,tcp.flags,tcp.time_delta,tcp.len,mqtt.kalive,mqtt.len,mqtt.msgid,mqtt.msgtype,mqtt.qos,mqtt.protoname-0,target
129,16,4e-06,0,0.0,0.0,0.0,0.0,0.0,1,2
652,24,3.2e-05,32760,0.0,16.0,0.0,3.0,0.0,1,2
734,16,5e-06,0,0.0,0.0,0.0,0.0,0.0,1,2
1297,24,4.4e-05,32760,0.0,16.0,0.0,3.0,0.0,1,2
2085,16,1.6e-05,0,0.0,0.0,0.0,0.0,0.0,1,2


In [None]:
# Rows of the scaled training data labelled as class 0.
df0 = df.loc[df['Attack_type'] == 0]
df0.shape

(19379, 17)

In [None]:
# Rows of the scaled training data labelled as class 1.
df1 = df.loc[df['Attack_type'] == 1]
df1.shape

(39297, 17)

In [None]:
# Rows of the scaled training data labelled as class 2.
df2 = df.loc[df['Attack_type'] == 2]
df2.shape

(55110, 17)

In [None]:
# Rows of the scaled training data labelled as class 3.
df3 = df.loc[df['Attack_type'] == 3]
df3.shape

(40621, 17)

In [None]:
# Rows of the scaled training data labelled as class 4.
df4 = df.loc[df['Attack_type'] == 4]
df4.shape

(98581, 17)

In [None]:
# Rows of the scaled training data labelled as class 5.
df5 = df.loc[df['Attack_type'] == 5]
df5.shape

(684, 17)

### Constructing the training and test sets for the encoders:

In [None]:
# Class 0: predictors (X) are all columns but the label, the label is the target (y);
# 5% of the rows are held out for validating the class-0 autoencoder.
y0 = df0["Attack_type"]
X0 = df0.drop(columns="Attack_type")
X_train0, X_test0, y_train0, y_test0 = train_test_split(X0, y0, test_size=0.05)
print('Shape of X_train0:', X_train0.shape)
print('Shape of X_test0:', X_test0.shape)

Shape of X_train0: (18410, 16)
Shape of X_test0: (969, 16)


In [None]:
# Class 1: separate predictors from the label and hold out 5% for validation.
y1 = df1["Attack_type"]
X1 = df1.drop(columns="Attack_type")
X_train1, X_test1, y_train1, y_test1 = train_test_split(X1, y1, test_size=0.05)
print('Shape of X_train1:', X_train1.shape)
print('Shape of X_test1:', X_test1.shape)

Shape of X_train1: (37332, 16)
Shape of X_test1: (1965, 16)


In [None]:
# Class 2: separate predictors from the label and hold out 5% for validation.
y2 = df2["Attack_type"]
X2 = df2.drop(columns="Attack_type")
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, test_size=0.05)
print('Shape of X_train2:', X_train2.shape)
print('Shape of X_test2:', X_test2.shape)

Shape of X_train2: (52354, 16)
Shape of X_test2: (2756, 16)


In [None]:
# Class 3: separate predictors from the label and hold out 5% for validation.
y3 = df3["Attack_type"]
X3 = df3.drop(columns="Attack_type")
X_train3, X_test3, y_train3, y_test3 = train_test_split(X3, y3, test_size=0.05)
print('Shape of X_train3:', X_train3.shape)
print('Shape of X_test3:', X_test3.shape)

Shape of X_train3: (38589, 16)
Shape of X_test3: (2032, 16)


In [None]:
# Class 4: separate predictors from the label and hold out 5% for validation.
y4 = df4["Attack_type"]
X4 = df4.drop(columns="Attack_type")
X_train4, X_test4, y_train4, y_test4 = train_test_split(X4, y4, test_size=0.05)
print('Shape of X_train4:', X_train4.shape)
print('Shape of X_test4:', X_test4.shape)

Shape of X_train4: (93651, 16)
Shape of X_test4: (4930, 16)


In [None]:
# Class 5: separate predictors from the label and hold out 5% for validation.
y5 = df5["Attack_type"]
X5 = df5.drop(columns="Attack_type")
X_train5, X_test5, y_train5, y_test5 = train_test_split(X5, y5, test_size=0.05)
print('Shape of X_train5:', X_train5.shape)
print('Shape of X_test5:', X_test5.shape)

Shape of X_train5: (649, 16)
Shape of X_test5: (35, 16)


### Constructing the autoencoders:

In [None]:
# Import the layers from tensorflow.keras, the same package that provides `Model`,
# rather than from the standalone `keras` package.
from tensorflow.keras.layers import Input, Dense

### The following code section builds the layer graph of a symmetric autoencoder:
### the input (one unit per predictor) is narrowed through Dense layers of
### 15, 13, 11, 9, 7 and 5 units (encoder), widened again through 7, 9, 11, 13 and
### 15 units (decoder), and mapped back to the input width by the output layer.

input_dim = X0.shape[1] # Input dimension equal to the number of input variables/predictors
input_layer = Input(shape=(input_dim, ))

# Encoder: shrink by 2 units per layer down to the 5-unit bottleneck.
layer = input_layer
for nodes_number in range(15, 4, -2):
    layer = Dense(nodes_number, activation='tanh')(layer)

# Decoder: mirror of the encoder, starting just above the bottleneck.
for nodes_number in range(7, 16, 2):
    layer = Dense(nodes_number, activation='tanh')(layer)

# Reconstruction layer: same width as the input.
output_layer = Dense(input_dim, activation='tanh')(layer)

In [None]:
# Building an autoencoder model for each class: each model will specialize in reconstructing
# a specific class of data.
# Wrapping the same input_layer/output_layer in several Model objects would make them
# all share the very same Dense layers, i.e. one single set of weights trained on every
# class in turn. clone_model rebuilds the architecture with new layers and freshly
# initialised weights, so each class gets an independent network.
autoencoder0 = Model(inputs=input_layer, outputs=output_layer)
autoencoder1 = tf.keras.models.clone_model(autoencoder0)
autoencoder2 = tf.keras.models.clone_model(autoencoder0)
autoencoder3 = tf.keras.models.clone_model(autoencoder0)
autoencoder4 = tf.keras.models.clone_model(autoencoder0)
autoencoder5 = tf.keras.models.clone_model(autoencoder0)

In [None]:
class myCallback(tf.keras.callbacks.Callback):
  """Stops training as soon as the validation accuracy reaches 0.99."""

  def on_epoch_end(self, epoch, logs=None):
    # `logs` may be None and the metric may be absent (e.g. no validation data);
    # in both cases training simply continues.
    val_accuracy = (logs or {}).get('val_accuracy')
    if val_accuracy is not None and val_accuracy >= 0.99:
      print("\nAccuracy is higher than 0.99 so cancelling training!")
      self.model.stop_training = True

# Defining callbacks to control the training: when the model reaches a desired level
# of performance (when the reconstruction loss drops under a defined threshold), the training stops
class myCallback2(tf.keras.callbacks.Callback):
  """Stops training as soon as the validation loss is at or below 1.5."""

  def on_epoch_end(self, epoch, logs=None):
    # NOTE(review): the inputs are scaled by harmonize() before training, so a
    # threshold of 1.5 on a mean-squared error is probably met after the very
    # first epoch - confirm that this value is intended.
    val_loss = (logs or {}).get('val_loss')
    if val_loss is not None and val_loss <= 1.5:
      print("\nLoss is lower than 1.5 so cancelling training!")
      self.model.stop_training = True

cb = myCallback()
cb2 = myCallback2()

### Training and testing the autoencoder models:

In [None]:
# Class-0 autoencoder: the predictors are both input and target (reconstruction),
# and the held-out class-0 rows are used for validation.
autoencoder0.compile(
    loss=losses.MeanSquaredError(),
    optimizer='adam',
    metrics=['accuracy'],
)
history0 = autoencoder0.fit(
    x=X_train0,
    y=X_train0,
    validation_data=(X_test0, X_test0),
    callbacks=[cb2],
    epochs=10,
    shuffle=True,
)

Epoch 1/10


In [None]:
# Class-1 autoencoder: trained to reconstruct class-1 rows, validated on held-out class-1 rows.
autoencoder1.compile(
    loss=losses.MeanSquaredError(),
    optimizer='adam',
    metrics=['accuracy'],
)
history1 = autoencoder1.fit(
    x=X_train1,
    y=X_train1,
    validation_data=(X_test1, X_test1),
    callbacks=[cb2],
    epochs=10,
    shuffle=True,
)

Epoch 1/10


In [None]:
# Class-2 autoencoder: trained to reconstruct class-2 rows, validated on held-out class-2 rows.
autoencoder2.compile(
    loss=losses.MeanSquaredError(),
    optimizer='adam',
    metrics=['accuracy'],
)
history2 = autoencoder2.fit(
    x=X_train2,
    y=X_train2,
    validation_data=(X_test2, X_test2),
    callbacks=[cb2],
    epochs=10,
    shuffle=True,
)

Epoch 1/10


In [None]:
# Class-3 autoencoder: trained to reconstruct class-3 rows, validated on held-out class-3 rows.
autoencoder3.compile(
    loss=losses.MeanSquaredError(),
    optimizer='adam',
    metrics=['accuracy'],
)
history3 = autoencoder3.fit(
    x=X_train3,
    y=X_train3,
    validation_data=(X_test3, X_test3),
    callbacks=[cb2],
    epochs=10,
    shuffle=True,
)

Epoch 1/10


In [None]:
# Class-4 autoencoder: trained to reconstruct class-4 rows, validated on held-out class-4 rows.
autoencoder4.compile(
    loss=losses.MeanSquaredError(),
    optimizer='adam',
    metrics=['accuracy'],
)
history4 = autoencoder4.fit(
    x=X_train4,
    y=X_train4,
    validation_data=(X_test4, X_test4),
    callbacks=[cb2],
    epochs=10,
    shuffle=True,
)

Epoch 1/10


In [None]:
# Class-5 autoencoder: trained to reconstruct class-5 rows, validated on held-out class-5 rows.
# NOTE(review): this is the only model stopped by `cb` (validation accuracy) instead of
# `cb2` (validation loss) - confirm whether that difference is intentional.
autoencoder5.compile(
    loss=losses.MeanSquaredError(),
    optimizer='adam',
    metrics=['accuracy'],
)
history5 = autoencoder5.fit(
    x=X_train5,
    y=X_train5,
    validation_data=(X_test5, X_test5),
    callbacks=[cb],
    epochs=10,
    shuffle=True,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Classification

As said previously, when a data point X is received, all 6 autoencoders try to reconstruct it. We operate under the assumption that the model with the lowest reconstruction error is the one that was trained on the class to which the data point belongs. This is how we classify X.

In [None]:
def classify(X):
  """Return the index (0-5) of the autoencoder that reconstructs X best.

  Every autoencoder reconstructs X; the reconstruction error is the mean squared
  difference between X and that reconstruction, averaged over all values of X.
  The class whose autoencoder has the smallest error is returned (the lowest
  index wins on ties).

  Parameters
  ----------
  X : array-like
      One sample as a 1-D vector, or a 2-D batch / DataFrame of samples that is
      scored as a whole.

  Returns
  -------
  int
      Position of the best-reconstructing autoencoder.
  """
  samples = np.asarray(X, dtype=float)
  if samples.ndim == 1:
    # A single feature vector: the models expect a (rows, features) batch.
    samples = samples.reshape(1, -1)
  models = (autoencoder0, autoencoder1, autoencoder2,
            autoencoder3, autoencoder4, autoencoder5)
  # Compare each reconstruction with the ORIGINAL input. Feeding the reconstruction
  # back into the model would measure the error of a second reconstruction instead.
  errors = [
      float(np.mean(np.square(samples - model.predict(samples, verbose=0))))
      for model in models
  ]
  return int(np.argmin(errors))

In [None]:
#def classify(X):
  #loss0=np.mean(np.power(X-autoencoder0.predict([X]), 2), axis=1)
  #print("numeric: ",pd.to_numeric(loss0))
  #loss1=np.mean(np.power(X-autoencoder1.predict([X]), 2), axis=1)
  #print(loss1)
  #loss2=np.mean(np.power(X-autoencoder2.predict([X]), 2), axis=1)
  #print(loss2)
  #loss3=np.mean(np.power(X-autoencoder3.predict([X]), 2), axis=1)
  #print(loss3)
  #loss4=np.mean(np.power(X-autoencoder4.predict([X]), 2), axis=1)
  #print(loss4)
  #loss5=np.mean(np.power(X-autoencoder5.predict([X]), 2), axis=1)
  #print(loss5)
  #losses=[loss0,loss1,loss2,loss3,loss4,loss5]
  #return pd.Series(losses).idxmin()
  #return losses

In [None]:
# Prepare scaled test predictors for a quick classification check.
# `df_test` itself is left untouched: dropping the label in place through an alias
# would remove 'Attack_type' from df_test, which is still needed later on to
# compute the metrics.
l1 = df_test['Attack_type']                      # true labels of the test rows
df2 = df_test.drop(columns='Attack_type')        # unscaled test predictors
df1 = harmonize(df2.copy())                      # scaled test predictors

In [None]:
# Quick check: classify only the first row of the scaled test predictors.
v1 = df1.iloc[:1]
c = classify(v1)



















### Metrics calculation:

In [None]:
def pred(X_test):
  """Classify every sample of X_test and return the predicted classes as a list.

  Parameters
  ----------
  X_test : pandas.DataFrame or iterable
      A DataFrame is processed row by row, each row being handed to classify()
      as a one-row DataFrame. Any other iterable is consumed as is, one element
      per call to classify().

  Returns
  -------
  list
      One prediction per sample, in input order.
  """
  if isinstance(X_test, pd.DataFrame):
    # Iterating a DataFrame directly yields its column labels, not its rows.
    samples = (X_test.iloc[[position]] for position in range(len(X_test)))
  else:
    samples = X_test
  return [classify(sample) for sample in samples]

In [None]:
# Peek at the first row of the test set as a raw array.
df_test.values[0]

In [None]:
# Per-class confusion counts: rows hold TP, FP, TN, FN; one column per class 0-5.
parameters = np.zeros((4, 6))
# Per-class metrics: rows hold Precision, Recall, F1; one column per class 0-5.
scores = np.zeros((3, 6))


In [None]:
import time

# Separate the true labels from the test predictors. An earlier cell may already have
# removed the label column from df_test (keeping it in `l1`), so handle both states.
if 'Attack_type' in df_test.columns:
  answer = df_test['Attack_type']
  df_test = df_test.drop(columns='Attack_type')
else:
  answer = l1
df_test = harmonize(df_test)

# NOTE(review): df_test contains every attack class, while only classes 0-5 have an
# autoencoder - consider restricting the evaluation to those classes.
start_time = time.time()
# One prediction per test row: each row is passed on as a one-row DataFrame.
# Passing the whole frame wrapped in a list would produce a single prediction.
preds = pred([df_test.iloc[[position]] for position in range(len(df_test))])
print("Prediction Time in seconds: ", time.time()-start_time)

In [None]:
# Number of predictions produced.
len(preds)

In [None]:
# One-vs-rest confusion counts for each class 0-5.
# The counts are assigned (not accumulated), so re-running this cell gives the same result.
answers = answer.tolist()
truth = np.asarray(answers)
guess = np.asarray(preds)
if truth.shape != guess.shape:
  raise ValueError(
      "Number of predictions (%d) differs from number of labels (%d)"
      % (guess.size, truth.size))
for i in range(6):
  is_class = truth == i        # sample really belongs to class i
  said_class = guess == i      # sample was predicted as class i
  parameters[0, i] = np.sum(is_class & said_class)      # TP
  parameters[1, i] = np.sum(~is_class & said_class)     # FP
  parameters[2, i] = np.sum(~is_class & ~said_class)    # TN
  parameters[3, i] = np.sum(is_class & ~said_class)     # FN

In [None]:
# Precision, recall and F1 per class from the confusion counts.
# A score whose denominator is zero (class never predicted, or class absent from
# the test set) is reported as 0.0 instead of NaN.
for i in range(6):
  tp, fp, fn = parameters[0, i], parameters[1, i], parameters[3, i]
  scores[0, i] = tp / (tp + fp) if (tp + fp) > 0 else 0.0   # precision
  scores[1, i] = tp / (tp + fn) if (tp + fn) > 0 else 0.0   # recall
  pr_sum = scores[0, i] + scores[1, i]
  scores[2, i] = 2 * scores[0, i] * scores[1, i] / pr_sum if pr_sum > 0 else 0.0   # F1

In [None]:
# Summary metrics. The results are stored under names that do not clash with the
# accuracy_score / precision_score / recall_score functions imported from sklearn.
# Accuracy is (TP + TN) / (TP + FP + TN + FN), with the one-vs-rest counts summed
# over the six classes; `parameters` rows are TP, FP, TN, FN in that order.
total_count = np.sum(parameters)
accuracy = (np.sum(parameters[0]) + np.sum(parameters[2])) / total_count if total_count > 0 else 0.0
# Precision, recall and F1 are macro averages: the unweighted mean over the classes.
precision = np.average(scores[0])
recall = np.average(scores[1])
f1 = np.average(scores[2])
print("Accuracy score: ", accuracy)
print("Precision score: ", precision)
print("Recall score: ", recall)
print("F1 score: ", f1)

This method did not work in this specific case because the autoencoders worked too well. It was expected that an autoencoder trained on a given class would perform poorly on all the others. However, this was not the case: the models performed very well on all the classes, which made it impossible to implement an efficient classification system. The core principle behind this technique can, however, be valid in other cases.

Usually, in anomaly detection, there are only 2 classes: normal and abnormal, and the abnormal cases are the minority. Thus, we build an autoencoder that learns to reconstruct normal data samples. In principle, the model will generally reconstruct new normal cases with a low error. However, if the reconstruction loss is above a defined threshold, we consider that the corresponding data point is not from the normal class and is therefore an anomaly.

The code in this notebook can be easily adapted for this purpose, if applied to a dataset with 2 classes. Then, only 1 autoencoder model needs to be built.