# Neural Network Training
We will now use the training and testing datasets to train the neural networks. We will once again look at the code for approach 3 here. For the HPC code, please look at 03-hpc.py.

In [1]:
#!pip3 install tensorflow keras

In [2]:
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
import pandas as pd
import time
import statistics
import matplotlib.pyplot as plt
import pickle
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

In [3]:
# Read the processed test and train splits. Each CSV carries an 'Unnamed: 0'
# column (presumably the saved index -- confirm) which is removed straight away.
test = pd.read_csv("../data/processed/test2.csv")
test = test.drop('Unnamed: 0', axis=1)
train = pd.read_csv("../data/processed/train2.csv")
train = train.drop('Unnamed: 0', axis=1)

Features is the number of descriptive features in the datasets.

In [4]:
# Used for approach 1 and 2: all columns but one count as descriptive features
# (presumably the remaining column is the single label column -- confirm).
features = len(train.columns) - 1

In [None]:
# Five columns are excluded from the feature count; train_model separates five
# label columns ('normal', 'dos', 'u2r', 'r2l', 'probe') from the inputs.
features = len(train.columns) - 5

## Create Models
We will now create the models that we will train. As they each follow a very similar pattern, we will use a function to define them. For each model we will use a dense sequential neural net with either 1, 3, 5 or 10 hidden layers, each with an activation function of tanh, ReLU or Swish. The input and hidden layers each have `features` nodes. Then for the output layer we have 5 nodes (one for each of the classifications) and a sigmoid function to allow us to classify the data.

We have used binary crossentropy as the loss function, as it allows us to minimise the error when classifying the attacks, and we are using the Adam optimiser as it is a good and commonly used optimiser.

In [5]:
def create_model(features,layers,activation):
    """Build and compile a dense Sequential network.

    features   -- input dimension, and the width of every layer before the output
    layers     -- number of further Dense layers stacked after the first one
    activation -- activation used by every layer except the output layer

    The output layer has 5 sigmoid units. The model is compiled with
    binary cross-entropy loss and the Adam optimiser, then returned.
    """
    # First layer fixes the input dimension; the rest share its width.
    stack = [Dense(features, input_dim=features, activation=activation)]
    stack.extend(Dense(features, activation=activation) for _ in range(layers))
    # One sigmoid output unit per classification.
    stack.append(Dense(5, activation='sigmoid'))

    net = Sequential(stack)
    net.compile(optimizer='adam', loss='binary_crossentropy')
    return net

We will now use this function to create the different models that we will test.

In [6]:
# One model per (depth, activation) pair. For each depth the three activations
# are built in the order tanh, relu, swish.
activations = ('tanh', 'relu', 'swish')
model1_tanh, model1_relu, model1_swish = (
    create_model(features, 1, act) for act in activations
)
model3_tanh, model3_relu, model3_swish = (
    create_model(features, 3, act) for act in activations
)
model5_tanh, model5_relu, model5_swish = (
    create_model(features, 5, act) for act in activations
)
model10_tanh, model10_relu, model10_swish = (
    create_model(features, 10, act) for act in activations
)

2022-03-07 20:11:06.257076: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-03-07 20:11:06.257123: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-03-07 20:11:06.257152: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (c-VirtualBox): /proc/driver/nvidia/version does not exist
2022-03-07 20:11:06.257511: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Train models
We will now train the models that we have created. To do this we will train each model one epoch at a time; after each epoch we will test the model using the test dataset, and we will stop training if the model does worse than in the previous epoch. This approach allows us to track the progress of the model throughout the training process; however, it does not leave any scope for the model to find its way out of a local minimum. We decided that this was a worthwhile trade-off as we would be training many models under time constraints, and we are not necessarily looking to find a global minimum, since we are comparing the models under the same circumstances. The reason that we are saving the model is for the extension of this project. Here we have to generate the model's predictions ourselves, because of the multiple classification labels, and then calculate the accuracy and AUC from these predictions.

In [None]:
def train_model(train,test,model,num):
    """Train `model` one epoch at a time until its test AUC stops improving.

    After every epoch the model is scored on both the train and the test data.
    Training always runs for at least two epochs and stops as soon as the test
    AUC of the latest epoch is not higher than that of the epoch before it.
    The final epoch (the one that triggered the stop) is excluded from every
    returned metric.

    Parameters
    ----------
    train, test : DataFrames containing the feature columns plus the five
        label columns 'normal', 'dos', 'u2r', 'r2l' and 'probe'.
    model : compiled Keras model providing fit / predict / save.
    num : identifier appended to 'model' to form the path the model is saved to.

    Returns
    -------
    tuple of 11 items:
        test_score_acc, train_score_acc, test_score_auc, train_score_auc
            -- per-epoch lists of scores,
        avg_time, total_time
            -- mean and sum of the per-epoch fit times,
        test_norm_mat, test_dos_mat, test_u2r_mat, test_r2l_mat, test_probe_mat
            -- per-epoch lists of test confusion matrices, one list per label.
    """
    label_cols = ['normal','dos','u2r','r2l','probe']

    #separate the features from the labels for both datasets
    train_X = train.drop(label_cols,axis=1)
    test_X = test.drop(label_cols,axis=1)   # was built from `train`, so "test" scores were train scores
    train_Y = train.loc[:,label_cols]
    test_Y = test.loc[:,label_cols]

    #initialise the model metrics
    test_score_acc=[]
    train_score_acc=[]
    test_score_auc=[]
    train_score_auc=[]
    model_time = []
    test_mats = [[] for _ in label_cols] #one per-epoch list of confusion matrices for each label

    epoch = 0
    #repeat until the test AUC score of the latest epoch is not higher than the previous epoch
    while (epoch < 2) or (test_score_auc[epoch-1] > test_score_auc[epoch-2]):
        fit_start = time.perf_counter()
        model.fit(train_X,train_Y,epochs=1,batch_size=128)
        model_time.append(time.perf_counter() - fit_start) #how long this epoch took to train

        #score on the train dataset; predictions are rounded to classify the data
        train_pred = model.predict(train_X).round()
        train_score_acc.append(accuracy_score(train_Y,train_pred))
        train_score_auc.append(roc_auc_score(train_Y,train_pred))

        #score on the test dataset
        test_pred = model.predict(test_X).round()
        test_score_acc.append(accuracy_score(test_Y,test_pred))
        test_score_auc.append(roc_auc_score(test_Y,test_pred))

        #confusion matrix for each classification; column j of the prediction
        #corresponds to label_cols[j]. Appended (not overwritten) so that the
        #[:-1] below drops the last epoch rather than a row of one matrix.
        for j, label in enumerate(label_cols):
            test_mats[j].append(confusion_matrix(test_Y[label],test_pred[:,j]))

        epoch = epoch + 1

    #manipulate the model times to useful metrics (last epoch excluded)
    avg_time = statistics.mean(model_time[:-1])
    total_time = sum(model_time[:-1])

    #NOTE(review): the saved model includes the final epoch, whose metrics are
    #dropped from the returned values -- confirm this is what the extension expects.
    model.save('model'+str(num)) #save models for the extension

    return (test_score_acc[:-1], train_score_acc[:-1], test_score_auc[:-1], train_score_auc[:-1],
            avg_time, total_time,
            *[mats[:-1] for mats in test_mats])

We now need to call the training function on each of the models and then save the metrics so that we can evaluate the results.

In [None]:
# Models in training order; the 1-based position in this list is the number
# handed to train_model.
training_queue = [
    model1_tanh, model1_relu, model1_swish,
    model3_tanh, model3_relu, model3_swish,
    model5_tanh, model5_relu, model5_swish,
    model10_tanh, model10_relu, model10_swish,
]

score=[]
for model_number, net in enumerate(training_queue, start=1):
    score.append(train_model(train,test,net,model_number))

# Pickle the collected metrics so the results can be evaluated later.
with open('/user/work/zg18997/scores2','wb') as f:
    pickle.dump(score, f)