## CICIDS 2017
Implementación de los IDS separadamente para cada ataque usando algoritmos de aprendizaje no supervisado

In [None]:
## NO SUPERVISADO: K-MEDIAS

from sklearn import metrics


from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics.cluster import completeness_score

#Añadimos nuevos algoritmos de clasificación (no supervisado):
from sklearn.ensemble import IsolationForest
from sklearn.cluster import KMeans

from sklearn.cluster import AgglomerativeClustering
import matplotlib as mpl
import math

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

from sklearn.preprocessing import MinMaxScaler  #Escalar
from sklearn.preprocessing import StandardScaler #Estandarizar
from sklearn.preprocessing import Normalizer #Normalizar

import matplotlib.pyplot as plt
import sys
import numpy as np
np.set_printoptions(threshold=50)
#%matplotlib inline
import os
import pandas as pd
import csv
import time
import warnings
import math
warnings.filterwarnings("ignore")

csv_files=os.listdir("attacks")# CSV files names: #The names of the files in the attacks folder are taken and assigned to a list (csv_files).
path=".\\attacks\\"
repetition=10




# the features to be used for each attack type is defined in a dictionary(features).
# the first 4 of the features created by the file "04_1_feature_selection_for_attack_files.py" are used here.
features={"Bot":["Bwd Packet Length Mean","Flow IAT Max","Flow Duration","Flow IAT Mean","Label"],
"DDoS":["Bwd Packet Length Std","Total Backward Packets","Fwd IAT Total","Total Length of Fwd Packets","Label"],
"DoS GoldenEye":["Flow IAT Max","Bwd Packet Length Std","Flow IAT Min","Total Backward Packets","Label"],
"DoS Hulk":["Bwd Packet Length Std","Fwd Packet Length Std","Fwd Packet Length Max","Flow IAT Min","Label"],
"DoS Slowhttptest":["Flow IAT Mean","Fwd Packet Length Min","Fwd Packet Length Std","Fwd Packet Length Mean","Label"],
"DoS slowloris":["Flow IAT Mean","Total Length of Bwd Packets","Bwd Packet Length Mean","Total Fwd Packets","Label"],
"FTP-Patator":["Fwd Packet Length Max","Fwd Packet Length Std","Fwd Packet Length Mean","Bwd Packet Length Mean","Label"],
"Heartbleed":["Bwd Packet Length Mean","Total Backward Packets","Total Length of Bwd Packets","Bwd Packet Length Max","Label"],
"Infiltration":["Fwd Packet Length Max","Fwd Packet Length Mean","Flow Duration","Total Length of Fwd Packets","Label"],
"PortScan":["Flow Bytes/s","Total Length of Fwd Packets","Flow IAT Max","Flow Duration","Label"],
"SSH-Patator":["Fwd Packet Length Max","Flow Duration","Flow IAT Max","Flow IAT Mean","Label"],
"Web Attack":["Bwd Packet Length Std","Total Length of Fwd Packets","Flow Bytes/s","Flow IAT Min","Label"]}

seconds=time.time()#time stamp for all processing time


print ('%-17s %-27s  %-15s %-15s %-15s %-15s %-15s' % ("File","ML algorithm","Accuracy","Precision", "Recall" , "F1-score","Time"))# print output header


for j in csv_files: #this loop runs on the list containing the filenames.Operations are repeated for all attack files
    a=[]
    
    feature_list=list(features[j[0:-4]])
    df=pd.read_csv(path+j,usecols=feature_list)#read an attack file.
    df=df.fillna(0)
    attack_or_not=[]
    for i in df["Label"]: #it changes the normal label to "1" and the attack tag to "0" for use in the machine learning algorithm
        
        if i =="BENIGN":
            attack_or_not.append(1)
        else:
            attack_or_not.append(0)
    df["Label"]=attack_or_not

    
    y = df["Label"] #this section separates the label and the data into two separate pieces, as Label=y Data=X 
    del df["Label"]
    feature_list.remove('Label')
    X = df[feature_list]
    
    precision=[]
    recall=[]
    f1=[]
    accuracy=[]
    t_time=[]
    
    for i in range(repetition): # This loop allows cross-validation and machine learning algorithm to be repeated 10 times
        #for i in range(repetition): # This loop allows cross-validation and machine learning algorithm to be repeated 10 times
        second=time.time()#time stamp for processing time

        
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

        # cross-validation
        X_train, X_test, y_train, y_test = train_test_split(X, y,#  data (X) and labels (y) are divided into 2 parts to be sent to the machine learning algorithm (80% train,%20 test). 
            test_size = 0.20, random_state = repetition)#  So, in total there are 4 tracks: training data(X_train), training tag (y_train), test data(X_test) and test tag(y_test).

        #print("  Numero de clusters: %d\n" % (2))
        km = KMeans(init='random', n_clusters=2, random_state=333)
        km.fit(X_train)     
        predict = km.predict(X_test)

        #Para poder identificar qué cluster corresponde
        sum0 = sum(1 for p in predict if p == 0)
        sum1 = sum(1 for p in predict if p == 1)
        if(sum0 > sum1):
            predict = predict + 1
            predict = np.where(predict == 2, 0, predict)


        f_1=f1_score(y_test, predict, average='macro')
        pr=precision_score(y_test, predict, average='macro')
        rc=recall_score(y_test, predict, average='macro')
        score = metrics.accuracy_score(y_test, predict)

        precision.append(float(pr))
        recall.append(float(rc))
        f1.append(float(f_1))
        accuracy.append(score)
        t_time.append(float((time.time()-second)))

    print ('%-17s %-27s  %-15s %-15s %-15s %-15s %-15s' % (j[0:-4],"K-Means",str(round(np.mean(accuracy),2)),str(round(np.mean(precision),2)), 
            str(round(np.mean(recall),2)),str(round(np.mean(f1),2)),str(round(np.mean(t_time),4))))#the result of the ten repetitions is printed on the screen.





    #print("\n------------------------------------------------------------------------------------------------------\n\n")
    
print("mission accomplished!")
print("Total operation time: = ",time.time()- seconds ,"seconds")

File              ML algorithm                 Accuracy        Precision       Recall          F1-score        Time           
Bot               K-Means                      0.64            0.34            0.45            0.39            0.0394         
DDoS              K-Means                      0.77            0.73            0.69            0.7             0.22           
DoS GoldenEye     K-Means                      0.73            0.7             0.58            0.57            0.0541         
DoS Hulk          K-Means                      0.85            0.82            0.8             0.81            1.7521         
DoS Slowhttptest  K-Means                      0.72            0.69            0.52            0.47            0.0349         
DoS slowloris     K-Means                      0.66            0.34            0.47            0.4             0.0434         
FTP-Patator       K-Means                      0.64            0.34            0.46            0.39            

In [None]:
## NO SUPERVISADO: K-MEDOIDS

from sklearn import metrics


from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics.cluster import completeness_score

#Añadimos nuevos algoritmos de clasificación (no supervisado):
from sklearn_extra.cluster import KMedoids
from sklearn.cluster import AgglomerativeClustering
import matplotlib as mpl
import math

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

from sklearn.preprocessing import MinMaxScaler  #Escalar
from sklearn.preprocessing import StandardScaler #Estandarizar
from sklearn.preprocessing import Normalizer #Normalizar

import matplotlib.pyplot as plt
import sys
import numpy as np
np.set_printoptions(threshold=50)
#%matplotlib inline
import os
import pandas as pd
import csv
import time
import warnings
import math
warnings.filterwarnings("ignore")

csv_files=os.listdir("attacks")# CSV files names: #The names of the files in the attacks folder are taken and assigned to a list (csv_files).
path=".\\attacks\\"
repetition=10




# the features to be used for each attack type is defined in a dictionary(features).
# the first 4 of the features created by the file "04_1_feature_selection_for_attack_files.py" are used here.
features={"Bot":["Bwd Packet Length Mean","Flow IAT Max","Flow Duration","Flow IAT Mean","Label"],
"DDoS":["Bwd Packet Length Std","Total Backward Packets","Fwd IAT Total","Total Length of Fwd Packets","Label"],
"DoS GoldenEye":["Flow IAT Max","Bwd Packet Length Std","Flow IAT Min","Total Backward Packets","Label"],
"DoS Hulk":["Bwd Packet Length Std","Fwd Packet Length Std","Fwd Packet Length Max","Flow IAT Min","Label"],
"DoS Slowhttptest":["Flow IAT Mean","Fwd Packet Length Min","Fwd Packet Length Std","Fwd Packet Length Mean","Label"],
"DoS slowloris":["Flow IAT Mean","Total Length of Bwd Packets","Bwd Packet Length Mean","Total Fwd Packets","Label"],
"FTP-Patator":["Fwd Packet Length Max","Fwd Packet Length Std","Fwd Packet Length Mean","Bwd Packet Length Mean","Label"],
"Heartbleed":["Bwd Packet Length Mean","Total Backward Packets","Total Length of Bwd Packets","Bwd Packet Length Max","Label"],
"Infiltration":["Fwd Packet Length Max","Fwd Packet Length Mean","Flow Duration","Total Length of Fwd Packets","Label"],
"PortScan":["Flow Bytes/s","Total Length of Fwd Packets","Flow IAT Max","Flow Duration","Label"],
"SSH-Patator":["Fwd Packet Length Max","Flow Duration","Flow IAT Max","Flow IAT Mean","Label"],
"Web Attack":["Bwd Packet Length Std","Total Length of Fwd Packets","Flow Bytes/s","Flow IAT Min","Label"]}

seconds=time.time()#time stamp for all processing time

print ('%-17s %-27s  %-15s %-15s %-15s %-15s %-15s' % ("File","ML algorithm","Accuracy","Precision", "Recall" , "F1-score","Time"))# print output header

for j in csv_files: #this loop runs on the list containing the filenames.Operations are repeated for all attack files
    a=[]
    
    feature_list=list(features[j[0:-4]])
    df=pd.read_csv(path+j,usecols=feature_list)#read an attack file.
    df=df.fillna(0)
    numRows = min(df.shape[0]-1, 10000) #Para que no explote el consumo de memoria
    df = df.sample(numRows)
    attack_or_not=[]
    for i in df["Label"]: #it changes the normal label to "1" and the attack tag to "0" for use in the machine learning algorithm
        
        if i =="BENIGN":
            attack_or_not.append(1)
        else:
            attack_or_not.append(0)
    df["Label"]=attack_or_not

    
    y = df["Label"] #this section separates the label and the data into two separate pieces, as Label=y Data=X 
    del df["Label"]
    feature_list.remove('Label')
    X = df[feature_list]
    
    precision=[]
    recall=[]
    f1=[]
    accuracy=[]
    t_time=[]
    
    for i in range(repetition): # This loop allows cross-validation and machine learning algorithm to be repeated 10 times
        #for i in range(repetition): # This loop allows cross-validation and machine learning algorithm to be repeated 10 times
        second=time.time()#time stamp for processing time

        
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        
        # cross-validation
        X_train, X_test, y_train, y_test = train_test_split(X, y,#  data (X) and labels (y) are divided into 2 parts to be sent to the machine learning algorithm (80% train,%20 test). 
            test_size = 0.20, random_state = repetition)#  So, in total there are 4 tracks: training data(X_train), training tag (y_train), test data(X_test) and test tag(y_test).

        #print("  Numero de clusters: %d\n" % (2))
        km = KMedoids(init='random', n_clusters=2, random_state=333)
        km.fit(X_train)     
        predict = km.predict(X_test)

        #Para poder identificar qué cluster corresponde
        sum0 = sum(1 for p in predict if p == 0)
        sum1 = sum(1 for p in predict if p == 1)
        if(sum0 > sum1):
            predict = predict + 1
            predict = np.where(predict == 2, 0, predict)


        f_1=f1_score(y_test, predict, average='macro')
        pr=precision_score(y_test, predict, average='macro')
        rc=recall_score(y_test, predict, average='macro')
        score = metrics.accuracy_score(y_test, predict)

        precision.append(float(pr))
        recall.append(float(rc))
        f1.append(float(f_1))
        accuracy.append(score)
        t_time.append(float((time.time()-second)))

    print ('%-17s %-27s  %-15s %-15s %-15s %-15s %-15s' % (j[0:-4],"K-Medoids",str(round(np.mean(accuracy),2)),str(round(np.mean(precision),2)), 
            str(round(np.mean(recall),2)),str(round(np.mean(f1),2)),str(round(np.mean(t_time),4))))#the result of the ten repetitions is printed on the screen.

    #print("\n------------------------------------------------------------------------------------------------------\n\n")
    
print("mission accomplished!")
print("Total operation time: = ",time.time()- seconds ,"seconds")


File              ML algorithm                 Accuracy        Precision       Recall          F1-score        Time           
Bot               K-Medoids                    0.3             0.25            0.21            0.23            2.3309         
DDoS              K-Medoids                    0.74            0.7             0.65            0.66            7.1836         
DoS GoldenEye     K-Medoids                    0.86            0.87            0.81            0.83            10.5523        
DoS Hulk          K-Medoids                    0.84            0.8             0.78            0.79            10.2076        
DoS Slowhttptest  K-Medoids                    0.92            0.92            0.88            0.9             6.0754         
DoS slowloris     K-Medoids                    0.61            0.33            0.44            0.38            5.6321         
FTP-Patator       K-Medoids                    0.56            0.32            0.4             0.36            

In [None]:
#NO SUPERVISADO: Isolation Forest

from sklearn import metrics


from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics.cluster import completeness_score

#Añadimos nuevos algoritmos de clasificación (no supervisado):
from sklearn.ensemble import IsolationForest
from sklearn.cluster import KMeans

from sklearn.cluster import AgglomerativeClustering
import matplotlib as mpl
import math

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

from sklearn.preprocessing import MinMaxScaler  #Escalar
from sklearn.preprocessing import StandardScaler #Estandarizar
from sklearn.preprocessing import Normalizer #Normalizar

import matplotlib.pyplot as plt
import sys
import numpy as np
np.set_printoptions(threshold=sys.maxsize)
#%matplotlib inline
import os
import pandas as pd
import csv
import time
import warnings
import math
warnings.filterwarnings("ignore")

csv_files=os.listdir("attacks")# CSV files names: #The names of the files in the attacks folder are taken and assigned to a list (csv_files).
path=".\\attacks\\"
repetition=10

ml_list={
"Isolation Forest": IsolationForest(random_state=123, n_estimators = 30, contamination = 0.3, max_samples = "auto") #Sabemos que el 30% de los datos corresponden con ataques => contamination=0.3
}


# the features to be used for each attack type is defined in a dictionary(features).
# the first 4 of the features created by the file "04_1_feature_selection_for_attack_files.py" are used here.
features={"Bot":["Bwd Packet Length Mean","Flow IAT Max","Flow Duration","Flow IAT Mean","Label"],
"DDoS":["Bwd Packet Length Std","Total Backward Packets","Fwd IAT Total","Total Length of Fwd Packets","Label"],
"DoS GoldenEye":["Flow IAT Max","Bwd Packet Length Std","Flow IAT Min","Total Backward Packets","Label"],
"DoS Hulk":["Bwd Packet Length Std","Fwd Packet Length Std","Fwd Packet Length Max","Flow IAT Min","Label"],
"DoS Slowhttptest":["Flow IAT Mean","Fwd Packet Length Min","Fwd Packet Length Std","Fwd Packet Length Mean","Label"],
"DoS slowloris":["Flow IAT Mean","Total Length of Bwd Packets","Bwd Packet Length Mean","Total Fwd Packets","Label"],
"FTP-Patator":["Fwd Packet Length Max","Fwd Packet Length Std","Fwd Packet Length Mean","Bwd Packet Length Mean","Label"],
"Heartbleed":["Bwd Packet Length Mean","Total Backward Packets","Total Length of Bwd Packets","Bwd Packet Length Max","Label"],
"Infiltration":["Fwd Packet Length Max","Fwd Packet Length Mean","Flow Duration","Total Length of Fwd Packets","Label"],
"PortScan":["Flow Bytes/s","Total Length of Fwd Packets","Flow IAT Max","Flow Duration","Label"],
"SSH-Patator":["Fwd Packet Length Max","Flow Duration","Flow IAT Max","Flow IAT Mean","Label"],
"Web Attack":["Bwd Packet Length Std","Total Length of Fwd Packets","Flow Bytes/s","Flow IAT Min","Label"]}


seconds=time.time()#time stamp for all processing time

print ('%-17s %-27s  %-15s %-15s %-15s %-15s %-15s' % ("File","ML algorithm","Accuracy","Precision", "Recall" , "F1-score","Time"))# print output header

for j in csv_files: #this loop runs on the list containing the filenames.Operations are repeated for all attack files
    a=[]
    
    feature_list=list(features[j[0:-4]])
    df=pd.read_csv(path+j,usecols=feature_list)#read an attack file.
    df=df.fillna(0)
    attack_or_not=[]
    for i in df["Label"]: #it changes the normal label to "1" and the attack tag to "0" for use in the machine learning algorithm
        
        if i =="BENIGN":
            attack_or_not.append(1)
        else:
            attack_or_not.append(0)
    df["Label"]=attack_or_not

    
    y = df["Label"] #this section separates the label and the data into two separate pieces, as Label=y Data=X 
    del df["Label"]
    feature_list.remove('Label')
    X = df[feature_list]
    

    for ii in ml_list:
        precision=[]
        recall=[]
        f1=[]
        accuracy=[]
        t_time=[]
        
        for i in range(repetition): # This loop allows cross-validation and machine learning algorithm to be repeated 10 times
            #for i in range(repetition): # This loop allows cross-validation and machine learning algorithm to be repeated 10 times
            second=time.time()#time stamp for processing time

            # cross-validation
            scaler = StandardScaler()
            X = scaler.fit_transform(X)
            X_train, X_test, y_train, y_test = train_test_split(X, y,#  data (X) and labels (y) are divided into 2 parts to be sent to the machine learning algorithm (80% train,%20 test). 
                test_size = 0.20, random_state = repetition)#  So, in total there are 4 tracks: training data(X_train), training tag (y_train), test data(X_test) and test tag(y_test).

            #scaler = MinMaxScaler()
            #scaler = StandardScaler()
            #scaler = Normalizer()
            #X_train = scaler.fit_transform(X_train)
            #X_test = scaler.transform(X_test)


            clf = ml_list[ii]#choose algorithm from ml_list dictionary                                                                          
            clf.fit(X_train)
            predict =clf.predict(X_test)

            predict = np.where(predict == -1, 0, predict) #El resultado de isolation forest son 1 para los normales y -1 para anomalías

            #makes "classification report" and assigns the precision, f-measure, and recall values.s.    
            f_1=f1_score(y_test, predict, average='macro')
            pr=precision_score(y_test, predict, average='macro')
            rc=recall_score(y_test, predict, average='macro')
            score = metrics.accuracy_score(y_test, predict)

            precision.append(float(pr))
            recall.append(float(rc))
            f1.append(float(f_1))
            accuracy.append(score)
            t_time.append(float((time.time()-second)))

        print ('%-17s %-27s  %-15s %-15s %-15s %-15s %-15s' % (j[0:-4],ii,str(round(np.mean(accuracy),2)),str(round(np.mean(precision),2)), 
                str(round(np.mean(recall),2)),str(round(np.mean(f1),2)),str(round(np.mean(t_time),4))))#the result of the ten repetitions is printed on the screen.


    #print("\n------------------------------------------------------------------------------------------------------\n\n")
    
print("mission accomplished!")
print("Total operation time: = ",time.time()- seconds ,"seconds")

File              ML algorithm                 Accuracy        Precision       Recall          F1-score        Time           
Bot               Isolation Forest             0.54            0.44            0.44            0.44            0.0721         
DDoS              Isolation Forest             0.67            0.61            0.61            0.61            0.3327         
DoS GoldenEye     Isolation Forest             0.82            0.79            0.79            0.79            0.1249         
DoS Hulk          Isolation Forest             0.64            0.57            0.57            0.57            2.1033         
DoS Slowhttptest  Isolation Forest             0.66            0.59            0.59            0.59            0.1066         
DoS slowloris     Isolation Forest             0.6             0.52            0.52            0.52            0.0949         
FTP-Patator       Isolation Forest             0.4             0.28            0.29            0.28            

In [None]:
#NO SUPERVISADO: Local Outlier Factor

from sklearn import metrics


from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics.cluster import completeness_score

#Añadimos nuevos algoritmos de clasificación (no supervisado):
from sklearn.neighbors import LocalOutlierFactor

from sklearn.cluster import AgglomerativeClustering
import matplotlib as mpl
import math

from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score

from sklearn.preprocessing import MinMaxScaler  #Escalar
from sklearn.preprocessing import StandardScaler #Estandarizar
from sklearn.preprocessing import Normalizer #Normalizar

import matplotlib.pyplot as plt
import sys
import numpy as np
np.set_printoptions(threshold=sys.maxsize)
#%matplotlib inline
import os
import pandas as pd
import csv
import time
import warnings
import math
warnings.filterwarnings("ignore")

csv_files=os.listdir("attacks")# CSV files names: #The names of the files in the attacks folder are taken and assigned to a list (csv_files).
path=".\\attacks\\"
repetition=10

ml_list={
"LOF": LocalOutlierFactor(n_neighbors=25, contamination=0.2) #Sabemos que el 30% de los datos corresponden con ataques => contamination=0.3
}


# the features to be used for each attack type is defined in a dictionary(features).
# the first 4 of the features created by the file "04_1_feature_selection_for_attack_files.py" are used here.
features={"Bot":["Bwd Packet Length Mean","Flow IAT Max","Flow Duration","Flow IAT Min","Label"],
"DDoS":["Bwd Packet Length Std","Total Backward Packets","Fwd IAT Total","Flow Duration","Label"],
"DoS GoldenEye":["Flow IAT Max","Bwd Packet Length Std","Flow IAT Min","Total Backward Packets","Label"],
"DoS Hulk":["Bwd Packet Length Std","Fwd Packet Length Std","Fwd Packet Length Max","Flow IAT Min","Label"],
"DoS Slowhttptest":["Flow IAT Mean","Fwd Packet Length Min","Bwd Packet Length Mean","Total Length of Bwd Packets","Label"],
"DoS slowloris":["Flow IAT Mean","Total Length of Bwd Packets","Bwd Packet Length Mean","Total Fwd Packets","Label"],
"FTP-Patator":["Fwd Packet Length Max","Fwd Packet Length Std","Fwd Packet Length Mean","Bwd Packet Length Std","Label"],
"Heartbleed":["Total Backward Packets","Fwd Packet Length Max","Flow IAT Min","Bwd Packet Length Max","Label"],
"Infiltration":["Fwd Packet Length Max","Fwd Packet Length Mean","Flow Duration","Total Length of Fwd Packets","Label"],
"PortScan":["Flow Bytes/s","Total Length of Fwd Packets","Fwd IAT Total","Flow Duration","Label"],
"SSH-Patator":["Fwd Packet Length Max","Flow Duration","Flow IAT Max","Total Length of Fwd Packets","Label"],
"Web Attack":["Bwd Packet Length Std","Total Length of Fwd Packets","Flow Bytes/s","Flow IAT Max","Label"]}

seconds=time.time()#time stamp for all processing time

print ('%-17s %-27s  %-15s %-15s %-15s %-15s %-15s' % ("File","ML algorithm","Accuracy","Precision", "Recall" , "F1-score","Time"))# print output header
for j in csv_files: #this loop runs on the list containing the filenames.Operations are repeated for all attack files
    a=[]
    
    feature_list=list(features[j[0:-4]])
    df=pd.read_csv(path+j,usecols=feature_list)#read an attack file.
    df=df.fillna(0)
    attack_or_not=[]
    for i in df["Label"]: #it changes the normal label to "1" and the attack tag to "0" for use in the machine learning algorithm
        
        if i =="BENIGN":
            attack_or_not.append(1)
        else:
            attack_or_not.append(0)
    df["Label"]=attack_or_not

    
    y = df["Label"] #this section separates the label and the data into two separate pieces, as Label=y Data=X 
    del df["Label"]
    feature_list.remove('Label')
    X = df[feature_list]
    

    for ii in ml_list:
        precision=[]
        recall=[]
        f1=[]
        accuracy=[]
        t_time=[]
        
        for i in range(repetition): # This loop allows cross-validation and machine learning algorithm to be repeated 10 times
            #for i in range(repetition): # This loop allows cross-validation and machine learning algorithm to be repeated 10 times
            second=time.time()#time stamp for processing time

            # cross-validation
            scaler = StandardScaler()
            X = scaler.fit_transform(X)
            X_train, X_test, y_train, y_test = train_test_split(X, y,#  data (X) and labels (y) are divided into 2 parts to be sent to the machine learning algorithm (80% train,%20 test). 
                test_size = 0.20, random_state = repetition)#  So, in total there are 4 tracks: training data(X_train), training tag (y_train), test data(X_test) and test tag(y_test).

            clf = ml_list[ii]#choose algorithm from ml_list dictionary                                                                          
            clf.fit(X_train)
            predict =clf.fit_predict(X_test)

            predict = np.where(predict == -1, 0, predict) #El resultado de LOF son 1 para los normales y -1 para anomalías

            #makes "classification report" and assigns the precision, f-measure, and recall values.s.    
            f_1=f1_score(y_test, predict, average='macro')
            pr=precision_score(y_test, predict, average='macro')
            rc=recall_score(y_test, predict, average='macro')
            score = metrics.accuracy_score(y_test, predict)

            precision.append(float(pr))
            recall.append(float(rc))
            f1.append(float(f_1))
            accuracy.append(score)
            t_time.append(float((time.time()-second)))

        print ('%-17s %-27s  %-15s %-15s %-15s %-15s %-15s' % (j[0:-4],ii,str(round(np.mean(accuracy),2)),str(round(np.mean(precision),2)), 
                str(round(np.mean(recall),2)),str(round(np.mean(f1),2)),str(round(np.mean(t_time),4))))#the result of the ten repetitions is printed on the screen


    #print("\n------------------------------------------------------------------------------------------------------\n\n")
    
print("mission accomplished!")
print("Total operation time: = ",time.time()- seconds ,"seconds")

File              ML algorithm                 Accuracy        Precision       Recall          F1-score        Time           
Bot               LOF                          0.57            0.42            0.44            0.42            0.0899         
DDoS              LOF                          0.6             0.47            0.48            0.47            3.9223         
DoS GoldenEye     LOF                          0.61            0.48            0.48            0.47            0.5521         
DoS Hulk          LOF                          0.63            0.5             0.5             0.5             100.5955       
DoS Slowhttptest  LOF                          0.57            0.41            0.43            0.42            0.4681         
DoS slowloris     LOF                          0.56            0.41            0.43            0.42            0.3131         
FTP-Patator       LOF                          0.61            0.49            0.49            0.48            