In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so that files
# kept on Drive are reachable through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
from sklearn import metrics
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import average_precision_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
import numpy as np
import pandas as pd
import warnings
import time

%matplotlib inline
warnings.filterwarnings("ignore")
seconds = time.time()

features = ["Flow IAT Min", "Bwd Pkt Len Mean", "Fwd Pkt Len Std", "Fwd IAT Tot", "Bwd Pkt Len Std", "Flow Byts/s",
            "Flow IAT Max", "Flow Duration", "Flow IAT Mean", "Flow IAT Std", "Fwd Pkt Len Max", "Flow Pkts/s",
            "Bwd Pkt Len Max", "Tot Bwd Pkts", "TotLen Fwd Pkts", "Tot Fwd Pkts", "TotLen Bwd Pkts", "Fwd Pkt Len Mean",
            "Bwd Pkt Len Min", "Fwd Pkt Len Min", "Label"]

df = pd.read_csv('/content/drive/MyDrive/ISU/all_data.csv', usecols=features)
df.fillna(0, inplace=True)
df.replace([np.inf, -np.inf], -1, inplace=True)

# Print a numbered table of the candidate features. The final entry of
# `features` is skipped, so only the entries before it are listed.
print('%-17s %-17s ' % ("Feature Number", "Feature"))

for number, name in enumerate(features[:-1], start=1):
    print('%-17s %-17s' % (number, name))

print('\n\n\n')
# Encode the target as integers: 1 where the label is exactly "Benign",
# 0 for every other value.
#
# The column is addressed by name rather than by position: read_csv keeps the
# column order of the file (not the order of `usecols`), so "last column" is
# only the label if the file happens to store it last.
#
# The comparison is vectorised (no per-row Python loop) and the result is cast
# to an integer array explicitly, so `y` has a numeric dtype that scikit-learn
# accepts as a classification target regardless of how pandas handles
# assigning into the original string column.
attack_or_not = (df["Label"] == "Benign").astype(int).to_numpy()

df["Label"] = attack_or_not
y = df["Label"].values
# Greedy forward feature selection, run separately for each classifier.
#
# For every model the candidate features are tried in list order. A candidate
# is appended to the current selection, the model is trained and evaluated on
# a fixed 80/20 split, and the candidate is kept only if the macro F1-score is
# at least as good as the best score seen so far for that model.
my_list = []
least = 0

ml_list = {"Naive Bayes": GaussianNB(),
           "QDA": QDA(),
           # Seeded so that the MLP's results (and therefore the selected
           # feature set) are reproducible from run to run.
           "MLP": MLPClassifier(hidden_layer_sizes=(13, 13, 13, 13, 13), max_iter=500, random_state=0)}

# Drop the trailing "Label" entry: from here on `features` holds inputs only.
features.pop()
# Feature numbers as shown in the "Feature Number" table (1-based position
# in `features`), used for the "Feature List" column below.
feature_number = {name: number for number, name in enumerate(features, start=1)}

print('%-17s %-30s %-10s  %-10s %-15s ' % ("ML algorithm", "Feature Name", "F1-score", "Accuracy", "Feature List"))

# The split depends only on the number of rows and the seed, so it is computed
# once on row positions and reused for every feature subset instead of being
# recomputed inside the loop.
train_idx, test_idx, y_train, y_test = train_test_split(
    np.arange(len(df)), y, test_size=0.20, random_state=0)

for algo_name, clf in ml_list.items():
    # Start every model from scratch: an empty selection and a zero baseline.
    # Without resetting `least`, later models would have to beat the previous
    # model's best score before any feature could be accepted.
    my_list = []
    least = 0

    for feature in features:
        my_list.append(feature)
        X = df.loc[:, my_list].values
        X_train, X_test = X[train_idx], X[test_idx]

        clf.fit(X_train, y_train)
        predict = clf.predict(X_test)
        result = f1_score(y_test, predict, average='macro')
        # Accuracy is derived from the predictions already made, avoiding a
        # second prediction pass over the test set.
        accuracy = round(metrics.accuracy_score(y_test, predict), 2)

        # Numbers of the features that were evaluated in this step (current
        # selection including the candidate), matching the table above.
        temp = "[" + ", ".join(str(feature_number[name]) for name in my_list) + "]"

        if result >= least:
            # Ties are accepted, so a feature that does not hurt is kept.
            least = result
            print('%-17s %-30s %-10s  %-10s %-15s %-15s ' % (
                algo_name, feature, result, accuracy, temp, "------> New feature found!!!"))
        else:
            # Candidate made the score worse: discard it again.
            my_list.pop()
            print('%-17s %-30s %-10s  %-10s %-15s ' % (algo_name, feature, result, accuracy, temp))
    print("F1=", least, algo_name, " The most efficient feature list =", my_list, "\n\n")

print("Mission accomplished!")
print("Total operation time: = ", time.time() - seconds, "seconds")


Feature Number    Feature           
1                 Flow IAT Min     
2                 Bwd Pkt Len Mean 
3                 Fwd Pkt Len Std  
4                 Fwd IAT Tot      
5                 Bwd Pkt Len Std  
6                 Flow Byts/s      
7                 Flow IAT Max     
8                 Flow Duration    
9                 Flow IAT Mean    
10                Flow IAT Std     
11                Fwd Pkt Len Max  
12                Flow Pkts/s      
13                Bwd Pkt Len Max  
14                Tot Bwd Pkts     
15                TotLen Fwd Pkts  
16                Tot Fwd Pkts     
17                TotLen Bwd Pkts  
18                Fwd Pkt Len Mean 
19                Bwd Pkt Len Min  
20                Fwd Pkt Len Min  




ML algorithm      Feature Name                   F1-score    Accuracy   Feature List    
Naive Bayes       Flow IAT Min                   0.2730517012608505  0.31       [1,             ------> New feature found!!! 
Naive Bayes       Bwd Pk