In [1]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import csv
import time
import warnings

# Uyarıları kapat
warnings.filterwarnings("ignore")

# Sonuç dosyası ve klasörlerin tanımlanması
result = "./results/results_Final.csv"
csv_files = ["all_data.csv"]
path = ""
repetition = 10

# Klasör oluşturma fonksiyonu
def folder(f_name):
    try:
        if not os.path.exists(f_name):
            os.makedirs(f_name)
    except OSError:
        print("The folder could not be created!")

# Sonuçların ve grafiklerin kaydedileceği klasörler
folder_name = "./results/"
folder(folder_name)
folder_name = "./results/result_graph_Final/"
folder(folder_name)

# Algoritma listesi
ml_list = {
    "Naive Bayes": GaussianNB(),
    "QDA": QDA(),
    "MLP": MLPClassifier(hidden_layer_sizes=(13, 13, 13), max_iter=500),
    "Random Forest": RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    "ID3": DecisionTreeClassifier(max_depth=5, criterion="entropy"),
    "AdaBoost": AdaBoostClassifier(),
    "Nearest Neighbors": KNeighborsClassifier(3)
}

# Özellikler listesi
usecols = [
    "Bwd Packet Length Std", "Flow Bytes/s", "Total Length of Fwd Packets", "Fwd Packet Length Std",
    "Flow IAT Std", "Flow IAT Min", "Fwd IAT Total", "Flow Duration", "Bwd Packet Length Max",
    "Flow IAT Max", "Flow IAT Mean", "Total Length of Bwd Packets", "Fwd Packet Length Min",
    "Bwd Packet Length Mean", "Flow Packets/s", "Fwd Packet Length Mean", "Total Backward Packets",
    "Total Fwd Packets", "Fwd Packet Length Max", "Bwd Packet Length Min", "Label"
]

# Algoritma bazlı özellikler
others = ["Bwd Packet Length Std", "Flow Bytes/s", "Total Length of Fwd Packets", "Fwd Packet Length Std",
          "Flow IAT Std", "Flow IAT Min", "Fwd IAT Total"]

algorithms_features = {
    "Naive Bayes": ['Bwd Packet Length Std', 'Total Length of Fwd Packets', 'Flow IAT Min', 
                    'Fwd Packet Length Min', 'Flow Packets/s', 'Fwd Packet Length Mean'],
    "QDA": ['Bwd Packet Length Std', 'Flow Bytes/s', 'Total Length of Fwd Packets', 'Flow IAT Min'],
    "MLP": ['Bwd Packet Length Std', 'Flow Bytes/s', 'Total Length of Fwd Packets', 'Fwd Packet Length Std',
            'Flow IAT Min', 'Bwd Packet Length Max', 'Fwd Packet Length Min', 'Bwd Packet Length Mean',
            'Total Backward Packets', 'Total Fwd Packets', 'Fwd Packet Length Max', 'Bwd Packet Length Min'],
    "Random Forest": others,
    "ID3": others,
    "AdaBoost": others,
    "Nearest Neighbors": others
}

# Zaman damgası
seconds = time.time()

# Sonuç dosyasını oluştur
with open(result, "w", newline="", encoding="utf-8") as f:
    wrt = csv.writer(f)
    wrt.writerow(["File", "ML algorithm", "Accuracy", "Precision", "Recall", "F1-score", "Time"])

# Ana döngü
for j in csv_files:
    print(f"Processing file: {j}")
    feature_list = usecols
    df = pd.read_csv(path + j, usecols=feature_list).fillna(0)

    # Etiket sütununu 0 ve 1'e çevir
    df["Label"] = df["Label"].apply(lambda x: 1 if x == "BENIGN" else 0)

    y = df["Label"]
    for ii in ml_list:
        X = df[algorithms_features[ii]]
        precision, recall, f1, accuracy, t_time = [], [], [], [], []

        for _ in range(repetition):
            start_time = time.time()
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=_)
            clf = ml_list[ii]
            clf.fit(X_train, y_train)
            predict = clf.predict(X_test)

            precision.append(precision_score(y_test, predict, average='macro'))
            recall.append(recall_score(y_test, predict, average='macro'))
            f1.append(f1_score(y_test, predict, average='macro'))
            accuracy.append(clf.score(X_test, y_test))
            t_time.append(time.time() - start_time)

        # Sonuçları ekrana yazdır
        print(f"{ii}: Accuracy={np.mean(accuracy):.2f}, Precision={np.mean(precision):.2f}, Recall={np.mean(recall):.2f}, F1-score={np.mean(f1):.2f}, Time={np.mean(t_time):.4f}")

        # Sonuçları CSV'ye yaz
        with open(result, "a", newline="", encoding="utf-8") as f:
            wrt = csv.writer(f)
            wrt.writerow([j[:-4], ii, np.mean(accuracy), np.mean(precision), np.mean(recall), np.mean(f1), np.mean(t_time)])

        # Grafik oluştur ve kaydet
        plt.boxplot(f1)
        plt.title(f"All Dataset - {ii}")
        plt.ylabel('F-measure')
        plt.savefig(f"{folder_name}{j[:-4]}_{ii}.pdf", bbox_inches='tight', format='pdf')
        plt.close()

# İşlem tamamlandı
print("Mission accomplished!")
print(f"Total operation time: {time.time() - seconds:.2f} seconds")


Processing file: all_data.csv
Naive Bayes: Accuracy=0.87, Precision=0.79, Recall=0.70, F1-score=0.73, Time=1.1994
QDA: Accuracy=0.88, Precision=0.82, Recall=0.69, F1-score=0.73, Time=1.1151
MLP: Accuracy=0.90, Precision=0.89, Recall=0.73, F1-score=0.78, Time=230.3944
Random Forest: Accuracy=0.95, Precision=0.97, Recall=0.84, F1-score=0.89, Time=10.4290
ID3: Accuracy=0.95, Precision=0.93, Recall=0.89, F1-score=0.91, Time=6.0613
AdaBoost: Accuracy=0.93, Precision=0.92, Recall=0.83, F1-score=0.87, Time=76.9257
Nearest Neighbors: Accuracy=0.97, Precision=0.94, Recall=0.94, F1-score=0.94, Time=94.2227
Mission accomplished!
Total operation time: 4216.77 seconds
