In [1]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import csv
import time
import warnings

# Suppress every warning for the rest of the run.
warnings.filterwarnings("ignore")

# Result file and input folder definitions
result = "./results/results_1.csv"
path = "./attacks/"
# Only CSV files are processed. os.listdir() returns entries in arbitrary
# order, so the names are sorted to make the processing order deterministic.
csv_files = sorted(
    name for name in os.listdir(path) if name.lower().endswith(".csv")
)
repetition = 10  # Number of repetitions for each algorithm

def folder(f_name):
    """Create the directory ``f_name`` (with any missing parents).

    Used for the result and graph output folders. A directory that already
    exists is left untouched. Failures are reported on stdout instead of
    being raised, so the caller never sees an exception from this helper.

    Args:
        f_name: Path of the directory to create.
    """
    try:
        # exist_ok=True replaces the separate os.path.exists() check, which
        # was racy and silently accepted a regular file occupying the path.
        os.makedirs(f_name, exist_ok=True)
    except OSError:
        print("The folder could not be created!")

# Create the output directories. The loop variable is deliberately called
# folder_name: once the loop ends it still holds the graph directory, which
# is the prefix used when the figures are saved.
for folder_name in ("./results/", "./results/result_graph_1/"):
    folder(folder_name)

# Classifiers to evaluate, keyed by the display name written to the results
# file and used in the plot titles. Insertion order is the evaluation order.
ml_list = {
    "Naive Bayes": GaussianNB(),
    "QDA": QDA(),
    "Random Forest": RandomForestClassifier(
        n_estimators=10,
        max_depth=5,
        max_features=1,
    ),
    # Decision tree with the entropy criterion, reported under the name "ID3".
    "ID3": DecisionTreeClassifier(criterion="entropy", max_depth=5),
    "AdaBoost": AdaBoostClassifier(),
    "MLP": MLPClassifier(max_iter=500, hidden_layer_sizes=(13, 13, 13)),
    "Nearest Neighbors": KNeighborsClassifier(n_neighbors=3),
}

# Feature columns loaded for each attack file. The keys are looked up with
# the CSV file name stripped of its extension.
_selected_features = {
    "Bot": ("Bwd Packet Length Mean", "Flow IAT Max", "Flow Duration", "Flow IAT Min"),
    "DDoS": ("Bwd Packet Length Std", "Total Backward Packets", "Fwd IAT Total", "Flow Duration"),
    "DoS GoldenEye": ("Flow IAT Max", "Bwd Packet Length Std", "Flow IAT Min", "Total Backward Packets"),
    "DoS Hulk": ("Bwd Packet Length Std", "Fwd Packet Length Std", "Fwd Packet Length Max", "Flow IAT Min"),
    "DoS Slowhttptest": ("Flow IAT Mean", "Fwd Packet Length Min", "Bwd Packet Length Mean", "Total Length of Bwd Packets"),
    "DoS slowloris": ("Flow IAT Mean", "Total Length of Bwd Packets", "Bwd Packet Length Mean", "Total Fwd Packets"),
    "FTP-Patator": ("Fwd Packet Length Max", "Fwd Packet Length Std", "Fwd Packet Length Mean", "Bwd Packet Length Std"),
    "Heartbleed": ("Total Backward Packets", "Fwd Packet Length Max", "Flow IAT Min", "Bwd Packet Length Max"),
    "Infiltration": ("Fwd Packet Length Max", "Fwd Packet Length Mean", "Flow Duration", "Total Length of Fwd Packets"),
    "PortScan": ("Flow Bytes/s", "Total Length of Fwd Packets", "Fwd IAT Total", "Flow Duration"),
    "SSH-Patator": ("Fwd Packet Length Max", "Flow Duration", "Flow IAT Max", "Total Length of Fwd Packets"),
    "Web Attack": ("Bwd Packet Length Std", "Total Length of Fwd Packets", "Flow Bytes/s", "Flow IAT Max"),
}

# Each column list ends with the "Label" target column.
features = {
    attack: [*columns, "Label"]
    for attack, columns in _selected_features.items()
}

# Wall-clock start of the whole run; the final report subtracts it from the
# time at the end.
seconds = time.time()

# Start the results file from scratch and write its column header.
header = ["File", "ML algorithm", "accuracy", "Precision", "Recall", "F1-score", "Time"]
with open(result, mode="w", newline="", encoding="utf-8") as out_file:
    csv.writer(out_file).writerow(header)

# Evaluate every classifier on every attack file: train/test each one
# `repetition` times, append the averaged metrics to the results CSV and save
# a box plot of the per-repetition F1 scores for the file.
for file_name in csv_files:
    attack_name, extension = os.path.splitext(file_name)
    feature_list = features.get(attack_name)
    if extension.lower() != ".csv" or feature_list is None:
        # Without a feature list there is no "Label" column to load, so the
        # file cannot be evaluated; skip it instead of failing mid-run.
        print(f"Skipping: {file_name}")
        continue

    print(f"Processing: {file_name}")
    df = pd.read_csv(os.path.join(path, file_name), usecols=feature_list).fillna(0)

    # Binary target: 1 for "BENIGN" rows, 0 for every other label.
    df["Label"] = (df["Label"] == "BENIGN").astype(int)

    y = df["Label"]
    X = df.drop(columns=["Label"])

    # One list of per-repetition F1 scores per classifier, in ml_list order.
    f1_by_algorithm = []
    for algorithm, clf in ml_list.items():
        precision, recall, f1, accuracy, t_time = [], [], [], [], []

        for seed in range(repetition):
            started = time.perf_counter()
            # The repetition index seeds the split, so every repetition uses
            # a different split that is the same for all classifiers.
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.20, random_state=seed
            )
            clf.fit(X_train, y_train)
            predict = clf.predict(X_test)

            # Performance metrics
            precision.append(precision_score(y_test, predict, average="macro"))
            recall.append(recall_score(y_test, predict, average="macro"))
            f1.append(f1_score(y_test, predict, average="macro"))
            # Accuracy is taken from the predictions already made; calling
            # clf.score() would predict the whole test set a second time.
            accuracy.append(np.mean(predict == np.asarray(y_test)))
            t_time.append(time.perf_counter() - started)

        # Append the averaged metrics right away so finished rows are kept
        # even if a later classifier or file fails.
        with open(result, "a", newline="", encoding="utf-8") as f:
            csv.writer(f).writerow([
                attack_name, algorithm, np.mean(accuracy), np.mean(precision),
                np.mean(recall), np.mean(f1), np.mean(t_time),
            ])
        f1_by_algorithm.append(f1)

    # Box plots: one panel per classifier, four panels per row. The number of
    # rows follows the number of classifiers, so none is silently dropped.
    n_cols = 4
    n_rows = max(1, -(-len(f1_by_algorithm) // n_cols))  # ceiling division
    fig, axes = plt.subplots(
        nrows=n_rows, ncols=n_cols, figsize=(12, 3 * n_rows),
        sharey=True, squeeze=False,
    )
    panels = axes.ravel()
    for ax, algorithm, scores in zip(panels, ml_list, f1_by_algorithm):
        ax.boxplot(scores)
        ax.set_title(f"{attack_name} - {algorithm}", fontsize=7)
        ax.set_ylabel("F measure")
    # Hide the panels that have no classifier instead of drawing empty frames.
    for ax in panels[len(f1_by_algorithm):]:
        ax.set_visible(False)
    plt.tight_layout()
    plt.savefig(folder_name + attack_name + ".pdf", bbox_inches="tight", format="pdf")
    plt.close(fig)

print("Mission accomplished!")
print(f"Total operation time: {time.time() - seconds} seconds")


Processing: Bot.csv
Processing: DDoS.csv
Processing: DoS GoldenEye.csv
Processing: DoS Hulk.csv
Processing: DoS Slowhttptest.csv
Processing: DoS slowloris.csv
Processing: FTP-Patator.csv
Processing: Heartbleed.csv
Processing: Infiltration.csv
Processing: PortScan.csv
Processing: SSH-Patator.csv
Processing: Web Attack.csv
Mission accomplished!
Total operation time: 1540.9092762470245 seconds
