In [1]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.metrics import f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import csv
import time
import warnings

# Uyarıları kapat
warnings.filterwarnings("ignore")

# Sonuç dosyası ve klasörlerin tanımlanması
result = "./results/results_3.csv"
csv_files = ["all_data.csv"]  # Çalıştırılacak dosya
path = ""
repetition = 10

# Klasör oluşturma
def folder(f_name):
    """Sonuçlar ve grafikler için klasör oluşturur."""
    try:
        if not os.path.exists(f_name):
            os.makedirs(f_name)
    except OSError:
        print("The folder could not be created!")

folder_name = "./results/"
folder(folder_name)
folder_name = "./results/result_graph_3/"
folder(folder_name)

# Algoritma listesi
ml_list = {
    "Naive Bayes": GaussianNB(),
    "QDA": QDA(),
    "Random Forest": RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    "ID3": DecisionTreeClassifier(max_depth=5, criterion="entropy"),
    "AdaBoost": AdaBoostClassifier(n_estimators=20),  # 20 sınıflandırıcı kullandık bellek yetersizliğinden dolayı.
    "MLP": MLPClassifier(hidden_layer_sizes=(13, 13, 13), max_iter=500),
    "Nearest Neighbors": KNeighborsClassifier(3)
}

# Özellik listesi 7 features
features = {
    "all_data": [
        "Bwd Packet Length Std", "Flow Bytes/s", "Flow IAT Min", "Flow IAT Std", "Fwd IAT Total",
        "Fwd Packet Length Std", "Total Length of Fwd Packets", "Label"
    ]
}

# Zaman damgası
seconds = time.time()

# Sonuç dosyasını oluştur
with open(result, "w", newline="", encoding="utf-8") as f:
    wrt = csv.writer(f)
    wrt.writerow(["File", "ML algorithm", "Accuracy", "Precision", "Recall", "F1-score", "Time"])

# Dosyalar üzerinde döngü
for j in csv_files:
    print(f"Processing: {j}")
    feature_list = features.get("all_data", [])
    df = pd.read_csv(path + j, usecols=feature_list).fillna(0)

    # Label sütununu 0 ve 1'e çevir
    df["Label"] = df["Label"].apply(lambda x: 1 if x == "BENIGN" else 0)

    y = df["Label"]
    X = df.drop(columns=["Label"])

    # Algoritmalar üzerinde döngü
    for ii in ml_list:
        precision, recall, f1, accuracy, t_time = [], [], [], [], []

        for _ in range(repetition):
            start_time = time.time()
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=_)
            clf = ml_list[ii]
            clf.fit(X_train, y_train)
            predict = clf.predict(X_test)

            # Performans metriklerini hesapla
            precision.append(precision_score(y_test, predict, average='macro'))
            recall.append(recall_score(y_test, predict, average='macro'))
            f1.append(f1_score(y_test, predict, average='macro'))
            accuracy.append(clf.score(X_test, y_test))
            t_time.append(time.time() - start_time)

        # Ortalama sonuçları yazdır
        print(f"{j[:-4]} - {ii}")
        print(f"Accuracy: {np.mean(accuracy):.2f}, Precision: {np.mean(precision):.2f}, Recall: {np.mean(recall):.2f}, F1-score: {np.mean(f1):.2f}, Time: {np.mean(t_time):.4f}")

        # Sonuçları CSV'ye yaz
        with open(result, "a", newline="", encoding="utf-8") as f:
            wrt = csv.writer(f)
            wrt.writerow([j[:-4], ii, np.mean(accuracy), np.mean(precision), np.mean(recall), np.mean(f1), np.mean(t_time)])

        # Grafik oluştur ve kaydet
        plt.boxplot(f1)
        plt.title(f"All Dataset - {ii}")
        plt.ylabel('F-measure')
        plt.savefig(f"{folder_name}{j[:-4]}_{ii}.pdf", bbox_inches='tight', format='pdf')
        plt.close()

# İşlem tamamlandı
print("Mission accomplished!")
print(f"Total operation time: {time.time() - seconds:.2f} seconds")


Processing: all_data.csv
all_data - Naive Bayes
Accuracy: 0.82, Precision: 0.66, Recall: 0.63, F1-score: 0.64, Time: 1.5929
all_data - QDA
Accuracy: 0.66, Precision: 0.69, Recall: 0.65, F1-score: 0.57, Time: 2.4940
all_data - Random Forest
Accuracy: 0.94, Precision: 0.96, Recall: 0.82, F1-score: 0.87, Time: 13.6944
all_data - ID3
Accuracy: 0.95, Precision: 0.93, Recall: 0.89, F1-score: 0.91, Time: 7.7542
all_data - AdaBoost
Accuracy: 0.93, Precision: 0.91, Recall: 0.83, F1-score: 0.86, Time: 35.3968
all_data - MLP
Accuracy: 0.84, Precision: 0.75, Recall: 0.53, F1-score: 0.52, Time: 74.2048
all_data - Nearest Neighbors
Accuracy: 0.97, Precision: 0.94, Recall: 0.94, F1-score: 0.94, Time: 113.1480
Mission accomplished!
Total operation time: 2493.92 seconds
