In [1]:
import pandas as pd
import os
import joblib
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Train and evaluate one set of binary classifiers per attack type.
#
# For every entry in `attack_types` the script:
#   1. reads attacks/<attack>_feature_importance.csv and takes the first
#      `n_top_features` entries of its 'Feature' column (presumably the file is
#      already sorted by importance -- TODO confirm against the producer);
#   2. reads attacks/<attack>_vs_BENIGN.csv and keeps those features plus the
#      label column;
#   3. fits Gaussian Naive Bayes, QDA and an MLP on a 60/40 train/test split
#      and dumps each fitted model into attacks/models/;
#   4. records the test-set accuracy of each model.
# The accuracies are finally written to attacks/results_1.csv.
# Attack types whose inputs are missing or incomplete are reported and skipped.

# List of attack types
attack_types = ["Bot", "DDoS", "DoS GoldenEye", "DoS Hulk", "DoS Slowhttptest", "DoS slowloris", "FTP-Patator",
                "PortScan", "SSH-Patator"]
benign_type = "BENIGN"

# Number of leading rows of the importance file whose features are used for training
n_top_features = 3

# Ensure models directory exists (exist_ok avoids the check-then-create race)
models_dir = os.path.join("attacks", "models")
os.makedirs(models_dir, exist_ok=True)

# Initialize an empty list to store results as dictionaries
results = []

# Loop through each attack type
for attack_type in attack_types:
    # Read the feature importance file
    importance_file = os.path.join("attacks", f"{attack_type}_feature_importance.csv")
    try:
        importance_data = pd.read_csv(importance_file)
    except FileNotFoundError:
        print(f"Importance file not found for {attack_type}, skipping...")
        continue

    # Select the first `n_top_features` features listed in the importance file
    selected_features = importance_data['Feature'][:n_top_features].tolist()

    # Read the data file
    data_file = os.path.join("attacks", f"{attack_type}_vs_{benign_type}.csv")
    try:
        data = pd.read_csv(data_file)
    except FileNotFoundError:
        print(f"Data file not found for {attack_type}, skipping...")
        continue

    # Handle potential leading space in the label column name
    label_col = ' Label' if ' Label' in data.columns else 'Label'

    if label_col not in data.columns:
        print(f"Label column not found for {attack_type}, skipping...")
        continue

    # A feature named in the importance file but absent from the data file
    # would raise KeyError below; skip it like every other missing input.
    missing_features = [name for name in selected_features if name not in data.columns]
    if missing_features:
        print(f"Features {missing_features} not found for {attack_type}, skipping...")
        continue

    # Keep only the selected features and the target column
    selected_data = data[selected_features + [label_col]]

    # Split the data into features (X) and target (y)
    X = selected_data[selected_features]
    y = selected_data[label_col]

    # Hold out 40% of the rows for testing; the fixed seed keeps the split reproducible
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

    # Train Naive Bayes and persist the fitted model
    nb_model = GaussianNB()
    nb_model.fit(X_train, y_train)
    joblib.dump(nb_model, os.path.join(models_dir, f"{attack_type}_NB.pkl"))

    # Train Quadratic Discriminant Analysis and persist the fitted model
    qda_model = QuadraticDiscriminantAnalysis()
    qda_model.fit(X_train, y_train)
    joblib.dump(qda_model, os.path.join(models_dir, f"{attack_type}_QDA.pkl"))

    # Train Multi-Layer Perceptron (seeded for reproducibility) and persist the fitted model
    mlp_model = MLPClassifier(random_state=42, max_iter=1000, learning_rate_init=0.001)
    mlp_model.fit(X_train, y_train)
    joblib.dump(mlp_model, os.path.join(models_dir, f"{attack_type}_MLP.pkl"))

    # Predictions on the held-out test set
    nb_preds = nb_model.predict(X_test)
    qda_preds = qda_model.predict(X_test)
    mlp_preds = mlp_model.predict(X_test)

    # Calculate accuracies
    nb_accuracy = accuracy_score(y_test, nb_preds)
    qda_accuracy = accuracy_score(y_test, qda_preds)
    mlp_accuracy = accuracy_score(y_test, mlp_preds)

    # Print the accuracy of every model for this attack type
    print(f"\n--- Results for {attack_type} ---")
    print(f"Naive Bayes Accuracy: {nb_accuracy:.4f}")
    print(f"QDA Accuracy: {qda_accuracy:.4f}")
    print(f"MLP Accuracy: {mlp_accuracy:.4f}")

    # NOTE: the detailed report and confusion matrix are printed for Naive Bayes only
    print("\nNaive Bayes Classification Report:")
    print(classification_report(y_test, nb_preds))
    print("Confusion Matrix:\n", confusion_matrix(y_test, nb_preds))

    # Store the results as a dictionary
    result_dict = {
        'Attack Type': attack_type,
        'Naive Bayes Accuracy': nb_accuracy,
        'QDA Accuracy': qda_accuracy,
        'MLP Accuracy': mlp_accuracy
    }

    # Append the dictionary to the results list
    results.append(result_dict)

# Create a Pandas DataFrame from the results list (one row per processed attack type)
results_df = pd.DataFrame(results)

# Display the DataFrame
print(results_df)

# Save results to CSV
results_df.to_csv(os.path.join("attacks", "results_1.csv"), index=False)
print(f"Models saved to {models_dir}")
