# In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import joblib

# File paths for the testing datasets
test_file_paths = [
    'network_capture/30_nmap_scan_labeled.csv',
    'network_capture/120_nmap_scan_labeled.csv',
    'network_capture/ddos_attack_labeled.csv',
    'network_capture/new_traffic_labeled.csv',
]

# Load the saved scaler, the Gradient Boosting model, and the selected features.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts from a
# trusted source.
scaler = joblib.load('scaler.joblib')
gb = joblib.load('gradient_boosting_model.joblib')
selected_features = joblib.load('selected_features.joblib')

# Probability cut-off handed to the Gradient Boosting prediction step:
# a sample is labelled 1 when its positive-class probability is >= threshold.
threshold = 0.01  # Adjust this as needed

# Load and concatenate all testing datasets
print("Loading and concatenating testing datasets...")
# ignore_index=True gives the combined frame one fresh 0..n-1 index instead of
# repeating every file's own row numbers.
test_data = pd.concat(
    [pd.read_csv(path) for path in test_file_paths],
    ignore_index=True,
)

# Renaming the columns by removing leading/trailing whitespace
test_data.columns = test_data.columns.str.strip()

# Fail early, with a readable message, when a required column is absent
# rather than skipping it during imputation and failing later on selection.
missing_columns = [
    name for name in list(selected_features) + ['Label']
    if name not in test_data.columns
]
if missing_columns:
    raise KeyError(f"Test data is missing required columns: {missing_columns}")

# Dropping duplicates
test_data = test_data.drop_duplicates()

# Replacing any infinite values (positive or negative) with NaN so they are
# imputed together with the other missing values below
test_data = test_data.replace([np.inf, -np.inf], np.nan)

# Fill missing values in each selected feature with that column's median.
# The median is computed from the test data itself; a column that is entirely
# NaN has a NaN median and therefore stays NaN.
for col in selected_features:
    test_data[col] = test_data[col].fillna(test_data[col].median())

# Encode labels for binary classification: 'BENIGN' -> 0, everything else -> 1.
# Any value that is not exactly 'BENIGN' (including a missing label) becomes 1.
test_data['Label'] = (test_data['Label'] != 'BENIGN').astype(int)

# Use only the selected features in the test data
X_test = test_data[selected_features].copy()
y_test = test_data['Label'].copy()

# Standardize the test data using the loaded scaler
X_test_scaled = scaler.transform(X_test)


# -----------------------------
# Gradient Boosting prediction
# -----------------------------

# Function for threshold-based predictions
def predict_with_threshold(model, X, threshold=0.5):
    """Label samples by thresholding the model's positive-class probability.

    Args:
        model: Object exposing ``predict_proba(X)``; the second column of its
            result is used as the positive-class probability.
        X: Feature matrix, passed to ``model.predict_proba`` unchanged.
        threshold: A sample is labelled 1 when its probability is greater
            than or equal to this value, otherwise 0.

    Returns:
        Integer array of 0/1 labels, one per row of ``X``.
    """
    positive_scores = model.predict_proba(X)[:, 1]
    is_positive = positive_scores >= threshold
    return is_positive.astype(int)

# Get predictions from Gradient Boosting, using the configured probability threshold
gb_pred_binary = predict_with_threshold(gb, X_test_scaled, threshold)

# Display classification report and confusion matrix for Gradient Boosting
print("\nClassification Report (Gradient Boosting):\n", classification_report(y_test, gb_pred_binary))
# Pin labels so the matrix is always 2x2 (rows: true 0/1, columns: predicted 0/1),
# even when the labels and predictions contain only one class.
print("Confusion Matrix (Gradient Boosting):\n", confusion_matrix(y_test, gb_pred_binary, labels=[0, 1]))

