# ML Models

# Load Data

In [1]:
import joblib
import os
# Locate the folder that holds the preprocessed artifacts.
project_root = r"C:/Graduation Project/AI_Cybersecurity"
processed_path = os.path.join(project_root, "Data", "Processed")

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.

# Scaled train / test data.
x_train_scaled = joblib.load(
    os.path.join(processed_path, "train_scaled.pkl")
)
x_test_scaled = joblib.load(
    os.path.join(processed_path, "test_scaled.pkl")
)

# Test labels, used as ground truth by the evaluation metrics further down.
y_test = joblib.load(
    os.path.join(processed_path, "test_labels.pkl")
)

# One-Class SVM

In [2]:
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import numpy as np
import pandas as pd

In [3]:
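# NOTE: every line of this cell is commented out, so the One-Class SVM experiment below does not run.
# # One-Class SVM with Subsampling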
# sample_size = 150_000
# np.random.seed(42)
# indices = np.random.choice(x_train_scaled.shape[0], size=sample_size, replace=False)
# x_train_sub = x_train_scaled[indices]

# # Train One-Class SVM on subsample
# oc_svm = OneClassSVM(kernel='rbf', nu=0.05, gamma='scale', verbose=True)
# oc_svm.fit(x_train_sub)

# # # ===== Optional: Decision function & threshold tuning =====
# # decision_scores = oc_svm.decision_function(x_test_scaled)
# # threshold = np.percentile(decision_scores, 5)  # adjust percentile to reduce false positives

# y_pred = oc_svm.predict(x_test_scaled)
# # Map SVM output: +1 = inlier (normal), -1 = outlier (anomaly)
# y_pred_mapped = np.where(y_pred == 1, 0, 1)  # 0 = Normal, 1 = Anomaly
# # Evaluation Metrics
# cm = confusion_matrix(y_test, y_pred_mapped)
# print(f"Accuracy: {accuracy_score(y_test, y_pred_mapped)}")
# print(f"Precision: {precision_score(y_test, y_pred_mapped)}")
# print(f"Recall: {recall_score(y_test, y_pred_mapped)}")
# print(f"F1 Score: {f1_score(y_test, y_pred_mapped)}\n")
# print("Confusion Matrix:")
# print(cm)
# print("\nClassification Report:")
# print(classification_report(y_test, y_pred_mapped))

# Isolation Forest

In [4]:
# Isolation Forest Model
# Fit the forest on the scaled training data, score the test set, and report
# binary-classification metrics against y_test (0 = normal, 1 = anomaly).
iso = IsolationForest(n_estimators=400, max_samples=500000, contamination=0.05, n_jobs=-1, random_state=42)
iso.fit(x_train_scaled)

# Score the test set exactly once. IsolationForest.predict() is defined as the
# sign of decision_function() (negative score -> -1 outlier, otherwise +1 inlier),
# so deriving the labels from y_scores gives the same array as iso.predict()
# without a second full pass of all trees over the test set.
y_scores = iso.decision_function(x_test_scaled)
y_pred   = np.where(y_scores < 0, -1, 1)   # 1 = inlier, -1 = outlier

# Convert IsolationForest predictions to 0/1 labels
# -1 → anomaly → 1
#  1 → normal  → 0
y_pred_converted = np.where(y_pred == -1, 1, 0)

# Print evaluation metrics
print("Accuracy:", accuracy_score(y_test, y_pred_converted))
print("Precision:", precision_score(y_test, y_pred_converted))
print("Recall:", recall_score(y_test, y_pred_converted))
print("F1 Score:", f1_score(y_test, y_pred_converted))

# Confusion matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred_converted)
print("\nConfusion Matrix:\n", cm)

# Detailed per-class report
print("\nClassification Report:\n", classification_report(y_test, y_pred_converted))

Accuracy: 0.9284434413978836
Precision: 0.9485838308708402
Recall: 0.9086775730861965
F1 Score: 0.9282019768117267

Confusion Matrix:
 [[388725  20918]
 [ 38785 385919]]

Classification Report:
               precision    recall  f1-score   support

         0.0       0.91      0.95      0.93    409643
         1.0       0.95      0.91      0.93    424704

    accuracy                           0.93    834347
   macro avg       0.93      0.93      0.93    834347
weighted avg       0.93      0.93      0.93    834347



In [5]:
import joblib

# Persist the raw Isolation Forest predictions (y_pred: +1 = inlier, -1 = outlier,
# i.e. NOT the 0/1 labels used for the metrics) under <project_root>/Models.
project_root = r"C:/Graduation Project/AI_Cybersecurity"
models_path = os.path.join(project_root, "Models")

# Create the output folder on first use; a pre-existing folder is not an error.
os.makedirs(models_path, exist_ok=True)

# Kept as the last expression so the cell displays the list of written file paths.
joblib.dump(
    y_pred,
    os.path.join(models_path, "iso_preds.pkl"),
)

['C:/Graduation Project/AI_Cybersecurity\\Models\\iso_preds.pkl']