In [6]:
import pandas as pd

# Load the pre-split train/test CSVs produced by an earlier processing step.
# The path is relative to the notebook's working directory.
PROCESSED_DIR = '../data/processed'

X_train_scaled = pd.read_csv(f'{PROCESSED_DIR}/X_train_scaled.csv')
X_test_scaled = pd.read_csv(f'{PROCESSED_DIR}/X_test_scaled.csv')

# Squeeze along the columns axis only: a bare .squeeze() would also collapse
# the row axis and turn a one-row target file into a scalar instead of a Series.
y_train = pd.read_csv(f'{PROCESSED_DIR}/y_train.csv').squeeze("columns")
y_test = pd.read_csv(f'{PROCESSED_DIR}/y_test.csv').squeeze("columns")

# Fail fast if a feature file and its target file are out of sync.
assert len(X_train_scaled) == len(y_train), "X_train_scaled / y_train row counts differ"
assert len(X_test_scaled) == len(y_test), "X_test_scaled / y_test row counts differ"

# Option 1 - Kernel SVM (RBF)

In [7]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Train an RBF-kernel SVC on the training split, then label the test split.
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale').fit(X_train_scaled, y_train)
y_pred = svm_model.predict(X_test_scaled)

# Accuracy takes no positive label; the other three are scored for "malicious".
acc = accuracy_score(y_test, y_pred)
prec, rec, f1 = (
    score_fn(y_test, y_pred, pos_label="malicious")
    for score_fn in (precision_score, recall_score, f1_score)
)

# Headline metrics, one per line, four decimals each.
print("SVM Model Performance:")
print("\n".join(
    f"{title}: {score:.4f}"
    for title, score in (
        ("Accuracy", acc),
        ("Precision", prec),
        ("Recall", rec),
        ("F1 Score", f1),
    )
))

# Each header is followed on the next line by its table.
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


SVM Model Performance:
Accuracy: 0.9967
Precision: 0.9940
Recall: 0.9918
F1 Score: 0.9929

Confusion Matrix:
[[69023   125]
 [  171 20717]]

Classification Report:
              precision    recall  f1-score   support

      benign       1.00      1.00      1.00     69148
   malicious       0.99      0.99      0.99     20888

    accuracy                           1.00     90036
   macro avg       1.00      1.00      1.00     90036
weighted avg       1.00      1.00      1.00     90036



# Option 2 - Linear SVM

In [8]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Fit a linear SVM on the training split and label the test split.
# NOTE: this rebinds `svm_model` and `y_pred` (and the metric variables below),
# replacing the values left by the RBF-kernel cell above.
#
# random_state pins the solver's random number generator so the reported
# metrics are reproducible across Restart & Run All. Without it, repeated fits
# on the same data may differ slightly. (Presumably only relevant when the
# dual formulation is used - see the LinearSVC docs.)
svm_model = LinearSVC(C=1.0, max_iter=5000, random_state=42)
svm_model.fit(X_train_scaled, y_train)
y_pred = svm_model.predict(X_test_scaled)

# Precision, recall and F1 are scored with "malicious" as the positive class.
acc = accuracy_score(y_test, y_pred)
prec = precision_score(y_test, y_pred, pos_label="malicious")
rec = recall_score(y_test, y_pred, pos_label="malicious")
f1 = f1_score(y_test, y_pred, pos_label="malicious")

print("SVM Model Performance:")
print(f"Accuracy: {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall: {rec:.4f}")
print(f"F1 Score: {f1:.4f}")

print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


SVM Model Performance:
Accuracy: 0.9964
Precision: 0.9930
Recall: 0.9916
F1 Score: 0.9923

Confusion Matrix:
[[69003   145]
 [  175 20713]]

Classification Report:
              precision    recall  f1-score   support

      benign       1.00      1.00      1.00     69148
   malicious       0.99      0.99      0.99     20888

    accuracy                           1.00     90036
   macro avg       1.00      0.99      1.00     90036
weighted avg       1.00      1.00      1.00     90036

