In [None]:
# Import necessary libraries
import pandas as pd
import joblib
from sklearn.svm import SVC 
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc



# Load the preprocessed test data.
# `test_to_df` is a project helper from data_reprocessing; the cells below
# treat its result as a pandas DataFrame (they call .head() and .drop() on it).
from data_reprocessing import test_to_df
dataset = 'new_test.csv'  # file name handed to the loader
test_data = test_to_df(dataset)
# NOTE: the frame must not be reassigned to `dataset` here -- doing so replaces
# the loaded data with the file-name string and breaks every later cell.

# Display the first few rows of the dataset to verify the preprocessing
print(test_data.head())



In [16]:
# Split the test frame into the label column and the remaining feature columns
y_test = test_data['is_attributed']             # true labels
X_test = test_data.drop('is_attributed', axis=1)  # everything else is a feature

# Standardize the features before handing them to the SVM.
# NOTE(review): this fits a brand-new scaler on the test set itself. If the
# model was trained on features scaled with a different (training-time)
# scaler, that fitted scaler should be loaded and applied here instead --
# TODO confirm how the training notebook scaled its inputs.
scaler = StandardScaler()
scaler.fit(X_test)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Restore the previously trained SVM from disk.
# NOTE: joblib files are pickle-based; only load model files from a trusted source.
model_file = 'svm_model.joblib'
svm_model = joblib.load(model_file)
print("Loaded trained SVM model from 'svm_model.joblib'.")

In [None]:
# Continuous decision-function scores from the loaded SVM; these (not the
# hard class predictions) are what the AUC-ROC is computed from.
y_test_decision_scores = svm_model.decision_function(X_test_scaled)

# Hard class predictions for the same scaled test features
y_test_pred_class = svm_model.predict(X_test_scaled)

# Area under the ROC curve on the test set
auc_roc = roc_auc_score(y_test, y_test_decision_scores)
print(f"Test Set AUC-ROC: {auc_roc:.4f}")



In [None]:
# Decision-function scores for the scaled test features (recomputed here so
# the cell does not rely on the scores from the previous cell)
y_test_decision_scores = svm_model.decision_function(X_test_scaled)

# ROC points over all score thresholds, and the area under that curve
fpr, tpr, thresholds = roc_curve(y_test, y_test_decision_scores)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve together with the diagonal reference line
roc_fig, roc_ax = plt.subplots()
roc_ax.plot(
    fpr,
    tpr,
    color='darkorange',
    lw=2,
    label=f'ROC curve (area = {roc_auc:.2f})',
)
roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('Receiver Operating Characteristic (ROC) for SVM')
roc_ax.legend(loc='lower right')
plt.show()


In [None]:
# Test-set metrics. Class 0 is treated as the positive ("fraudulent") class
# for precision / recall / F1, per the labelling used in this notebook.
fraud_label = 0

accuracy = accuracy_score(y_test, y_test_pred_class)
precision, recall, f1 = (
    metric(y_test, y_test_pred_class, pos_label=fraud_label)
    for metric in (precision_score, recall_score, f1_score)
)

# Report the results; `auc_roc` comes from the earlier AUC-ROC cell
report_lines = [
    f"Test Set Accuracy: {accuracy:.4f}",
    f"Test Set Precision (fraud detection): {precision:.4f}",
    f"Test Set Recall (fraud detection): {recall:.4f}",
    f"Test Set F1-Score (fraud detection): {f1:.4f}",
    f"Test Set AUC-ROC: {auc_roc:.4f}",
]
for line in report_lines:
    print(line)


In [None]:
# Calculate the confusion matrix.
# The label order is pinned to [0, 1] so that the matrix is always 2x2 and its
# rows/columns line up with the display labels below, even if one class happens
# to be absent from both y_test and the predictions (without `labels`, the
# matrix is built only from the classes that actually occur).
cm = confusion_matrix(y_test, y_test_pred_class, labels=[0, 1])

# Plot the confusion matrix (rows = true class, columns = predicted class)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Fraud (0)', 'Non-Fraud (1)'])
disp.plot(cmap='Blues')
plt.title('Confusion Matrix on Test Set')
plt.show()
