In [1]:
import pandas as pd
import numpy as np

from sklearn.svm import OneClassSVM
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    classification_report,
    confusion_matrix
)
from sklearn.preprocessing import StandardScaler


In [2]:
X = pd.read_csv("/kaggle/input/anamoly/X_features.csv")
y = pd.read_csv("/kaggle/input/anamoly/y_labels.csv").values.ravel()

X.shape, y.shape


((594643, 17), (594643,))

In [3]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42
)


In [5]:
ocsvm = OneClassSVM(
    kernel="rbf",
    nu=0.012,
    gamma="scale"
)

ocsvm.fit(X_train)


In [6]:
y_pred_raw = ocsvm.predict(X_test)

# Convert to fraud labels
y_pred = np.where(y_pred_raw == -1, 1, 0)


In [7]:
# Decision scores for ROC-AUC
decision_scores = -ocsvm.decision_function(X_test)


In [8]:
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

precision, recall, f1


(0.4348432055749129, 0.43333333333333335, 0.4340869565217391)

In [9]:
roc_auc = roc_auc_score(y_test, decision_scores)
roc_auc


np.float64(0.8771986928327098)

In [10]:
confusion_matrix(y_test, y_pred)


array([[116678,    811],
       [   816,    624]])

In [11]:
print(classification_report(y_test, y_pred, digits=4))



              precision    recall  f1-score   support

           0     0.9931    0.9931    0.9931    117489
           1     0.4348    0.4333    0.4341      1440

    accuracy                         0.9863    118929
   macro avg     0.7139    0.7132    0.7136    118929
weighted avg     0.9863    0.9863    0.9863    118929



In [12]:
import joblib

joblib.dump(ocsvm, "one_class_svm_model.pkl")
joblib.dump(scaler, "ocsvm_scaler.pkl")


['ocsvm_scaler.pkl']