In [1]:
import pandas as pd

# Load the fraud CSV, using its first column as the row index.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# not run elsewhere until it is replaced by a configurable data directory.
df = pd.read_csv(
    '/Users/putuu/Documents/Github_Repo/MMDT_2025_MLAI101/Khaing-Hsu-Wai/data/fraud.csv',
    index_col=0,
)

# Take the target first, while the frame still holds every column.
y = df['Class'].values

# Discard the first remaining column by position, then split off the label so
# that X holds only the feature columns as a NumPy array.
df = df.iloc[:, 1:]
X = df.drop('Class', axis=1).values

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the rows for testing. The fixed seed makes the split
# repeatable; no `stratify` argument is passed, so class proportions in the
# two parts are left to the random draw.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=1,
)

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

#--------------------------------------------------
## ------------ SVM Classifier ------------------## 
#--------------------------------------------------

from sklearn.svm import SVC

## Linear Kernel  ---------------
# Standardise the features, then fit a class-weighted SVC with a linear kernel.
steps = [
    ('scaler', StandardScaler()),
    ('svc', SVC(class_weight='balanced', kernel='linear')),
]

# fit() hands back the pipeline itself, so build and train in one statement.
svcL_pipeline = Pipeline(steps=steps).fit(X_train, y_train)

## Polynomial Kernel -----------------------
# Same scaler + SVC layout, this time with a cubic polynomial kernel.
steps = [
    ('scaler', StandardScaler()),
    ('svc', SVC(class_weight='balanced', kernel='poly', degree=3)),
]

# fit() hands back the pipeline itself, so build and train in one statement.
svcPoly_pipeline = Pipeline(steps=steps).fit(X_train, y_train)

## RBF Kernel -----------------------
# Same scaler + SVC layout, this time with an RBF kernel and gamma='scale'.
steps = [
    ('scaler', StandardScaler()),
    ('svc', SVC(class_weight='balanced', kernel='rbf', gamma='scale')),
]

svcRBF_pipeline = Pipeline(steps=steps).fit(X_train, y_train)

# Bare expression on the last line so the fitted pipeline is still rendered
# as this cell's output.
svcRBF_pipeline

0,1,2
,steps,"[('scaler', ...), ('svc', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,'balanced'


In [9]:
from sklearn.metrics import classification_report, roc_auc_score, average_precision_score
from sklearn.base import clone
import pandas as pd
import numpy as np

def eval_row(name, pipe, Xtr, ytr, Xte, yte, proba=True):
    """Fit a fresh copy of ``pipe`` and summarise its test-set performance.

    The pipeline is cloned before anything else happens, so the object passed
    in is neither refitted nor reconfigured by this function.

    Parameters
    ----------
    name : str
        Label stored under the ``"model"`` key of the returned row.
    pipe : sklearn Pipeline
        Pipeline whose classifier step is named ``'svc'``.
    Xtr, ytr
        Training features and labels passed to ``fit``.
    Xte, yte
        Held-out features and labels used for every reported metric.
    proba : bool, default True
        Permit falling back to probability estimates for the ranking scores.
        ``svc__probability=True`` is only switched on when the ``'svc'`` step
        has no ``decision_function``; otherwise the decision-function scores
        are used and the extra calibration work is skipped.

    Returns
    -------
    dict
        One table row with the keys ``model``, ``acc``, ``prec_pos``,
        ``recall_pos``, ``f1_pos``, ``ROC_AUC``, ``PR_AUC`` and
        ``support_pos``. The positive-class entries are read from the
        classification report under the key ``"1"``, so the positive label
        must render as ``"1"``.
    """
    clf = clone(pipe)

    # Decide up front which score source will be used. Previously probability
    # estimates were always enabled when `proba` was set, even though the
    # decision function was then used for scoring, so each fit paid for an
    # internal cross-validated calibration whose result was never read.
    use_decision = hasattr(clf.named_steps['svc'], 'decision_function')
    if proba and not use_decision:
        clf.set_params(svc__probability=True)

    clf.fit(Xtr, ytr)

    # Continuous scores for ROC/PR; prefer decision_function if available.
    if use_decision:
        scores = clf.decision_function(Xte)
    else:
        scores = clf.predict_proba(Xte)[:, 1]

    yhat = clf.predict(Xte)
    # zero_division=0 keeps the report quiet if a class is never predicted.
    rep = classification_report(yte, yhat, output_dict=True, zero_division=0)
    pos = rep["1"]
    return {
        "model": name,
        "acc": rep["accuracy"],
        "prec_pos": pos["precision"],
        "recall_pos": pos["recall"],
        "f1_pos": pos["f1-score"],
        "ROC_AUC": roc_auc_score(yte, scores),
        "PR_AUC": average_precision_score(yte, scores),
        "support_pos": pos["support"]
    }

# Evaluate each kernel on the same train/test split, one result row per model.
rows = [
    eval_row(label, pipeline, X_train, y_train, X_test, y_test)
    for label, pipeline in (
        ("Linear SVM", svcL_pipeline),
        ("Poly SVM d=3", svcPoly_pipeline),
        ("RBF SVM", svcRBF_pipeline),
    )
]

# Side-by-side comparison table, indexed by model name and rounded for display.
pd.DataFrame(rows).set_index("model").round(3)


Unnamed: 0_level_0,acc,prec_pos,recall_pos,f1_pos,ROC_AUC,PR_AUC,support_pos
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Linear SVM,0.97,0.334,0.883,0.485,0.952,0.845,137.0
Poly SVM d=3,0.99,0.661,0.796,0.722,0.921,0.796,137.0
RBF SVM,0.986,0.538,0.774,0.635,0.972,0.689,137.0


In [10]:
from sklearn.metrics import classification_report, roc_auc_score, average_precision_score

# NOTE(review): set_params() and fit() act on svcRBF_pipeline itself, so `clf`
# is that same object and the shared pipeline stays refitted with
# probability=True for every later cell.
svcRBF_pipeline.set_params(svc__probability=True)
clf = svcRBF_pipeline.fit(X_train, y_train)

# Decision-function scores feed the ranking metrics; hard labels feed the report.
scores = clf.decision_function(X_test)
y_pred = clf.predict(X_test)

print(classification_report(y_test, y_pred, digits=3))
print("ROC-AUC:", roc_auc_score(y_test, scores))
print("PR-AUC :", average_precision_score(y_test, scores))

              precision    recall  f1-score   support

           0      0.996     0.989     0.993      8541
           1      0.538     0.774     0.635       137

    accuracy                          0.986      8678
   macro avg      0.767     0.882     0.814      8678
weighted avg      0.989     0.986     0.987      8678

ROC-AUC: 0.972283113568985
PR-AUC : 0.6890899146219205


In [11]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Test-set predictions from each of the fitted pipelines
y_pred_linear = svcL_pipeline.predict(X_test)
y_pred_poly = svcPoly_pipeline.predict(X_test)
y_pred_rbf = svcRBF_pipeline.predict(X_test)

# Accuracy, one line per kernel
for heading, predicted in (
    ("Linear Kernel Accuracy:", y_pred_linear),
    ("Polynomial Kernel Accuracy:", y_pred_poly),
    ("RBF Kernel Accuracy:", y_pred_rbf),
):
    print(heading, accuracy_score(y_test, predicted))

# Per-class precision / recall / F1 for each kernel
for heading, predicted in (
    ("\nLinear Kernel Report:\n", y_pred_linear),
    ("\nPolynomial Kernel Report:\n", y_pred_poly),
    ("\nRBF Kernel Report:\n", y_pred_rbf),
):
    print(heading, classification_report(y_test, predicted))

# Confusion matrix for each kernel
for heading, predicted in (
    ("\nLinear Kernel Confusion Matrix:\n", y_pred_linear),
    ("\nPolynomial Kernel Confusion Matrix:\n", y_pred_poly),
    ("\nRBF Kernel Confusion Matrix:\n", y_pred_rbf),
):
    print(heading, confusion_matrix(y_test, predicted))

Linear Kernel Accuracy: 0.9703848813090574
Polynomial Kernel Accuracy: 0.9903203503111316
RBF Kernel Accuracy: 0.9859414611661673

Linear Kernel Report:
               precision    recall  f1-score   support

           0       1.00      0.97      0.98      8541
           1       0.33      0.88      0.48       137

    accuracy                           0.97      8678
   macro avg       0.67      0.93      0.73      8678
weighted avg       0.99      0.97      0.98      8678


Polynomial Kernel Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00      8541
           1       0.66      0.80      0.72       137

    accuracy                           0.99      8678
   macro avg       0.83      0.89      0.86      8678
weighted avg       0.99      0.99      0.99      8678


RBF Kernel Report:
               precision    recall  f1-score   support

           0       1.00      0.99      0.99      8541
           1       0.54      0.77 