In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE

# Install catboost if not already installed
!pip install catboost

# ===== 模型 =====
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier, PassiveAggressiveClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, BernoulliNB
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.neural_network import MLPClassifier

import torch
import torch.nn as nn
import torch.optim as optim


# =============== Step 1. 讀取資料 ===============
df = pd.read_csv("creditcard.csv")
X = df.drop("Class", axis=1)
y = df["Class"]

# =============== Step 2. Train/Test Split ===============
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# =============== Step 3. SMOTE ===============
smote = SMOTE(random_state=42)
X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)


# =============== Step 4. 模型清單 ===============
models = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "Ridge Classifier": RidgeClassifier(),
    "SGD Classifier": SGDClassifier(loss="log_loss"),
    "Passive Aggressive": PassiveAggressiveClassifier(),

    "Linear SVM": LinearSVC(),
    "RBF SVM": SVC(kernel="rbf", probability=True),

    "kNN": KNeighborsClassifier(),

    "Gaussian NB": GaussianNB(),
    "Bernoulli NB": BernoulliNB(),

    "Decision Tree": DecisionTreeClassifier(),
    "Extra Tree": ExtraTreeClassifier(),
    "Random Forest": RandomForestClassifier(),

    "Gradient Boosting": GradientBoostingClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "XGBoost": XGBClassifier(eval_metric="logloss"),
    "LightGBM": LGBMClassifier(),
    "CatBoost": CatBoostClassifier(verbose=0),

    "MLPClassifier": MLPClassifier(max_iter=500)
}


# =============== Step 5. 訓練 & 評估 ===============
results = []

for name, model in models.items():
    clf = Pipeline([
        ("scaler", StandardScaler()),
        ("model", model)
    ])

    clf.fit(X_train_bal, y_train_bal)

    y_pred = clf.predict(X_test)

    try:
        y_prob = clf.predict_proba(X_test)[:, 1]
    except:
        y_prob = clf.decision_function(X_test)

    auc = roc_auc_score(y_test, y_prob)

    print("="*50)
    print(name)
    print("ROC-AUC:", auc)
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

    results.append([name, auc])

# 結果表
result_df = pd.DataFrame(results, columns=["Model", "ROC-AUC"]).sort_values(by="ROC-AUC", ascending=False)
print(result_df)

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8
Logistic Regression
ROC-AUC: 0.9764816590676788
[[56296   568]
 [   10    88]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99     56864
           1       0.13      0.90      0.23        98

    accuracy                           0.99     56962
   macro avg       0.57      0.94      0.61     56962
weighted avg       1.00      0.99      0.99     56962

Ridge Classifier
ROC-AUC: 0.9592181273184569
[[56173   691]
 [   18    80]]
              precision    recall  f1-score   support

           0       1.00      0.99      0.99     56864
           1       0.10     



LightGBM
ROC-AUC: 0.955624788252386
[[56809    55]
 [   15    83]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.60      0.85      0.70        98

    accuracy                           1.00     56962
   macro avg       0.80      0.92      0.85     56962
weighted avg       1.00      1.00      1.00     56962

CatBoost
ROC-AUC: 0.9738961848104464
[[56805    59]
 [   13    85]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.59      0.87      0.70        98

    accuracy                           1.00     56962
   macro avg       0.80      0.93      0.85     56962
weighted avg       1.00      1.00      1.00     56962

MLPClassifier
ROC-AUC: 0.9607324816533254
[[56832    32]
 [   21    77]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.71      