In [2]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.2-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting nvidia-nccl-cu12 (from xgboost)
  Downloading nvidia_nccl_cu12-2.26.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.0 kB)
Downloading xgboost-3.0.2-py3-none-manylinux_2_28_x86_64.whl (253.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.9/253.9 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nvidia_nccl_cu12-2.26.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (318.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.1/318.1 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nvidia-nccl-cu12, xgboost
Successfully installed nvidia-nccl-cu12-2.26.5 xgboost-3.0.2


In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score
import kagglehub

# Seed passed as random_state to the train/test split and to both models.
RANDOM_SEED = 42
# Fraction of rows held out as the test set by train_test_split.
TEST_SIZE = 0.3


In [4]:
# Load data: download the Kaggle dataset and locate its local directory.
path = kagglehub.dataset_download("mlg-ulb/creditcardfraud")

# Preprocessing: cast the label to int and drop the 'Time' column.
data = (
    pd.read_csv(f"{path}/creditcard.csv")
    .astype({'Class': int})
    .drop(columns=['Time'])
)

# Standardise 'Amount' in place.
# NOTE(review): the scaler is fit on the full dataset, i.e. before the
# train/test split performed later in the notebook.
data['Amount'] = StandardScaler().fit_transform(data[['Amount']])


In [5]:
# Separate the feature matrix from the fraud label.
X = data.drop('Class', axis=1).to_numpy()
y = data['Class'].to_numpy()

# Stratified hold-out split: the class ratio is kept in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    stratify=y,
    random_state=RANDOM_SEED,
)

# Train only on normal (Class == 0) rows of the training partition.
X_train_normal = X_train[y_train == 0]

# fit() returns the estimator itself, so construction and fitting are chained.
iso_model = IsolationForest(
    contamination=0.00172,
    random_state=RANDOM_SEED,
).fit(X_train_normal)

# Continuous anomaly score and hard prediction for every test row.
anomaly_scores = iso_model.decision_function(X_test)
anomaly_pred = iso_model.predict(X_test)
# Rows predicted as -1 become label 1 (fraud); everything else becomes 0.
anomaly_pred_binary = np.where(anomaly_pred == -1, 1, 0)

print("📊 Isolation Forest 預測結果：")
print(classification_report(y_test, anomaly_pred_binary))


📊 Isolation Forest 預測結果：
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.20      0.25      0.22       148

    accuracy                           1.00     85443
   macro avg       0.60      0.62      0.61     85443
weighted avg       1.00      1.00      1.00     85443



In [6]:
# Supervised baseline: gradient-boosted trees trained on the full labelled
# training partition (both classes).
# `use_label_encoder` is intentionally not passed: the installed XGBoost does
# not recognise it and only emits an "are not used" warning when it is given.
xgb_model = XGBClassifier(eval_metric='logloss', random_state=RANDOM_SEED)
xgb_model.fit(X_train, y_train)

# Hard class predictions (0 = normal, 1 = fraud) for the held-out test set.
xgb_pred = xgb_model.predict(X_test)

print("📊 XGBoost 預測結果：")
print(classification_report(y_test, xgb_pred))


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


📊 XGBoost 預測結果：
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.93      0.73      0.82       148

    accuracy                           1.00     85443
   macro avg       0.97      0.86      0.91     85443
weighted avg       1.00      1.00      1.00     85443



In [7]:
# Fused prediction: a row is labelled fraud only when BOTH models flag it.
# An AND-combination can only remove positive predictions relative to either
# model on its own, never add new ones.
combined_pred = ((anomaly_pred_binary == 1) & (xgb_pred == 1)).astype(int)

print("📊 融合模型結果：")
print(classification_report(y_test, combined_pred))


📊 融合模型結果：
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.94      0.23      0.37       148

    accuracy                           1.00     85443
   macro avg       0.97      0.61      0.68     85443
weighted avg       1.00      1.00      1.00     85443

