# In [None]:
# ex2.ipynb (as .py script for compatibility)

# 匯入必要套件
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import kagglehub

# General settings
RANDOM_SEED = 42
TEST_SIZE = 0.3

# Download and load the dataset
path = kagglehub.dataset_download("mlg-ulb/creditcardfraud")
data = pd.read_csv(f"{path}/creditcard.csv")
data['Class'] = data['Class'].astype(int)

# Preprocessing: drop the 'Time' column.
# 'Amount' is standardized further below, after the train/test split, so the
# scaler's mean/std are computed from training rows only (no test-set leakage).
# Consequently `data` and `X` keep the raw, unscaled 'Amount' values.
data = data.drop(['Time'], axis=1)

# Separate features and labels
X = data.drop(columns=['Class'])
y = data['Class']

# Stratified train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_SEED, stratify=y
)

# Columns are overwritten/added on both frames below; take explicit copies so
# the assignments never target a view of `X`.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize 'Amount': fit on the training split, then apply the same
# transformation to the test split.
amount_scaler = StandardScaler()
X_train['Amount'] = amount_scaler.fit_transform(
    X_train['Amount'].values.reshape(-1, 1)
).ravel()
X_test['Amount'] = amount_scaler.transform(
    X_test['Amount'].values.reshape(-1, 1)
).ravel()

# Unsupervised learning: Isolation Forest fitted on the training features
iso_forest = IsolationForest(contamination=0.002, random_state=RANDOM_SEED)
iso_forest.fit(X_train)

# Compute the anomaly score and append it as a new feature.
# The right-hand side is evaluated before the column is added, so the forest
# always sees exactly the columns it was fitted on.
X_train['anomaly_score'] = iso_forest.decision_function(X_train)
X_test['anomaly_score'] = iso_forest.decision_function(X_test)

# Supervised learning: XGBoost trained on the original features + anomaly_score
# NOTE(review): `use_label_encoder` is reported as deprecated by recent XGBoost
# releases — confirm against the installed version before removing it.
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=RANDOM_SEED)
xgb_model.fit(X_train, y_train)

# Predict on the held-out test split
y_pred = xgb_model.predict(X_test)

def evaluate(y_true, y_pred, model_name):
    """Print a metrics summary for a set of predictions.

    Writes a header containing ``model_name``, then accuracy, precision,
    recall and F1 (each computed from ``y_true`` and ``y_pred``), followed
    by the full classification report. Nothing is returned.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels to compare against ``y_true``.
        model_name: Label shown in the header line of the summary.
    """
    separator = "=" * 45
    print(f"\n{model_name} Evaluation")
    print(separator)

    # (label, scorer) pairs, printed in this exact order.
    scorers = (
        ("Accuracy:", accuracy_score),
        ("Precision:", precision_score),
        ("Recall:", recall_score),
        ("F1 Score:", f1_score),
    )
    for label, scorer in scorers:
        print(label, scorer(y_true, y_pred))

    report = classification_report(y_true, y_pred)
    print("\nClassification Report:\n", report)

# Print test-set metrics for the XGBoost predictions (features include the
# Isolation Forest anomaly score).
evaluate(y_test, y_pred, "XGBoost + Isolation Forest")