In [2]:


import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import IsolationForest

# Load the credit-card dataset (requires network access).
data = pd.read_csv("https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv")

# Preprocessing: the 'Time' column is not used as a feature.
data = data.drop(columns=['Time'])

X = data.drop(columns=['Class'])
y = data['Class']

# Split BEFORE any fitted preprocessing so that no test-set statistics leak
# into the training pipeline. The split depends only on `y` and `random_state`.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardise 'Amount' with a scaler fitted on the training rows only, then
# apply that same fitted scaler to the test rows.
# NOTE: `data` and `X` keep the raw 'Amount'; only the split frames are scaled.
# Explicit copies keep the column assignment from writing into (or warning
# about) a slice of `X`.
X_train = X_train.copy()
X_test = X_test.copy()
amount_scaler = StandardScaler().fit(X_train['Amount'].to_numpy().reshape(-1, 1))
X_train['Amount'] = np.ravel(amount_scaler.transform(X_train['Amount'].to_numpy().reshape(-1, 1)))
X_test['Amount'] = np.ravel(amount_scaler.transform(X_test['Amount'].to_numpy().reshape(-1, 1)))

# Balance the training data with SMOTE. Only the training split is passed to
# the sampler here.
sm = SMOTE(random_state=42)
resampled = sm.fit_resample(X_train, y_train)
X_train_res, y_train_res = resampled

# Random forest model, trained on the SMOTE-resampled training data.
rf_params = {
    "n_estimators": 200,
    "max_depth": 10,
    "class_weight": "balanced",
    "random_state": 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train_res, y_train_res)

# Predict on the test split and build the per-class report.
y_pred_rf = rf.predict(X_test)
rf_report = classification_report(y_test, y_pred_rf, digits=4)

print("=== Random Forest 評估結果 ===")
print(rf_report)

# === Unsupervised alternative: Isolation Forest ===
# Train only on the training rows whose label is 0 (normal samples).
normal_mask = y_train == 0
iso = IsolationForest(contamination=0.002, random_state=42)
iso.fit(X_train.loc[normal_mask])

# Predict on the test set. The detector marks anomalies as -1; recode so that
# 1 = anomaly and 0 = everything else, matching the labels in `y_test`.
iso_labels = iso.predict(X_test)
y_pred_iso = np.where(iso_labels == -1, 1, 0)

iso_report = classification_report(y_test, y_pred_iso, digits=4)
print("=== Isolation Forest 評估結果 ===")
print(iso_report)


=== Random Forest 評估結果 ===
              precision    recall  f1-score   support

           0     0.9997    0.9983    0.9990     85295
           1     0.4481    0.8176    0.5789       148

    accuracy                         0.9979     85443
   macro avg     0.7239    0.9079    0.7890     85443
weighted avg     0.9987    0.9979    0.9982     85443

=== Isolation Forest 評估結果 ===
              precision    recall  f1-score   support

           0     0.9987    0.9981    0.9984     85295
           1     0.1912    0.2635    0.2216       148

    accuracy                         0.9968     85443
   macro avg     0.5949    0.6308    0.6100     85443
weighted avg     0.9973    0.9968    0.9970     85443

