In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report
from xgboost import XGBClassifier


In [3]:
# Read the full transactions CSV, force the target column to int, discard the
# 'Time' column and replace 'Amount' with its standardized values.
# NOTE(review): the scaler is fit on every row, before the train/test split
# further down, so test rows contribute to the scaling statistics.
data = pd.read_csv("creditcard.csv")
data = data.assign(Class=data['Class'].astype(int)).drop(columns=['Time'])
amount_2d = data[['Amount']].to_numpy()  # single column as an (n_rows, 1) array
data['Amount'] = StandardScaler().fit_transform(amount_2d)


In [4]:
# Train unsupervised anomaly detector on the input features only.
# Both the target and any 'anomaly_score' column left over from a previous run
# of this cell are excluded, so re-running the cell does not feed the old flag
# back into the detector as an input (errors='ignore' covers the first run,
# when the column does not exist yet).
# NOTE(review): the detector is fit on all rows, including those that later
# land in the test split — consider fitting on the training rows only.
iso_inputs = data.drop(columns=['Class', 'anomaly_score'], errors='ignore')
iso = IsolationForest(n_estimators=100, contamination=0.001, random_state=42)
iso_labels = iso.fit_predict(iso_inputs)

# fit_predict labels anomalies as -1 and normal points as 1; store a binary
# flag (1 = anomaly, 0 = normal) as a new feature. Despite the column name this
# is a hard label, not a continuous score; the name is kept so downstream
# cells see the same column.
data['anomaly_score'] = (iso_labels == -1).astype(int)


In [5]:
# Split features/target: every column except 'Class' is used as a feature
X = data.drop('Class', axis=1)
y = data['Class']

# Split train/test: 30% held out, stratified on the target so both splits keep
# the same class proportions; seeded for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Train XGBoost model.
# `use_label_encoder` is intentionally not passed: the installed XGBoost no
# longer accepts it and reports it as an unused parameter on every fit.
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)

# Predict & evaluate on the held-out split (per-class precision/recall/F1)
y_pred = model.predict(X_test)
print("🧠 XGBoost with Anomaly Feature")
print(classification_report(y_test, y_pred))


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🧠 XGBoost with Anomaly Feature
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.93      0.73      0.82       148

    accuracy                           1.00     85443
   macro avg       0.97      0.86      0.91     85443
weighted avg       1.00      1.00      1.00     85443

