In [1]:
!pip install xgboost kagglehub --quiet

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import xgboost as xgb
import kagglehub

In [3]:
path = kagglehub.dataset_download("mlg-ulb/creditcardfraud")
data = pd.read_csv(f"{path}/creditcard.csv")
data['Class'] = data['Class'].astype(int)

data = data.drop(['Time'], axis=1)
data['Amount'] = StandardScaler().fit_transform(data['Amount'].values.reshape(-1, 1))

X = np.asarray(data.drop(columns=['Class']))
y = np.asarray(data['Class'])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [4]:
iso_forest = IsolationForest(contamination=0.0017, random_state=42)
iso_forest.fit(X_train)

anomaly_train = iso_forest.predict(X_train)
anomaly_test = iso_forest.predict(X_test)

X_train_aug = np.hstack((X_train, anomaly_train.reshape(-1, 1)))
X_test_aug = np.hstack((X_test, anomaly_test.reshape(-1, 1)))

In [5]:
xgb_model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgb_model.fit(X_train_aug, y_train)

y_pred = xgb_model.predict(X_test_aug)

def evaluation(y_true, y_pred, model_name="Model"):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    print(f'\n{model_name} Evaluation:')
    print('=' * 45)
    print('         Accuracy:', accuracy)
    print('  Precision Score:', precision)
    print('     Recall Score:', recall)
    print('         F1 Score:', f1)
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

evaluation(y_test, y_pred, model_name="Isolation + XGBoost")

Parameters: { "use_label_encoder" } are not used.




Isolation + XGBoost Evaluation:
         Accuracy: 0.9994850368081645
  Precision Score: 0.9333333333333333
     Recall Score: 0.7567567567567568
         F1 Score: 0.835820895522388

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     85295
           1       0.93      0.76      0.84       148

    accuracy                           1.00     85443
   macro avg       0.97      0.88      0.92     85443
weighted avg       1.00      1.00      1.00     85443

