# Credit Card Fraud Detection – Updated Notebook
This notebook trains and compares several classifiers for credit card fraud detection. In this updated version, the XGBoost training no longer uses the unsupported early stopping parameters.

In [None]:
# Optional: install or upgrade XGBoost in the kernel's environment if desired
# %pip install --upgrade xgboost --quiet

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, roc_auc_score
print('Imports successful')

In [None]:
# Load the raw transactions, then report the frame's shape followed by the
# relative frequency of each 'Class' value.
df = pd.read_csv('creditcard.csv')
print(
    f"Initial shape: {df.shape}",
    df['Class'].value_counts(normalize=True),
    sep="\n",
)

In [None]:
# Preprocessing: scale 'Amount', drop 'Time'
assert df.isnull().sum().sum() == 0, "Missing values detected!"
scaler = StandardScaler()
df['Amount'] = scaler.fit_transform(df[['Amount']])
df.drop(columns=['Time'], inplace=True)
print('Preprocessing complete')

In [None]:
X = df.drop(columns=['Class'])
y = df['Class']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
print(f"Train: {X_train.shape}, Test: {X_test.shape}")

In [None]:
# Oversample the training split with SMOTE to address class imbalance.
# Only X_train / y_train are resampled; the test split is left as-is.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X=X_train, y=y_train)
print("Resampled train: {}".format(X_train_res.shape))

In [None]:
# Candidate classifiers, keyed by display name. All of them are fitted on the
# SMOTE-resampled training data (X_train_res / y_train_res) by the training loop.
#
# XGBoost's scale_pos_weight has to describe the labels the model is actually
# fitted on. Deriving it from the pre-SMOTE y_train would apply the original
# negative/positive ratio on top of the already resampled data and compensate
# for the imbalance twice, so the ratio is taken from y_train_res instead.
_fit_labels = np.asarray(y_train_res)
_n_pos = int(_fit_labels.sum())          # labels are 0/1, so the sum counts positives
_n_neg = int(_fit_labels.size) - _n_pos
# Fall back to a neutral weight if there are no positive labels at all.
xgb_scale_pos_weight = (_n_neg / _n_pos) if _n_pos else 1.0

models = {
    'Logistic Regression': LogisticRegression(
        class_weight='balanced', max_iter=500, n_jobs=-1, random_state=42
    ),
    'Decision Tree': DecisionTreeClassifier(
        class_weight='balanced', random_state=42
    ),
    'Random Forest': RandomForestClassifier(
        n_estimators=50, class_weight='balanced', n_jobs=-1, random_state=42
    ),
    'XGBoost': XGBClassifier(
        n_estimators=100,
        scale_pos_weight=xgb_scale_pos_weight,
        n_jobs=-1,
        # NOTE(review): recent XGBoost releases appear to ignore/deprecate
        # use_label_encoder - confirm against the installed version.
        use_label_encoder=False,
        eval_metric='auc',
        random_state=42
    ),
    'HistGradientBoosting': HistGradientBoostingClassifier(
        max_iter=100,
        early_stopping=True,
        random_state=42
    )
}

In [None]:
# Fit every candidate on the resampled training data, score it on the untouched
# test split, and collect the metrics for class '1' (reported as "fraud") plus
# ROC AUC into `results`, keyed by model name.
results = {}
for name, model in models.items():
    print(f"\n=== Training {name} ===")
    # Same fit call for every estimator.
    model.fit(X_train_res, y_train_res)

    # Hard labels feed the classification report; the second predict_proba
    # column feeds ROC AUC.
    preds = model.predict(X_test)
    proba = model.predict_proba(X_test)[:, 1]
    report = classification_report(y_test, preds, output_dict=True)
    auc = roc_auc_score(y_test, proba)

    fraud = report['1']
    precision, recall, f1 = (
        fraud[key] for key in ('precision', 'recall', 'f1-score')
    )
    results[name] = dict(
        precision_fraud=precision,
        recall_fraud=recall,
        f1_fraud=f1,
        roc_auc=auc,
    )

    print(f"{name} ROC AUC: {auc:.4f}")
    print(f" Fraud Precision: {precision:.3f}, Recall: {recall:.3f}, F1: {f1:.3f}")

In [None]:
# Tabulate the collected metrics with one row per model and one column per
# metric, then print the table ranked by ROC AUC, best first.
# pandas is already imported as `pd` in the notebook's import cell, so it is
# not re-imported here.
summary_df = pd.DataFrame(results).T
print("\n=== Summary of all models ===")
print(summary_df.sort_values('roc_auc', ascending=False))