In [None]:
# ==========================
# Credit Card Fraud Detection - Clean Pipeline
# ==========================

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.combine import SMOTEENN
from imblearn.pipeline import Pipeline

# ---------- Configuration ----------
# Path to the credit-card transactions CSV (edit to match your machine).
DATA_PATH = r"C:\Users\Dragon\Downloads\archive (2)\creditcard.csv"

# Number of target classes to model; set to 3 for the multi-class variant.
NUM_CLASSES = 2

# Seed reused wherever the script needs reproducible randomness.
RANDOM_STATE = 42

# ---------- Load & Clean ----------
# Read the CSV and discard exact duplicate rows.
df = pd.read_csv(DATA_PATH).drop_duplicates()

# A row without a label cannot be used for training or evaluation. Drop it
# first so the median fill below never invents a "Class" value for it.
df = df.dropna(subset=["Class"])

# Fill the remaining gaps in numeric columns with that column's median.
# NOTE(review): the medians are computed on the full dataset, before the
# train/test split, so test rows influence the imputed values.
df = df.fillna(df.median(numeric_only=True))

# ---------- Target (2 or 3 classes) ----------
if NUM_CLASSES == 2:
    # Binary target: use the dataset's own "Class" column unchanged.
    y = df["Class"].values
elif NUM_CLASSES == 3:
    # Three-class target:
    #   1 -> rows whose original Class is 1
    #   2 -> every other row whose Amount is above the threshold
    #   0 -> every other row at or below the threshold
    # The threshold is the 75th percentile of Amount among Class == 0 rows.
    legit = df[df["Class"] == 0]
    thr = legit["Amount"].quantile(0.75)
    y = np.where(df["Class"] == 1, 1, np.where(df["Amount"] > thr, 2, 0))
else:
    # Fail loudly instead of silently building the 3-class target for an
    # unsupported setting.
    raise ValueError(f"NUM_CLASSES must be 2 or 3, got {NUM_CLASSES!r}")

# Feature matrix: every column except the label.
X = df.drop(columns="Class")

# ---------- Train/Test Split ----------
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

# ---------- Models & Balancing Methods ----------
# Classifiers to compare, keyed by the label used in the printed report.
models = {}
models["Logistic Regression"] = LogisticRegression(
    random_state=RANDOM_STATE,
    max_iter=2000,
)
models["Random Forest"] = RandomForestClassifier(
    random_state=RANDOM_STATE,
    n_estimators=300,
)

# Resampling strategies to compare. None means "train on the data as-is";
# every sampler receives the same seed.
balancers = {"No Balancing": None}
balancers.update({
    label: sampler_cls(random_state=RANDOM_STATE)
    for label, sampler_cls in (
        ("Random Under-Sampling", RandomUnderSampler),
        ("Random Over-Sampling", RandomOverSampler),
        ("SMOTE", SMOTE),
        ("SMOTEENN", SMOTEENN),
    )
})

# ---------- Run Experiments ----------
# Fit and evaluate every (balancer, model) combination on the same split,
# printing a classification report and confusion matrix for each.
for bal_name, balancer in balancers.items():
    for model_name, model in models.items():
        # Pipeline order: scale the features, optionally resample, classify.
        # Pipeline here is imblearn's, which accepts a sampler as a step.
        steps = [("scaler", StandardScaler())]
        # Compare against None explicitly: "No Balancing" is the only entry
        # meant to be skipped, and the check should not depend on how an
        # estimator object evaluates in a boolean context.
        if balancer is not None:
            steps.append(("balancer", balancer))
        steps.append(("model", model))

        pipe = Pipeline(steps)
        pipe.fit(X_train, y_train)
        y_pred = pipe.predict(X_test)

        print("="*60)
        print(f"[{model_name}] with [{bal_name}]")
        print("-"*60)
        # zero_division=0 reports 0 instead of warning when a class
        # receives no predictions.
        print(classification_report(y_test, y_pred, zero_division=0))
        print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


[Logistic Regression] with [No Balancing]
------------------------------------------------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56651
           1       0.85      0.58      0.69        95

    accuracy                           1.00     56746
   macro avg       0.92      0.79      0.84     56746
weighted avg       1.00      1.00      1.00     56746

Confusion Matrix:
 [[56641    10]
 [   40    55]]
[Random Forest] with [No Balancing]
------------------------------------------------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56651
           1       0.97      0.74      0.84        95

    accuracy                           1.00     56746
   macro avg       0.99      0.87      0.92     56746
weighted avg       1.00      1.00      1.00     56746

Confusion Matrix:
 [[56649     2]
 [   25    70]]
[Logistic Regression] with [Random Under-Sampl