# Credit Card Fraud Detection: End-to-End (Minimal)

This notebook performs an end-to-end workflow on the classic creditcard.csv dataset, including:
- Data loading and quick EDA
- Minimal feature engineering
- Statistical significance tests
- Modeling with class imbalance handling (class weights vs SMOTE) and light tuning
- Evaluation with ROC/PR curves, confusion matrix, and cost metric
- SHAP-based explainability (global and local)

Note: This is a minimal version designed to run quickly.

In [None]:
# Setup and Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, roc_curve, precision_recall_curve, confusion_matrix, average_precision_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils.class_weight import compute_class_weight
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
import shap
import warnings
warnings.filterwarnings('ignore')
print('Imports ready')


In [None]:
# Load Dataset (expects creditcard.csv in the same folder)
# Read the CSV relative to the working directory, then report its dimensions and first rows.
csv_path = 'creditcard.csv'
df = pd.read_csv(csv_path)
shape_text = str(df.shape)
print('Rows, Cols: ' + shape_text)
print(df.head())


In [None]:
# Minimal Feature Engineering: derive hour_of_day and day_of_week from Time.
# Time is seconds elapsed since the first transaction, so both columns are
# offsets relative to that first record (day_of_week is a day counter modulo 7,
# not a calendar weekday).
elapsed_seconds = df['Time']
whole_hours = elapsed_seconds // 3600
whole_days = elapsed_seconds // 86400
df['hour_of_day'] = (whole_hours % 24).astype(int)
df['day_of_week'] = (whole_days % 7).astype(int)
preview_cols = ['Time', 'hour_of_day', 'day_of_week']
print(df[preview_cols].head())


In [None]:
# Statistical Tests: Compare Amount and hour_of_day by Class
from scipy.stats import ttest_ind, mannwhitneyu, chi2_contingency, kruskal

# Split the rows by label once; every test below reuses these two frames.
fraud = df[df['Class'] == 1]
nonfraud = df[df['Class'] == 0]

# Amount: Welch's unequal-variance t-test and the rank-based Mann-Whitney U test.
fraud_amount = fraud['Amount']
nonfraud_amount = nonfraud['Amount']
tstat, tp = ttest_ind(fraud_amount, nonfraud_amount, equal_var=False)
u, up = mannwhitneyu(fraud_amount, nonfraud_amount, alternative='two-sided')

# hour_of_day: chi-square on the hour x class contingency table, plus Kruskal-Wallis
# on the two per-class hour samples (non-fraud first, fraud second).
ct = pd.crosstab(df['hour_of_day'], df['Class'])
chi2, chip, dof, exp = chi2_contingency(ct)
kw, kwp = kruskal(nonfraud['hour_of_day'], fraud['hour_of_day'])

# Report only the p-values, one line per test.
p_value_report = (
    ('Welch t-test p: ', tp),
    ('Mann-Whitney p: ', up),
    ('Chi-square p: ', chip),
    ('Kruskal-Wallis p: ', kwp),
)
for test_label, p_value in p_value_report:
    print(test_label + str(p_value))


In [None]:
# Train/Test split (stratified)
# Every column except the label is a feature; the label is cast to int.
feature_cols = df.columns[df.columns != 'Class'].tolist()
X = df.loc[:, feature_cols]
y = df['Class'].astype(int)

# Hold out 20% of the rows, preserving the class ratio in both parts.
split_parts = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts
shape_texts = [str(X_train.shape), str(X_test.shape)]
print('Train/Test shapes: ' + ' '.join(shape_texts))


In [None]:
# Build models and light GridSearchCV using average_precision as scoring
# One shuffled, seeded 3-fold stratified splitter is shared by every grid search.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# 'balanced' weights computed once from the training labels and passed to the
# weighted models as an explicit {label: weight} mapping.
class_weights = compute_class_weight(class_weight='balanced', classes=np.array([0, 1]), y=y_train)
cw = {0: class_weights[0], 1: class_weights[1]}

# Strategy 1 - class weights: the classifier itself is told how to weigh each label.
pipe_logit_w = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression(max_iter=200, solver='lbfgs', class_weight=cw, n_jobs=1)),
])
pipe_rf_w = Pipeline([
    ('clf', RandomForestClassifier(class_weight=cw, n_estimators=200, random_state=42, n_jobs=-1)),
])

# Strategy 2 - SMOTE: an oversampling step sits inside an imblearn pipeline ahead
# of an unweighted classifier.
pipe_logit_sm = ImbPipeline([
    ('scaler', StandardScaler()),
    ('smote', SMOTE(random_state=42)),
    ('clf', LogisticRegression(max_iter=200, solver='lbfgs', n_jobs=1)),
])
pipe_rf_sm = ImbPipeline([
    ('smote', SMOTE(random_state=42)),
    ('clf', RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1)),
])

# Search spaces; the 'clf__' prefix routes each parameter to the pipeline's final step.
param_logit = {'clf__C': [0.1, 1.0, 10.0]}
param_rf = {
    'clf__n_estimators': [200, 400],
    'clf__max_depth': [None, 10],
    'clf__min_samples_split': [2, 5],
}

# Model selection metric, passed to GridSearchCV by name.
scoring = 'average_precision'
def run_gs(name, est, grid):
    """Grid-search ``est`` over ``grid`` on the training split.

    Relies on the notebook-level ``cv``, ``scoring``, ``X_train`` and ``y_train``.

    Args:
        name: Label printed in front of the winning parameter set.
        est: Estimator or pipeline to tune.
        grid: Parameter grid handed to ``GridSearchCV``.

    Returns:
        The search's ``best_estimator_``.
    """
    search = GridSearchCV(estimator=est, param_grid=grid, cv=cv, scoring=scoring, n_jobs=-1)
    search.fit(X_train, y_train)
    winning_params = str(search.best_params_)
    print(name + ' best params: ' + winning_params)
    return search.best_estimator_
# Tune each of the four pipelines in turn. The calls stay sequential so that a
# failure in a later search does not discard the estimators already fitted.
best_logit_w = run_gs(
    name='LogReg_ClassWeight',
    est=pipe_logit_w,
    grid=param_logit,
)
best_rf_w = run_gs(
    name='RF_ClassWeight',
    est=pipe_rf_w,
    grid=param_rf,
)
best_logit_sm = run_gs(
    name='LogReg_SMOTE',
    est=pipe_logit_sm,
    grid=param_logit,
)
best_rf_sm = run_gs(
    name='RF_SMOTE',
    est=pipe_rf_sm,
    grid=param_rf,
)


In [None]:
# Evaluate models with multiple metrics and plots
def _styled_axes():
    """Create a 12x8 figure whose axes use the notebook's curve-plot styling."""
    fig, ax = plt.subplots(figsize=(12, 8))
    fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
    ax.set_axisbelow(True)  # keep grid lines behind the plotted curve
    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)
    ax.spines['left'].set_color('#171717')
    ax.spines['bottom'].set_color('#171717')
    ax.grid(axis='y', color='#F3F4F6')
    return fig, ax


def _label_and_show(ax, title, xlabel, ylabel):
    """Apply title, axis labels, tick styling and legend to ``ax``, then show the figure."""
    ax.set_title(title, fontsize=20, color='#171717', pad=15)
    ax.set_xlabel(xlabel, fontsize=16, color='#171717', labelpad=10)
    ax.set_ylabel(ylabel, fontsize=16, color='#171717', labelpad=10)
    ax.tick_params(axis='both', labelsize=14, colors='#171717')
    ax.legend()
    plt.show()


def evaluate(name, model, X_test, y_test, threshold=0.5, fp_cost=1, fn_cost=10):
    """Score a fitted binary classifier on a hold-out set and plot its ROC and PR curves.

    Args:
        name: Label used in printed output, plot titles and the returned record.
        model: Fitted estimator exposing ``predict_proba``; column 1 is used as
            the positive-class score.
        X_test: Hold-out features.
        y_test: Hold-out labels (0/1).
        threshold: Score at or above which a row is predicted positive.
        fp_cost: Cost charged per false positive.
        fn_cost: Cost charged per false negative.

    Returns:
        dict with keys ``model``, ``precision``, ``recall``, ``f1``, ``roc_auc``,
        ``avg_precision`` and ``cost_FP<fp_cost>_FN<fn_cost>``
        (``cost_FP1_FN10`` with the default costs).

    Side effects:
        Prints the metrics, resets matplotlib to the 'default' style, sets the
        global font rcParams to 'Inter', and shows two figures.
    """
    proba = model.predict_proba(X_test)[:, 1]
    preds = (proba >= threshold).astype(int)

    # Thresholded metrics for the positive class.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test, preds, average='binary', zero_division=0
    )
    # Threshold-free ranking metrics computed from the raw scores.
    rocauc = roc_auc_score(y_test, proba)
    ap = average_precision_score(y_test, proba)

    # Pin the label order so the matrix is always 2x2, even when one class is
    # absent from both the truth and the predictions.
    cm = confusion_matrix(y_test, preds, labels=[0, 1])
    FP = int(cm[0, 1])
    FN = int(cm[1, 0])
    cost = fp_cost * FP + fn_cost * FN

    print(name + ' metrics:')
    print('Precision: ' + str(round(precision, 4)))
    print('Recall: ' + str(round(recall, 4)))
    print('F1: ' + str(round(f1, 4)))
    print('ROC-AUC: ' + str(round(rocauc, 4)))
    print('PR-AUC (AP): ' + str(round(ap, 4)))
    print('Confusion Matrix:')
    print(cm)
    print('Cost (FP=' + str(fp_cost) + ', FN=' + str(fn_cost) + '): ' + str(cost))

    fpr, tpr, _ = roc_curve(y_test, proba)
    prec_c, rec_c, _ = precision_recall_curve(y_test, proba)

    # Global plot styling, applied on every call exactly as before.
    plt.style.use('default')
    plt.rcParams['font.family'] = ['Inter']
    plt.rcParams['font.sans-serif'] = ['Inter']

    # ROC curve with the chance diagonal for reference.
    fig, ax = _styled_axes()
    ax.plot(fpr, tpr, color='#2563EB', label=name + ' ROC')
    ax.plot([0, 1], [0, 1], color='#E5E7EB', linestyle='--', label='Random')
    _label_and_show(ax, 'ROC Curve - ' + name, 'False Positive Rate', 'True Positive Rate')

    # Precision-recall curve.
    fig, ax = _styled_axes()
    ax.plot(rec_c, prec_c, color='#24EB84', label=name + ' PR')
    _label_and_show(ax, 'Precision-Recall Curve - ' + name, 'Recall', 'Precision')

    cost_key = 'cost_FP' + str(fp_cost) + '_FN' + str(fn_cost)
    return {
        'model': name,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'roc_auc': rocauc,
        'avg_precision': ap,
        cost_key: cost,
    }
# Evaluate the four tuned models in a fixed order, then rank them by
# average precision, with ROC-AUC and F1 as tie-breakers.
results = []
tuned_models = [
    ('LogReg_ClassWeight', best_logit_w),
    ('RF_ClassWeight', best_rf_w),
    ('LogReg_SMOTE', best_logit_sm),
    ('RF_SMOTE', best_rf_sm),
]
for model_label, fitted_model in tuned_models:
    results.append(evaluate(model_label, fitted_model, X_test, y_test))

ranking_columns = ['avg_precision', 'roc_auc', 'f1']
results_df = pd.DataFrame(results).sort_values(ranking_columns, ascending=False)
print('Results summary:')
print(results_df.head())


In [None]:
# SHAP explainability using the best model from results_df
SHAP_MAX_ROWS = 2000       # cap on test rows explained for the global plots
SHAP_LOCAL_EXAMPLES = 3    # number of fraud rows that get a waterfall plot
SHAP_SEED = 42             # seed for the global-plot row sample

best_name = results_df.iloc[0]['model']
print('Best model for SHAP: ' + str(best_name))
model_map = {
    'LogReg_ClassWeight': best_logit_w,
    'RF_ClassWeight': best_rf_w,
    'LogReg_SMOTE': best_logit_sm,
    'RF_SMOTE': best_rf_sm
}
best_model = model_map[best_name]
# The final pipeline step is the classifier; tree models take the TreeExplainer path.
clf = best_model.named_steps['clf']
is_tree = isinstance(clf, RandomForestClassifier)


def _fraud_tree_shap(tree_explainer, X):
    """Return (values, base) for class 1 from a TreeExplainer.

    ``values`` has shape (rows, features); ``base`` has one entry per row.
    Handles both return layouts of ``shap_values``: a per-class list, or a
    single (rows, features, classes) array.
    """
    raw = tree_explainer.shap_values(X)
    if isinstance(raw, list):
        values = np.asarray(raw[1])
    else:
        values = np.asarray(raw)
        if values.ndim == 3:
            values = values[:, :, 1]
    expected = np.ravel(tree_explainer.expected_value)
    base = float(expected[1]) if expected.size > 1 else float(expected[0])
    return values, np.full(len(X), base)


def _fraud_agnostic_shap(callable_explainer, X):
    """Return (values, base) for class 1 from an explainer built on predict_proba.

    The Explanation carries one output per predict_proba column, so the class
    axis is the LAST axis of ``values`` and the second axis of ``base_values``.
    """
    explanation = callable_explainer(X)
    values = np.asarray(explanation.values)
    base = np.asarray(explanation.base_values, dtype=float)
    if values.ndim == 3:
        values = values[:, :, 1]
    if base.ndim == 2:
        base = base[:, 1]
    return values, base


# Global plots use a seeded sample of the test split when it exceeds the cap;
# the rows used for local explanations are explained separately below.
if len(X_test) > SHAP_MAX_ROWS:
    X_global = X_test.sample(n=SHAP_MAX_ROWS, random_state=SHAP_SEED)
else:
    X_global = X_test

if is_tree:
    try:
        explainer = shap.TreeExplainer(clf)
        shap_values, _ = _fraud_tree_shap(explainer, X_global)
        fraud_shap = _fraud_tree_shap
        print('Computed Tree SHAP values')
    except Exception as err:
        # Report the failure and switch paths; clearing is_tree keeps every later
        # step consistent with the explainer that is actually in use.
        print('Tree SHAP failed: ' + repr(err))
        print('Fallback to model-agnostic SHAP values')
        is_tree = False
if not is_tree:
    explainer = shap.Explainer(best_model.predict_proba, X_train, feature_names=X_train.columns.tolist())
    shap_values, _ = _fraud_agnostic_shap(explainer, X_global)
    fraud_shap = _fraud_agnostic_shap
    print('Computed model-agnostic SHAP values')

# Global importance (bar) - mean |SHAP| of the fraud class
plt.figure(figsize=(12,8))
shap.summary_plot(shap_values, X_global, plot_type='bar', show=False)
plt.title('Global Feature Importance (SHAP) - ' + str(best_name))
plt.show()

# Beeswarm for positive class
plt.figure(figsize=(12,8))
shap.summary_plot(shap_values, X_global, show=False)
plt.title('SHAP Summary (Fraud Class) - ' + str(best_name))
plt.show()

# Local explanations for a few fraud samples
fraud_idx = y_test[y_test == 1].index[:SHAP_LOCAL_EXAMPLES]
if len(fraud_idx) > 0:
    X_local = X_test.loc[fraud_idx]
    local_vals, local_base = fraud_shap(explainer, X_local)
    local_feature_names = X_test.columns.tolist()
    for pos, i in enumerate(fraud_idx):
        # The waterfall plot needs a scalar base value, hence float().
        local_exp = shap.Explanation(
            values=local_vals[pos],
            base_values=float(local_base[pos]),
            data=X_local.iloc[pos].values,
            feature_names=local_feature_names,
        )
        shap.plots.waterfall(local_exp, max_display=15, show=False)
        plt.title('Local SHAP Waterfall - index ' + str(i))
        plt.show()
print('SHAP global and local explanations generated.')


In [None]:
# Save results summary
# Write the ranked metrics table to the working directory without the index column.
summary_path = 'model_results_summary_minimal.csv'
results_df.to_csv(summary_path, index=False)
print('Saved model_results_summary_minimal.csv')
