# Credit Card Fraud Detection

An end-to-end workflow for building, evaluating, and deploying an XGBoost fraud detection model.

In [None]:
import os
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix

# 1. Load data
# The train and test CSVs are read from the dataset directory under the
# user's home folder (the path below points into the kagglehub cache).
base_path = os.path.expanduser('~/.cache/kagglehub/datasets/kartik2112/fraud-detection/versions/1')
df_train, df_test = (
    pd.read_csv(f"{base_path}/{csv_name}")
    for csv_name in ("fraudTrain.csv", "fraudTest.csv")
)
print("Train shape:", df_train.shape, "Test shape:", df_test.shape)


In [None]:
# 2. Preprocessing function
from scripts.process import preprocess  # adjust path if needed

# Fit a label encoder on the training categories and persist it for later use.
le_category = LabelEncoder().fit(df_train['category'].astype(str))
joblib.dump(le_category, 'streamlit_app/le_category.joblib')

# Per-category fraud rate: mean of `is_fraud` within each `category`,
# computed from the training rows only and reused for the test split.
# `is_fraud` is already a column of df_train, so group on the frame directly;
# joining that column back onto its own frame raises
# "ValueError: columns overlap but no suffix specified".
cat_rates = df_train.groupby('category')['is_fraud'].mean()

# Apply the same preprocessing (with the training-derived rates) to both splits.
df_train_prep = preprocess(df_train, cat_rates)
df_test_prep = preprocess(df_test, cat_rates)

# Separate the features from the `is_fraud` target.
X_train = df_train_prep.drop(columns=['is_fraud'])
y_train = df_train_prep['is_fraud']
X_test = df_test_prep.drop(columns=['is_fraud'])
y_test = df_test_prep['is_fraud']


In [None]:
# 3. Model training
# Hyperparameters are collected in one mapping and unpacked into the estimator.
xgb_params = dict(
    objective='binary:logistic',
    eval_metric='logloss',
    use_label_encoder=False,
    n_estimators=350,
    max_depth=10,
    learning_rate=0.15,
    subsample=1.0,
    colsample_bytree=0.8,
    # Ratio of negative to positive training labels.
    scale_pos_weight=(y_train==0).sum()/(y_train==1).sum(),
    random_state=42,
)
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# Evaluate on the held-out test split using the model's predicted labels.
y_pred = model.predict(X_test)
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print(classification_report(y_test, y_pred, digits=4))


In [None]:
# 4. Save artifacts
# Display labels keyed by raw category code (presumably the "user-friendly"
# names shown by the Streamlit app; confirm against the app code).
uf_names = {
    "misc_net": "Miscellaneous (Online)",
    "grocery_pos": "Grocery (In-Store)",
    # ... add remaining categories ...
}

# Persist the trained model, the per-category rates, and the label mapping.
for artifact, target in (
    (model, 'streamlit_app/final_model.joblib'),
    (cat_rates, 'streamlit_app/category_rates.joblib'),
    (uf_names, 'streamlit_app/uf_names.joblib'),
):
    joblib.dump(artifact, target)


**End of notebook.**