In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.utils import resample

In [3]:
# 1. Load data
df = pd.read_csv('../data/fraud_mock.csv')

# 2. Drop rule-based flag and identifier columns
df = df.drop(columns=['is_flagged_fraud', 'src_acc', 'dst_acc'])
print(f"Original fraud rate: {df['is_fraud'].mean():.4f}")

# 3. Split into features/target
X = df.drop(columns=['is_fraud'])
y = df['is_fraud']

# 4. Train/test split (stratified)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

Original fraud rate: 0.0013


In [None]:
# 5. Manual resampling:
#   - Oversample fraud cases to 10% of non-fraud
#   - Undersample non-fraud to match oversampled fraud count (1:1 ratio)
train = pd.concat([X_train, y_train], axis=1)
fraud = train[train.is_fraud == 1]
nonfraud = train[train.is_fraud == 0]

# Oversample fraud
target_fraud_n = int(len(nonfraud) * 0.1)
fraud_over = resample(
    fraud,
    replace=True,
    n_samples=target_fraud_n,
    random_state=42
)
print(f"Fraud oversampled to: {len(fraud_over)} samples")

# Undersample non-fraud to match oversampled fraud count
nonfraud_under = resample(
    nonfraud,
    replace=False,
    n_samples=len(fraud_over),
    random_state=42
)
resampled = pd.concat([nonfraud_under, fraud_over])
X_res = resampled.drop(columns=['is_fraud'])
y_res = resampled['is_fraud']
print(f"Resampled distribution: fraud={sum(y_res==1)}, nonfraud={sum(y_res==0)}")

Fraud oversampled to: 508352 samples
Resampled distribution: fraud=508352, nonfraud=508352


In [17]:
# 6. Preprocessing pipeline
numeric_features = ['time_ind', 'amount', 'src_bal', 'src_new_bal', 'dst_bal', 'dst_new_bal']
categorical_feats = ['transac_type']
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_feats)
    ],
    remainder='drop',
    force_int_remainder_cols=True
)

In [18]:
# 7. Model pipeline and hyperparameter search
model_pipeline = Pipeline([
    ('prep', preprocessor),
    ('clf', RandomForestClassifier(random_state=42, class_weight='balanced'))
])
param_grid = {
    'clf__n_estimators': [100, 200],
    'clf__max_depth': [None, 10, 20]
}
grid = GridSearchCV(
    model_pipeline,
    param_grid,
    cv=3,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=1
)
grid.fit(X_res, y_res)
print(f"Best parameters: {grid.best_params_}")
print(f"CV ROC AUC (resampled): {grid.best_score_:.4f}")

Fitting 3 folds for each of 6 candidates, totalling 18 fits


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-

Best parameters: {'clf__max_depth': None, 'clf__n_estimators': 100}
CV ROC AUC (resampled): 1.0000


In [19]:
# 8. Evaluate on test set
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test)
y_proba = best_model.predict_proba(X_test)[:,1]

print("\n=== Test Set Evaluation ===")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print(f"Test ROC AUC: {roc_auc_score(y_test, y_proba):.4f}")

# 9. Interpretation
print("\nInterpretation:")
print("- False Positives: legitimate transactions flagged as fraud => customer friction, costs.")
print("- False Negatives: fraud missed => financial loss, reputational damage.")


=== Test Set Evaluation ===
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1       0.70      0.93      0.80      1643

    accuracy                           1.00   1272524
   macro avg       0.85      0.97      0.90   1272524
weighted avg       1.00      1.00      1.00   1272524

Confusion Matrix:
 [[1270209     672]
 [    110    1533]]
Test ROC AUC: 0.9992

Interpretation:
- False Positives: legitimate transactions flagged as fraud => customer friction, costs.
- False Negatives: fraud missed => financial loss, reputational damage.


In [20]:
import joblib
MODEL_PATH = 'fraud_detection_rf_model.joblib'
joblib.dump({'model': grid.best_estimator_, 'X_test': X_test, 'y_test': y_test}, MODEL_PATH)
print(f"Model saved to {MODEL_PATH}")

Model saved to fraud_detection_rf_model.joblib


In [21]:
data = joblib.load(MODEL_PATH)
model, X_test, y_test = data['model'], data['X_test'], data['y_test']
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:,1]
print("\n=== Test Set Evaluation ===")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print(f"ROC AUC: {roc_auc_score(y_test, y_proba):.4f}")
print("\nInterpretation:")
print("- False Positives: legitimate transactions flagged as fraud => customer friction.")
print("- False Negatives: fraud missed => financial loss.")


=== Test Set Evaluation ===
              precision    recall  f1-score   support

           0       1.00      1.00      1.00   1270881
           1       0.70      0.93      0.80      1643

    accuracy                           1.00   1272524
   macro avg       0.85      0.97      0.90   1272524
weighted avg       1.00      1.00      1.00   1272524

Confusion Matrix:
 [[1270209     672]
 [    110    1533]]
ROC AUC: 0.9992

Interpretation:
- False Positives: legitimate transactions flagged as fraud => customer friction.
- False Negatives: fraud missed => financial loss.
