In [8]:
# fraud_model_train.py

import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# ───── 1. Load Data ─────
df = pd.read_csv("payment_fraud.csv")
df.rename(columns={'label': 'isFraud'}, inplace=True)
df.dropna(inplace=True)

# ───── 2. Define features ─────
X = df.drop('isFraud', axis=1)
y = df['isFraud']

# Column types
categorical_cols = ['paymentMethod', 'Category']
numerical_cols = ['accountAgeDays', 'numItems', 'localTime', 'paymentMethodAgeDays', 'isWeekend']

# ───── 3. Preprocessing ─────
preprocessor = ColumnTransformer(transformers=[
    ('num', StandardScaler(), numerical_cols),
    ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_cols)
])

# ───── 4. Create Pipeline ─────
model_pipeline = Pipeline(steps=[
    ('preprocessing', preprocessor),
    ('classifier', RandomForestClassifier(class_weight='balanced', random_state=42))
])

# ───── 5. Train-test split ─────
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# ───── 6. Train model ─────
model_pipeline.fit(X_train, y_train)

# ───── 7. Evaluate ─────
y_pred = model_pipeline.predict(X_test)
print("📊 Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\n📈 Classification Report:")
print(classification_report(y_test, y_pred))

# ───── 8. Save model ─────
with open("fraud_pipeline.pkl", "wb") as f:
    pickle.dump(model_pipeline, f)

print("✅ Model saved as 'fraud_pipeline.pkl'")


📊 Confusion Matrix:
[[7715]]

📈 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7715

    accuracy                           1.00      7715
   macro avg       1.00      1.00      1.00      7715
weighted avg       1.00      1.00      1.00      7715

✅ Model saved as 'fraud_pipeline.pkl'
