In [22]:
# 2_model_training.ipynb

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
import joblib

# --- Data -------------------------------------------------------------------
# Read the pre-cleaned dataset from the current working directory.
df = pd.read_csv("default_credit_card_clean.csv")

# "default" is the label column; every remaining column is used as a feature.
y = df["default"]
X = df.drop("default", axis=1)

# --- Split ------------------------------------------------------------------
# Hold out 20% of the rows for evaluation; the fixed seed keeps the shuffle
# repeatable between runs.
# NOTE(review): the split is not stratified on y. The class supports in the
# report are uneven, so consider stratify=y — that would change the metrics.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# --- Fit --------------------------------------------------------------------
# 100-tree random forest with a fixed seed. fit() returns the estimator itself,
# so construction and training are chained into one expression.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train
)

# --- Evaluate ---------------------------------------------------------------
# Column 1 of predict_proba is the probability of the second class (presumably
# label 1, i.e. "default") and feeds the AUC; hard labels feed the report.
y_proba = model.predict_proba(X_test)[:, 1]
y_pred = model.predict(X_test)

print("\n📊 Classification Report:\n")
report_text = classification_report(y_test, y_pred)
print(report_text)

auc_value = roc_auc_score(y_test, y_proba)
print("\n🔥 AUC Score:", auc_value)

# --- Persist ----------------------------------------------------------------
# Serialize the fitted estimator to the current working directory.
model_path = "rf_credit_model.pkl"
joblib.dump(model, model_path)
print("\n✅ Model saved as 'rf_credit_model.pkl'")



📊 Classification Report:

              precision    recall  f1-score   support

           0       0.84      0.94      0.89      4687
           1       0.64      0.37      0.47      1313

    accuracy                           0.82      6000
   macro avg       0.74      0.65      0.68      6000
weighted avg       0.80      0.82      0.80      6000


🔥 AUC Score: 0.7577788444679593

✅ Model saved as 'rf_credit_model.pkl'
