In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                            f1_score, roc_auc_score, confusion_matrix, 
                            classification_report, roc_curve, precision_recall_curve)
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns

def _positive_class_scores(model, X):
    """Return continuous positive-class scores for X, or None if unavailable."""
    if hasattr(model, "predict_proba"):
        # Column 1 is taken as the positive class, as in a binary classifier.
        return model.predict_proba(X)[:, 1]
    if hasattr(model, "decision_function"):
        # Non-thresholded decision values are valid inputs for the ROC / PR
        # utilities, so margin-based models still get meaningful curves.
        return model.decision_function(X)
    return None


def evaluate_model(model, X, y, model_name=""):
    """Print evaluation metrics and show diagnostic plots for a fitted classifier.

    Prints a classification report and the ROC AUC, then displays a confusion
    matrix heatmap, a ROC curve and a precision-recall curve.

    Scores for the ROC / precision-recall output come from column 1 of
    ``predict_proba`` when the model has it, otherwise from
    ``decision_function``. If the model exposes neither, the score-based
    output is skipped with a message instead of being computed from constant
    placeholder scores (which would report a misleading AUC of 0.5).

    NOTE(review): indexing column 1 of ``predict_proba`` presumably means the
    caller passes a binary classifier -- confirm before using with multiclass
    models.

    Args:
        model: Fitted estimator providing ``predict`` and, optionally,
            ``predict_proba`` or ``decision_function``.
        X: Feature data passed to the model's prediction methods.
        y: True labels for the rows of ``X``.
        model_name: Label used in the printed header, plot titles and legends.

    Returns:
        None. Output is printed and plotted.
    """
    y_pred = model.predict(X)
    y_score = _positive_class_scores(model, X)

    # Metrics
    print(f"\n{'='*50}\nEvaluation for {model_name}\n{'='*50}")
    print(classification_report(y, y_pred))
    if y_score is None:
        auc = None
        print("ROC AUC: n/a (model has neither predict_proba nor decision_function)")
    else:
        # Computed once and reused in the ROC legend below.
        auc = roc_auc_score(y, y_score)
        print(f"ROC AUC: {auc:.4f}")

    # Confusion Matrix
    plt.figure(figsize=(6, 4))
    sns.heatmap(confusion_matrix(y, y_pred), annot=True, fmt='d', cmap='Blues')
    plt.title(f'{model_name} Confusion Matrix')
    plt.show()

    if y_score is None:
        # Without continuous scores the ROC / PR curves would be meaningless.
        return

    # ROC Curve
    fpr, tpr, _ = roc_curve(y, y_score)
    plt.figure(figsize=(6, 4))
    plt.plot(fpr, tpr, label=f'{model_name} (AUC = {auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal for reference
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.show()

    # Precision-Recall Curve
    precision, recall, _ = precision_recall_curve(y, y_score)
    plt.figure(figsize=(6, 4))
    plt.plot(recall, precision, label=model_name)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend()
    plt.show()

# Feature sets: the full train/test frames as-is, plus a variant restricted
# to the single 'cibil_score' column (selected with a one-element list).
X_train_all, X_test_all = X_train_df, X_test_df
X_train_cibil, X_test_cibil = (
    X_train_df[['cibil_score']],
    X_test_df[['cibil_score']],
)

# Four models are trained on the same training labels and scored on the same
# test labels: {logistic regression, random forest} x {all features, CIBIL only}.
# Each estimator is built and fitted in one expression; `fit` returns the
# estimator itself, so the bound name refers to the fitted model.

# Model 1: Logistic Regression (All Features)
lr_all = LogisticRegression(max_iter=1000).fit(X_train_all, y_train_filtered)
evaluate_model(
    lr_all, X_test_all, y_test_filtered, model_name="LogReg (All Features)"
)

# Model 2: Logistic Regression (Only CIBIL)
lr_cibil = LogisticRegression(max_iter=1000).fit(X_train_cibil, y_train_filtered)
evaluate_model(
    lr_cibil, X_test_cibil, y_test_filtered, model_name="LogReg (CIBIL Only)"
)

# Model 3: Random Forest (All Features)
rf_all = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_all, y_train_filtered
)
evaluate_model(
    rf_all, X_test_all, y_test_filtered, model_name="Random Forest (All Features)"
)

# Model 4: Random Forest (Only CIBIL)
rf_cibil = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_cibil, y_train_filtered
)
evaluate_model(
    rf_cibil, X_test_cibil, y_test_filtered, model_name="Random Forest (CIBIL Only)"
)

# Horizontal bar chart of the all-features random forest's importances,
# labelled by training column name and sorted ascending by value.
plt.figure(figsize=(10, 6))
(
    pd.Series(data=rf_all.feature_importances_, index=X_train_all.columns)
    .sort_values(ascending=True)
    .plot(kind='barh', title='Random Forest Feature Importances')
)
plt.show()