In [None]:
# # # Credit Card Default Prediction - Google Colab Version
# # ## Machine Learning Project with Google Drive Integration
# # ### 1. Mount Google Drive and Install Packages
# # %%
# # Mount Google Drive to access your files
# from google.colab import drive
# drive.mount('/content/drive')

# # Navigate to your project folder (update the path as needed)
# import os
# project_path = '/content/drive/MyDrive'  # Update this to your actual path
# os.chdir(project_path)

# ### 13. Project Summary
# # %%
# print("\n" + "="*60)
# print("PROJECT SUMMARY - CREDIT CARD DEFAULT PREDICTION")
# print("="*60)

# print(f"\nüìÅ Dataset: credit_card_default.csv")
# print(f"üìä Total samples: {len(df_clean):,}")
# print(f"üéØ Target variable: {target_col}")
# print(f"üîß Features used: {X.shape[1]}")

# print(f"\nü§ñ Models trained: {', '.join(results.keys())}")
# print(f"‚öñÔ∏è  Class balancing: {'SMOTE' if USE_SMOTE else 'class_weight parameter'}")

# print(f"\nüèÜ Best performing model: {best_model_name}")
# print(f"   üìà F1-Score: {best_result['f1']:.4f}")
# print(f"   üéØ Accuracy: {best_result['accuracy']:.4f}")

# print(f"\n‚úÖ Preprocessing steps completed:")
# print("   1. Google Drive mounting and file loading")
# print("   2. Missing values check and handling")
# print("   3. Duplicate removal")
# print("   4. Column name standardization")
# print("   5. Target variable identification")
# print("   6. Feature scaling")
# print("   7. Train-test split with stratification")
# print("   8. Class imbalance handling")
# print("   9. Model training and evaluation")

# print(f"\nüéØ Key insights:")
# print(f"   ‚Ä¢ Default rate: {(target_counts[1]/total*100):.1f}%")
# print(f"   ‚Ä¢ Class imbalance ratio: {imbalance_ratio:.2f}:1")
# print(f"   ‚Ä¢ Best model achieves {best_result['f1']:.1%} F1-Score")

# print("\n" + "="*60)
# print("PROJECT COMPLETED SUCCESSFULLY! ‚úÖ")
# print("="*60)
# print(f"Current working directory: {os.getcwd()}")

# # List files to verify
# print("\nFiles in directory:")
# print(os.listdir('.'))

# # Install compatible package versions
# !pip install scikit-learn==1.5.0 imbalanced-learn==0.13.0 -q

In [None]:
# ### 2. Import Libraries with Compatibility Check
# %%
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                           f1_score, confusion_matrix, classification_report, roc_auc_score)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
# Silence library warnings for the rest of the session so cell output stays readable.
warnings.filterwarnings('ignore')

# Report which scikit-learn build the kernel is running.
import sklearn
print(f"‚úÖ scikit-learn version: {sklearn.__version__}")

# SMOTE is optional. USE_SMOTE records whether it could be imported; later cells
# read the flag to choose between resampling and class_weight='balanced'.
try:
    from imblearn.over_sampling import SMOTE
except ImportError as e:
    USE_SMOTE = False
    print(f"‚ö†Ô∏è SMOTE import failed: {e}")
    print("Using sklearn class_weight balancing instead")
else:
    USE_SMOTE = True
    print("‚úÖ SMOTE imported successfully")

In [None]:
# ### 3. Load Dataset from Google Drive
# %%
print("\n" + "="*50)
print("LOADING DATASET FROM GOOGLE DRIVE")
print("="*50)

# Name of the CSV looked up in every candidate location below.
DATASET_FILENAME = 'credit_card_default.csv'

# Load the dataset: current directory first, then common Drive folders, then manual upload.
try:
    df = pd.read_csv(DATASET_FILENAME)
    print("‚úÖ Dataset loaded from 'credit_card_default.csv'")
except FileNotFoundError:
    # If not found, search in common locations
    print("Searching for file in Google Drive...")

    # Common paths to check
    search_paths = [
        '/content/drive/MyDrive/',
        '/content/drive/MyDrive/ML_Project/',
        '/content/drive/MyDrive/ML_Project/data/',
        '/content/drive/MyDrive/data/'
    ]

    file_found = False
    for path in search_paths:
        file_path = os.path.join(path, DATASET_FILENAME)
        if not os.path.exists(file_path):
            continue
        try:
            df = pd.read_csv(file_path)
        except (OSError, ValueError) as read_error:
            # pandas parser/empty-data errors derive from ValueError. Report the
            # unreadable copy and keep searching rather than hiding the failure
            # (a bare `except:` here previously masked real errors such as NameError).
            print(f"Skipping {file_path}: {read_error}")
            continue
        print(f"‚úÖ Dataset loaded from: {file_path}")
        file_found = True
        break

    if not file_found:
        # Last resort: ask the user to upload the file (Colab only, hence the local import).
        print("File not found. Please upload the CSV file.")
        from google.colab import files
        uploaded = files.upload()
        if not uploaded:
            # Without this guard `df` would be undefined and the cell would fail
            # further down with an unrelated NameError.
            raise FileNotFoundError(
                f"No file was uploaded; cannot continue without {DATASET_FILENAME}."
            )
        # Exactly one file becomes the dataset: the first one uploaded.
        filename = next(iter(uploaded))
        if len(uploaded) > 1:
            print(f"Multiple files uploaded; using only the first one: {filename}")
        df = pd.read_csv(filename)
        print(f"‚úÖ Dataset loaded from uploaded file: {filename}")

print(f"\nDataset Shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print("\nFirst 5 rows:")
display(df.head())


In [None]:
# ### 4. Data Preprocessing and Cleaning
# %%
print("\n" + "="*50)
print("DATA PREPROCESSING")
print("="*50)

# Work on a copy so the raw frame `df` is left untouched and this cell can be re-run.
df_clean = df.copy()

# 1. Check for missing values
print("\n1. MISSING VALUES CHECK:")
missing_values = df_clean.isnull().sum()
missing_percentage = (missing_values / len(df_clean)) * 100
missing_df = pd.DataFrame({
    'Missing_Count': missing_values,
    'Percentage': missing_percentage
})
display(missing_df[missing_df['Missing_Count'] > 0])

if missing_values.sum() == 0:
    print("‚úÖ No missing values found")
else:
    print(f"‚ö†Ô∏è Found {missing_values.sum()} missing values")
    # Only numeric columns are imputed (column median); other dtypes are left as they are.
    numeric_cols = df_clean.select_dtypes(include=[np.number]).columns
    df_clean[numeric_cols] = df_clean[numeric_cols].fillna(df_clean[numeric_cols].median())
    print("Filled numeric missing values with median")

# 2. Check for duplicates
print("\n2. DUPLICATES CHECK:")
duplicate_rows = df_clean.duplicated().sum()
print(f"Duplicate rows: {duplicate_rows}")
if duplicate_rows > 0:
    df_clean = df_clean.drop_duplicates()
    print(f"‚úÖ Removed {duplicate_rows} duplicate rows")
else:
    print("‚úÖ No duplicate rows found")

# 3. Check column names and standardize
print("\n3. COLUMN NAMES STANDARDIZATION:")
print(f"Original columns: {df_clean.columns.tolist()}")

# Trim, lowercase, and turn dots/spaces into underscores.
# regex=False is essential: on pandas < 2.0 str.replace defaults to regex mode,
# where '.' matches ANY character and would turn every name into underscores.
df_clean.columns = (
    df_clean.columns.str.strip()
    .str.lower()
    .str.replace('.', '_', regex=False)
    .str.replace(' ', '_', regex=False)
)
print(f"Standardized columns: {df_clean.columns.tolist()}")

# Identify target column: first match among the known names, in priority order.
possible_targets = ['default_payment_next_month', 'default_next_month', 'default', 'y']
target_col = next((col for col in possible_targets if col in df_clean.columns), None)

if target_col is None:
    # Fallback heuristic: the first numeric (non-boolean) column with exactly two
    # distinct values. The dtype test uses pandas' own predicate so int32, nullable
    # integer, etc. are recognised, not just int64/float64.
    binary_cols = [
        col for col in df_clean.columns
        if df_clean[col].nunique() == 2
        and pd.api.types.is_numeric_dtype(df_clean[col])
        and not pd.api.types.is_bool_dtype(df_clean[col])
    ]

    if binary_cols:
        target_col = binary_cols[0]
        print(f"Assuming target column is: {target_col} (binary column)")
    else:
        raise ValueError("Could not identify target column. Please check your dataset.")

print(f"‚úÖ Target column identified: {target_col}")

In [None]:
# 4. Drop identifier columns; they are row labels, not predictive features.
cols_to_drop = [name for name in ('id', 'customer_id') if name in df_clean.columns]

if len(cols_to_drop) > 0:
    df_clean = df_clean.drop(columns=cols_to_drop)
    print(f"‚úÖ Dropped unnecessary columns: {cols_to_drop}")

# Shape report; the feature count excludes the target column.
print(f"\n‚úÖ Final dataset shape: {df_clean.shape}")
print(f"‚úÖ Features: {len(df_clean.columns) - 1}")
print(f"‚úÖ Samples: {len(df_clean)}")

In [None]:
# ### 5. Target Variable Analysis
# %%
print("\n" + "="*50)
print("TARGET VARIABLE ANALYSIS")
print("="*50)

# value_counts() orders by frequency, not by class label. Sorting by label keeps
# each count attached to the right class even when defaults are the majority.
target_counts = df_clean[target_col].value_counts().sort_index()
total = len(df_clean)

# .get() avoids a KeyError when one of the two classes is absent from the data.
n_no_default = int(target_counts.get(0, 0))
n_default = int(target_counts.get(1, 0))

print(f"Class 0 (No Default): {n_no_default:,} ({(n_no_default/total*100):.2f}%)")
if len(target_counts) > 1:
    print(f"Class 1 (Default): {n_default:,} ({(n_default/total*100):.2f}%)")

# Derive bar labels and colours from the class labels actually present, so bars,
# names and colours cannot drift apart (and a single-class dataset still plots).
class_names = {0: 'No Default', 1: 'Default'}
class_colors = {0: 'green', 1: 'red'}
bar_labels = [class_names.get(label, str(label)) for label in target_counts.index]
bar_colors = [class_colors.get(label, 'gray') for label in target_counts.index]

plt.figure(figsize=(8, 5))
bars = plt.bar(bar_labels, target_counts.values,
               color=bar_colors, alpha=0.7, edgecolor='black')
plt.title('Credit Card Default Distribution', fontsize=14)
plt.ylabel('Number of Clients')
plt.xlabel('Default Status')

# Add count labels on bars; the gap scales with the tallest bar so the labels sit
# just above the bars for any dataset size.
label_gap = 0.01 * target_counts.max()
for bar, count in zip(bars, target_counts.values):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + label_gap,
             f'{count:,}\n({count/total*100:.1f}%)',
             ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Check for class imbalance (majority count divided by minority count).
imbalance_ratio = target_counts.max() / target_counts.min()
if imbalance_ratio > 1.5:
    print(f"‚ö†Ô∏è Class imbalance detected: Ratio = {imbalance_ratio:.2f}:1")
    print("Will apply class balancing techniques")
else:
    print("‚úÖ Classes are relatively balanced")

In [None]:
# ### 6. Data Preparation and Feature Engineering
# %%
print("\n" + "="*50)
print("FEATURE PREPARATION")
print("="*50)

# Separate features and target
X = df_clean.drop(columns=[target_col])
y = df_clean[target_col]

print(f"Features (X) shape: {X.shape}")
print(f"Target (y) shape: {y.shape}")

# Check feature types
print("\nFeature types:")
print(X.dtypes.value_counts())

# Feature scaling
print("\nApplying feature scaling...")
scaler = StandardScaler()
# Keep X's index: after drop_duplicates the index has gaps, and a fresh RangeIndex
# would leave X_scaled misaligned with y for any label-based operation (join, .loc).
# NOTE(review): this scaler is fit on every row, so its mean/std include rows that may
# later be held out for testing; fit on training rows only when the scaled values feed
# model evaluation.
X_scaled = pd.DataFrame(scaler.fit_transform(X), columns=X.columns, index=X.index)

print("‚úÖ Feature scaling completed")

In [None]:
# ### 7. Train-Test Split
# %%
print("\n" + "="*50)
print("TRAIN-TEST SPLIT")
print("="*50)

# Split the unscaled features first (stratified, fixed seed), then standardise.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training rows only and reuse those statistics for the test
# rows. Scaling before the split lets test-set mean/std leak into training and makes
# the reported metrics optimistic.
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train_raw), columns=X.columns, index=X_train_raw.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test_raw), columns=X.columns, index=X_test_raw.index
)

print(f"Training set: {X_train.shape[0]:,} samples")
print(f"Test set: {X_test.shape[0]:,} samples")
print(f"\nTraining class distribution:")
print(f"  Class 0: {sum(y_train == 0):,} ({(sum(y_train == 0)/len(y_train)*100):.1f}%)")
print(f"  Class 1: {sum(y_train == 1):,} ({(sum(y_train == 1)/len(y_train)*100):.1f}%)")

In [None]:
# ### 8. Handle Class Imbalance
# %%
print("\n" + "="*50)
print("HANDLING CLASS IMBALANCE")
print("="*50)

# Without SMOTE the training data passes through unchanged and the model cells
# compensate with class_weight='balanced'; with SMOTE it is oversampled here.
if not USE_SMOTE:
    print("Using class_weight parameter in models (no resampling)")
    X_train_bal = X_train
    y_train_bal = y_train
else:
    print("Using SMOTE for class balancing...")
    smote = SMOTE(random_state=42)
    X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)

    print(f"\nAfter SMOTE:")
    print(f"  Training samples: {X_train_bal.shape[0]:,}")
    print(f"  Class 0: {sum(y_train_bal == 0):,}")
    print(f"  Class 1: {sum(y_train_bal == 1):,}")


In [None]:
# ### 9. Model Training with Fast Algorithms
# %%
# Section banner.
for banner_line in ("\n" + "="*50, "MODEL TRAINING", "="*50):
    print(banner_line)

# Registry filled by each model cell: model name -> fitted model, predictions, metrics.
results = dict()


In [None]:
# ### 9.1 Logistic Regression Model
# %%
print("\n" + "="*40)
print("MODEL 1: LOGISTIC REGRESSION")
print("="*40)

# When SMOTE has already balanced the training data no reweighting is requested;
# otherwise the estimator compensates through class_weight='balanced'.
lr_params = dict(random_state=42, max_iter=1000, C=1.0)
if not USE_SMOTE:
    lr_params['class_weight'] = 'balanced'
lr_model = LogisticRegression(**lr_params)

# Fit on the (possibly resampled) training data.
print("Training Logistic Regression...")
lr_model.fit(X_train_bal, y_train_bal)
print("‚úì Training completed")

# Hard labels plus the positive-class probability for the held-out rows.
y_pred_lr = lr_model.predict(X_test)
y_pred_proba_lr = lr_model.predict_proba(X_test)[:, 1]

# Score the predictions with each metric in turn.
accuracy_lr, precision_lr, recall_lr, f1_lr = (
    scorer(y_test, y_pred_lr)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Register everything under the model's display name.
results['Logistic Regression'] = dict(
    model=lr_model,
    y_pred=y_pred_lr,
    y_pred_proba=y_pred_proba_lr,
    accuracy=accuracy_lr,
    precision=precision_lr,
    recall=recall_lr,
    f1=f1_lr,
)

# Display results
print("\nüìä Logistic Regression Performance:")
print(f"  Accuracy:  {accuracy_lr:.4f}")
print(f"  Precision: {precision_lr:.4f}")
print(f"  Recall:    {recall_lr:.4f}")
print(f"  F1-Score:  {f1_lr:.4f}")

# Rank features by absolute coefficient and show the ten largest.
if hasattr(lr_model, 'coef_'):
    print("\nüîç Top 10 Feature Coefficients (Absolute Value):")
    lr_coefficients = lr_model.coef_[0]
    coef_df = pd.DataFrame({
        'Feature': X.columns,
        'Coefficient': lr_coefficients,
        'Abs_Coefficient': np.abs(lr_coefficients)
    })
    coef_df = coef_df.sort_values('Abs_Coefficient', ascending=False).head(10)
    display(coef_df[['Feature', 'Coefficient']])







In [None]:
# ### 9.2 Decision Tree Model
# %%
print("\n" + "="*40)
print("MODEL 2: DECISION TREE")
print("="*40)

# Shallow tree; class reweighting is requested only when SMOTE did not rebalance the data.
dt_params = dict(random_state=42, max_depth=5, min_samples_split=10)
if not USE_SMOTE:
    dt_params['class_weight'] = 'balanced'
dt_model = DecisionTreeClassifier(**dt_params)

# Fit on the (possibly resampled) training data.
print("Training Decision Tree...")
dt_model.fit(X_train_bal, y_train_bal)
print("‚úì Training completed")

# Hard labels plus the positive-class probability for the held-out rows.
y_pred_dt = dt_model.predict(X_test)
y_pred_proba_dt = dt_model.predict_proba(X_test)[:, 1]

# Score the predictions with each metric in turn.
accuracy_dt, precision_dt, recall_dt, f1_dt = (
    scorer(y_test, y_pred_dt)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Register everything under the model's display name.
results['Decision Tree'] = dict(
    model=dt_model,
    y_pred=y_pred_dt,
    y_pred_proba=y_pred_proba_dt,
    accuracy=accuracy_dt,
    precision=precision_dt,
    recall=recall_dt,
    f1=f1_dt,
)

# Display results
print("\nüìä Decision Tree Performance:")
print(f"  Accuracy:  {accuracy_dt:.4f}")
print(f"  Precision: {precision_dt:.4f}")
print(f"  Recall:    {recall_dt:.4f}")
print(f"  F1-Score:  {f1_dt:.4f}")

# Ten most important features according to the fitted tree.
if hasattr(dt_model, 'feature_importances_'):
    print("\nüîç Top 10 Feature Importances:")
    feature_importance_dt = pd.DataFrame({
        'Feature': X.columns,
        'Importance': dt_model.feature_importances_
    })
    feature_importance_dt = feature_importance_dt.sort_values('Importance', ascending=False).head(10)
    display(feature_importance_dt)

# Size of the fitted tree.
print("\nüå≥ Tree Depth Information:")
print(f"  Tree Depth: {dt_model.get_depth()}")
print(f"  Number of Leaves: {dt_model.get_n_leaves()}")

In [None]:
# ### 9.3 Random Forest Model
# %%
print("\n" + "="*40)
print("MODEL 3: RANDOM FOREST")
print("="*40)

# 100 depth-limited trees on all cores; class reweighting only when SMOTE is not in use.
rf_params = dict(random_state=42, n_estimators=100, max_depth=10, n_jobs=-1)
if not USE_SMOTE:
    rf_params['class_weight'] = 'balanced'
rf_model = RandomForestClassifier(**rf_params)

# Fit on the (possibly resampled) training data.
print("Training Random Forest...")
rf_model.fit(X_train_bal, y_train_bal)
print("‚úì Training completed")

# Hard labels plus the positive-class probability for the held-out rows.
y_pred_rf = rf_model.predict(X_test)
y_pred_proba_rf = rf_model.predict_proba(X_test)[:, 1]

# Score the predictions with each metric in turn.
accuracy_rf, precision_rf, recall_rf, f1_rf = (
    scorer(y_test, y_pred_rf)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Register everything under the model's display name.
results['Random Forest'] = dict(
    model=rf_model,
    y_pred=y_pred_rf,
    y_pred_proba=y_pred_proba_rf,
    accuracy=accuracy_rf,
    precision=precision_rf,
    recall=recall_rf,
    f1=f1_rf,
)

# Display results
print("\nüìä Random Forest Performance:")
print(f"  Accuracy:  {accuracy_rf:.4f}")
print(f"  Precision: {precision_rf:.4f}")
print(f"  Recall:    {recall_rf:.4f}")
print(f"  F1-Score:  {f1_rf:.4f}")

# Ten most important features according to the fitted forest.
if hasattr(rf_model, 'feature_importances_'):
    print("\nüîç Top 10 Feature Importances:")
    feature_importance_rf = pd.DataFrame({
        'Feature': X.columns,
        'Importance': rf_model.feature_importances_
    })
    feature_importance_rf = feature_importance_rf.sort_values('Importance', ascending=False).head(10)
    display(feature_importance_rf)

# Ensemble size and the spread of depths across its member trees.
print("\nüå≤ Random Forest Information:")
print(f"  Number of Trees: {len(rf_model.estimators_)}")
print(f"  Tree Depth Range: {min([tree.get_depth() for tree in rf_model.estimators_])}-{max([tree.get_depth() for tree in rf_model.estimators_])}")

In [None]:
# ### 9.4 Training Summary
# %%
print("\n" + "="*50)
print("TRAINING SUMMARY")
print("="*50)

print(f"\n‚úÖ All {len(results)} models trained successfully!")
print(f"Class balancing method: {'SMOTE' if USE_SMOTE else 'class_weight parameter'}")

# One row per trained model, best F1-Score first.
summary_data = [
    {
        'Model': name,
        'Accuracy': result['accuracy'],
        'Precision': result['precision'],
        'Recall': result['recall'],
        'F1-Score': result['f1']
    }
    for name, result in results.items()
]
summary_df = pd.DataFrame(summary_data).sort_values('F1-Score', ascending=False)

print("\nüìã Performance Summary (Sorted by F1-Score):")
print("-" * 70)
# Every metric column is shown with four decimals; F1-Score is shaded.
summary_formats = {
    column: '{:.4f}' for column in ('Accuracy', 'Precision', 'Recall', 'F1-Score')
}
styled_summary = summary_df.style.format(summary_formats)
display(styled_summary.background_gradient(cmap='YlOrRd', subset=['F1-Score']))

# The top row after sorting is the best model by F1-Score.
best_model_name = summary_df.iloc[0]['Model']
best_result = results[best_model_name]

print(f"\nüèÜ BEST MODEL: {best_model_name}")
print(f"   F1-Score: {best_result['f1']:.4f}")
print(f"   Accuracy: {best_result['accuracy']:.4f}")

print("\nüìä Training Set Information:")
print(f"  Original size: {X_train.shape[0]:,} samples")
print(f"  After balancing: {X_train_bal.shape[0]:,} samples")
print(f"  Test set size: {X_test.shape[0]:,} samples")

In [None]:
# %% [markdown]
# ### 10. Model Comparison
# %%
print("\n" + "="*50)
print("MODEL COMPARISON")
print("="*50)

# One row per trained model, best F1-Score first.
comparison_data = [
    {
        'Model': name,
        'Accuracy': result['accuracy'],
        'Precision': result['precision'],
        'Recall': result['recall'],
        'F1-Score': result['f1']
    }
    for name, result in results.items()
]
comparison_df = pd.DataFrame(comparison_data).sort_values('F1-Score', ascending=False)

print("\nüìä Model Performance Summary:")
print("-" * 60)
comparison_formats = {
    column: '{:.4f}' for column in ('Accuracy', 'Precision', 'Recall', 'F1-Score')
}
styled_comparison = comparison_df.style.format(comparison_formats)
display(styled_comparison.background_gradient(cmap='YlOrRd', subset=['F1-Score', 'Accuracy']))

# Grouped bar chart: one group per model (in table order), one bar per metric.
fig, ax = plt.subplots(figsize=(10, 6))
x = np.arange(len(results))
width = 0.2

models_list = comparison_df['Model'].tolist()

for i, metric in enumerate(['Accuracy', 'Precision', 'Recall', 'F1-Score']):
    # Centre the four bars on each tick: offsets of -1.5, -0.5, +0.5, +1.5 bar widths.
    offset = width * (i - 1.5)
    values = comparison_df[metric].values
    ax.bar(x + offset, values, width, label=metric, alpha=0.8)

ax.set_xlabel('Models', fontsize=12)
ax.set_ylabel('Score', fontsize=12)
ax.set_title('Model Performance Comparison', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(models_list)
ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1))
ax.set_ylim([0, 1])
ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

In [None]:
# ### 11. Confusion Matrices (ESSENTIAL)
# %%
print("\n" + "="*50)
print("CONFUSION MATRICES")
print("="*50)

# One panel per trained model. The grid is sized from `results` rather than a
# hard-coded 3, and squeeze=False keeps `axes` an array even for a single model.
n_models = len(results)
fig, axes = plt.subplots(1, n_models, figsize=(5 * n_models, 4), squeeze=False)
axes = axes.ravel()
fig.suptitle('Confusion Matrices for Credit Card Default Prediction', fontsize=14, fontweight='bold')

for idx, (name, result) in enumerate(results.items()):
    cm = confusion_matrix(y_test, result['y_pred'])

    # Rows are true labels, columns are predicted labels.
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                ax=axes[idx], cbar=False,
                annot_kws={'size': 12, 'weight': 'bold'})

    axes[idx].set_title(f'{name}\nF1: {result["f1"]:.3f}', fontsize=12)
    axes[idx].set_xlabel('Predicted Label', fontsize=10)
    axes[idx].set_ylabel('True Label', fontsize=10)
    axes[idx].set_xticklabels(['No Default', 'Default'])
    axes[idx].set_yticklabels(['No Default', 'Default'])

plt.tight_layout()
plt.show()

In [None]:
# ### 11. Enhanced Confusion Matrices with Metrics Display
# %%
print("\n" + "="*60)
print("ENHANCED CONFUSION MATRICES WITH METRICS")
print("="*60)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0.0


models_list = list(results.keys())

# One row with one panel per model (a second row used to be allocated but never drawn
# into). squeeze=False keeps `axes` two-dimensional even when only one model exists.
fig, axes = plt.subplots(1, len(models_list), figsize=(6 * len(models_list), 6), squeeze=False)
fig.suptitle('Enhanced Confusion Matrices with Performance Metrics', fontsize=16, fontweight='bold')

for idx, name in enumerate(models_list):
    result = results[name]
    cm = confusion_matrix(y_test, result['y_pred'])
    tn, fp, fn, tp = cm.ravel()
    panel = axes[0, idx]

    # Create enhanced title with metrics
    metrics_title = f'{name}\nAcc: {result["accuracy"]:.3f} | F1: {result["f1"]:.3f} | Prec: {result["precision"]:.3f} | Rec: {result["recall"]:.3f}'

    # No explicit annotation colour: seaborn then picks a contrasting colour per
    # cell, so numbers stay readable on both light and dark cells.
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                ax=panel, cbar=True,
                annot_kws={'size': 14, 'weight': 'bold'},
                linewidths=1, linecolor='gray')

    panel.set_title(metrics_title, fontsize=12, fontweight='bold', pad=12)
    panel.set_xlabel('Predicted Label', fontsize=11)
    panel.set_ylabel('True Label', fontsize=11)
    panel.set_xticklabels(['No Default\n(0)', 'Default\n(1)'], fontsize=10)
    panel.set_yticklabels(['No Default\n(0)', 'Default\n(1)'], fontsize=10, rotation=0)

    # Raw cell counts in a caption box under the panel.
    panel.text(0.5, -0.15, f'TN={tn} | FP={fp} | FN={fn} | TP={tp}',
               transform=panel.transAxes, ha='center', fontsize=10,
               bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# Finalise the per-model figure before the next figure is created.
plt.tight_layout()
plt.show()


# ### 11.1 Single Large Confusion Matrix with All Metrics
print("\n" + "="*60)
print("COMPREHENSIVE CONFUSION MATRIX WITH ALL METRICS")
print("="*60)

# Create one large figure for the best model
best_result = results[best_model_name]
best_cm = confusion_matrix(y_test, best_result['y_pred'])
tn, fp, fn, tp = best_cm.ravel()
# Dedicated name: reusing `total` would overwrite the dataset-level row count that
# the target-analysis cell stores under that name.
n_test_samples = best_cm.sum()

# Derived rates, guarded so an empty row/column of the matrix yields 0 instead of nan/inf.
specificity = _safe_ratio(tn, tn + fp)
npv = _safe_ratio(tn, tn + fn)
fpr = _safe_ratio(fp, fp + tn)
fnr = _safe_ratio(fn, fn + tp)

fig = plt.figure(figsize=(14, 10))

# 3x3 layout: matrix (top-left 2x2), metrics text (top-right), bar chart (bottom row).
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)

# Main confusion matrix (span 2x2)
ax1 = fig.add_subplot(gs[0:2, 0:2])

# Each cell shows count and share of the test set in ONE annotation. Previously the
# automatic annotation and a manual text overlay were both drawn and overlapped.
cell_labels = np.array([
    [f'{value:,}\n({value / n_test_samples * 100:.1f}%)' for value in row]
    for row in best_cm
])
sns.heatmap(best_cm, annot=cell_labels, fmt='', cmap='Blues',
            ax=ax1, cbar=False,
            annot_kws={'size': 18, 'weight': 'bold'},
            linewidths=2, linecolor='black')

ax1.set_title(f'{best_model_name} - Confusion Matrix', fontsize=16, fontweight='bold', pad=20)
ax1.set_xlabel('Predicted Label', fontsize=14)
ax1.set_ylabel('True Label', fontsize=14)
ax1.set_xticklabels(['No Default\n(Class 0)', 'Default\n(Class 1)'], fontsize=12)
ax1.set_yticklabels(['No Default\n(Class 0)', 'Default\n(Class 1)'], fontsize=12, rotation=0)

# Metrics panel (right side)
ax2 = fig.add_subplot(gs[0:2, 2])

# Hide axes for text display
ax2.axis('off')

# Create metrics display
metrics_text = f"""
{best_model_name} - PERFORMANCE METRICS
‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê

CONFUSION MATRIX VALUES:
‚Ä¢ True Negatives (TN): {tn:,}
‚Ä¢ False Positives (FP): {fp:,}
‚Ä¢ False Negatives (FN): {fn:,}
‚Ä¢ True Positives (TP): {tp:,}
‚Ä¢ Total Samples: {n_test_samples:,}

PRIMARY METRICS:
‚Ä¢ Accuracy:   {best_result['accuracy']:.4f}
‚Ä¢ Precision:  {best_result['precision']:.4f}
‚Ä¢ Recall:     {best_result['recall']:.4f}
‚Ä¢ F1-Score:   {best_result['f1']:.4f}

DERIVED METRICS:
‚Ä¢ Specificity: {specificity:.4f}
‚Ä¢ NPV:        {npv:.4f}
‚Ä¢ FPR:        {fpr:.4f}
‚Ä¢ FNR:        {fnr:.4f}

ERROR ANALYSIS:
‚Ä¢ Type I Errors:  {fp} ({fp/n_test_samples*100:.1f}%)
‚Ä¢ Type II Errors: {fn} ({fn/n_test_samples*100:.1f}%)
‚Ä¢ Correct:       {tp+tn} ({(tp+tn)/n_test_samples*100:.1f}%)
"""

ax2.text(0.1, 0.95, metrics_text, fontsize=12, family='monospace',
         verticalalignment='top', bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.3))

# Bottom panel: Metric comparison visualization
ax3 = fig.add_subplot(gs[2, :])

# Prepare data for bar chart
metrics_to_plot = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
values_to_plot = [best_result['accuracy'], best_result['precision'],
                  best_result['recall'], best_result['f1']]

colors = ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D']
bars = ax3.bar(metrics_to_plot, values_to_plot, color=colors, alpha=0.8, edgecolor='black')

# Add value labels on bars
for bar, value in zip(bars, values_to_plot):
    height = bar.get_height()
    ax3.text(bar.get_x() + bar.get_width()/2, height + 0.01,
            f'{value:.3f}', ha='center', va='bottom', fontsize=11, fontweight='bold')

ax3.set_ylabel('Score', fontsize=12)
ax3.set_title(f'{best_model_name} - Key Metrics Visualization', fontsize=14, fontweight='bold')
ax3.set_ylim([0, 1])
ax3.grid(True, alpha=0.3, axis='y')
ax3.set_axisbelow(True)

plt.tight_layout()
plt.show()

In [None]:
# ### 11.2 All Models Side-by-Side with Metrics
# %%
print("\n" + "="*60)
print("ALL MODELS - SIDE-BY-SIDE COMPARISON")
print("="*60)

# Compute every matrix up front so all panels share one colour scale. The single
# colorbar is only meaningful when the same count maps to the same colour everywhere.
model_cms = {
    name: confusion_matrix(y_test, result['y_pred'])
    for name, result in results.items()
}
shared_vmax = max(cm.max() for cm in model_cms.values())

# squeeze=False + ravel keeps `axes` indexable even when only one model was trained.
fig, axes = plt.subplots(1, len(results), figsize=(16, 5), squeeze=False)
axes = axes.ravel()
fig.suptitle('Model Comparison: Confusion Matrices with Key Metrics', fontsize=16, fontweight='bold', y=1.05)

for idx, (name, result) in enumerate(results.items()):
    cm = model_cms[name]

    # Create the heatmap
    im = axes[idx].imshow(cm, cmap='YlOrRd', interpolation='nearest', vmin=0, vmax=shared_vmax)

    # Annotate each cell with its count and share of this model's test predictions.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            value = cm[i, j]
            percentage = value / cm.sum() * 100
            # White text on the dark half of the colour scale, black on the light half.
            text_color = 'white' if value > shared_vmax/2 else 'black'
            axes[idx].text(j, i, f'{value}\n({percentage:.1f}%)',
                          ha='center', va='center',
                          color=text_color, fontsize=11, fontweight='bold')

    # Set labels
    axes[idx].set(xticks=[0, 1], yticks=[0, 1],
                  xticklabels=['No Default', 'Default'],
                  yticklabels=['No Default', 'Default'])
    axes[idx].set_xlabel('Predicted', fontsize=11)
    axes[idx].set_ylabel('Actual', fontsize=11)

    # Add metrics as title
    title_text = f'{name}\nAcc: {result["accuracy"]:.3f} | F1: {result["f1"]:.3f}'
    axes[idx].set_title(title_text, fontsize=12, fontweight='bold', pad=10)

    # Add detailed metrics below
    metrics_text = f'Prec: {result["precision"]:.3f}\nRec: {result["recall"]:.3f}'
    axes[idx].text(0.5, -0.25, metrics_text, transform=axes[idx].transAxes,
                   ha='center', fontsize=10,
                   bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.5))

# Lay the panels out in the left 90% of the figure so the colorbar added below
# does not sit on top of the last panel.
plt.tight_layout(rect=[0, 0, 0.9, 1])

# Shared colorbar (valid for every panel because they share vmin/vmax).
cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
fig.colorbar(im, cax=cbar_ax)
cbar_ax.set_ylabel('Count', fontsize=11)

plt.show()

In [None]:
# ### 11.3 Metrics Summary Table
# %%
print("\n" + "="*60)
print("COMPREHENSIVE METRICS SUMMARY TABLE")
print("="*60)

# Build one record per model from its confusion matrix and stored metrics.
detailed_metrics = []
for name, result in results.items():
    cm = confusion_matrix(y_test, result['y_pred'])
    tn, fp, fn, tp = cm.ravel()
    # `total` is deliberately NOT reassigned here: it was unused in this cell and
    # overwrote the dataset-level row count stored under that name earlier on.

    # Rates derived from the matrix, each guarded against an empty denominator.
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
    fnr = fn / (fn + tp) if (fn + tp) > 0 else 0
    # Balanced accuracy = mean of recall and specificity.
    balanced_acc = (result['recall'] + specificity) / 2

    detailed_metrics.append({
        'Model': name,
        'Accuracy': result['accuracy'],
        'Precision': result['precision'],
        'Recall': result['recall'],
        'F1-Score': result['f1'],
        'Specificity': specificity,
        'NPV': npv,
        'Balanced Acc': balanced_acc,
        'FPR': fpr,
        'FNR': fnr,
        'TP': tp,
        'TN': tn,
        'FP': fp,
        'FN': fn
    })

detailed_df = pd.DataFrame(detailed_metrics)
detailed_df = detailed_df.sort_values('F1-Score', ascending=False)

print("\nüìä Complete Metrics Summary:")
print("-" * 100)

# Rates with four decimals, raw counts as integers.
styled_df = detailed_df.style.format({
    'Accuracy': '{:.4f}',
    'Precision': '{:.4f}',
    'Recall': '{:.4f}',
    'F1-Score': '{:.4f}',
    'Specificity': '{:.4f}',
    'NPV': '{:.4f}',
    'Balanced Acc': '{:.4f}',
    'FPR': '{:.4f}',
    'FNR': '{:.4f}',
    'TP': '{:d}',
    'TN': '{:d}',
    'FP': '{:d}',
    'FN': '{:d}'
})

# Apply gradient to key metrics
styled_df = styled_df.background_gradient(cmap='YlOrRd', subset=['Accuracy', 'F1-Score', 'Balanced Acc'])

display(styled_df)

# Print key insights. Each line names the model holding the column maximum; a single
# .loc[row, 'Model'] lookup replaces the chained .loc[row]['Model'] form.
print(f"\nüí° KEY INSIGHTS FROM CONFUSION MATRICES:")
print("-" * 50)
print(f"1. Best Model: {best_model_name} (F1-Score: {results[best_model_name]['f1']:.4f})")
print(f"2. Highest Accuracy: {detailed_df['Accuracy'].max():.4f} ({detailed_df.loc[detailed_df['Accuracy'].idxmax(), 'Model']})")
print(f"3. Highest Precision: {detailed_df['Precision'].max():.4f} ({detailed_df.loc[detailed_df['Precision'].idxmax(), 'Model']})")
print(f"4. Highest Recall: {detailed_df['Recall'].max():.4f} ({detailed_df.loc[detailed_df['Recall'].idxmax(), 'Model']})")
print(f"5. Most Balanced: {detailed_df['Balanced Acc'].max():.4f} ({detailed_df.loc[detailed_df['Balanced Acc'].idxmax(), 'Model']})")

print("\n" + "="*60)
print("METRICS DISPLAY ENHANCEMENT COMPLETE")
print("="*60)

In [None]:
# ### 12. Best Model Analysis
# %%
print("\n" + "="*50)
print("BEST MODEL ANALYSIS")
print("="*50)

# comparison_df is sorted by F1-Score (descending), so its first row is the winner.
best_model_name = comparison_df['Model'].iloc[0]
best_result = results[best_model_name]
best_model = best_result['model']

print(f"üèÜ BEST MODEL: {best_model_name}")
print(f"   F1-Score: {best_result['f1']:.4f}")
print(f"   Accuracy: {best_result['accuracy']:.4f}")
print(f"   Precision: {best_result['precision']:.4f}")
print(f"   Recall: {best_result['recall']:.4f}")

print("\nüìã Classification Report:")
print("-" * 50)
report_text = classification_report(
    y_test,
    best_result['y_pred'],
    target_names=['No Default', 'Default'],
)
print(report_text)

# Only estimators that expose feature_importances_ get the ranking and the chart.
if hasattr(best_model, 'feature_importances_'):
    print("\nüîç Top 10 Feature Importances:")
    feature_importance = pd.DataFrame({
        'Feature': X.columns,
        'Importance': best_model.feature_importances_
    })
    feature_importance = feature_importance.sort_values('Importance', ascending=False).head(10)

    display(feature_importance)

    # Horizontal bars with the most important feature at the top.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.barh(feature_importance['Feature'], feature_importance['Importance'],
            color='teal', alpha=0.7)
    ax.set_xlabel('Importance Score', fontsize=12)
    ax.set_title(f'Top 10 Feature Importances - {best_model_name}', fontsize=14)
    ax.invert_yaxis()
    ax.grid(True, alpha=0.3, axis='x')
    plt.tight_layout()
    plt.show()