# Page 3 Dashboard - ML Model Tables
This notebook creates the tables needed for the ML Model dashboard visualizations:
- Model Comparison (LR/RF/GBT)
- Feature Importance
- Confusion Matrix

## 1. Model Comparison Table

In [None]:
# Model comparison metrics, copied from the 08_ML_Training results.
# Row layout: (model_name, roc_auc, accuracy, model_code, model_order).
model_comparison_data = [
    ("Logistic Regression", 0.5936, 0.7349, "LR", 1),
    ("Random Forest", 0.6432, 0.7603, "RF", 2),
    ("Gradient Boosted Trees", 0.6955, 0.7613, "GBT", 3),
]

model_comparison = spark.createDataFrame(
    model_comparison_data,
    ["model_name", "roc_auc", "accuracy", "model_code", "model_order"],
)

# mode("overwrite") replaces the table contents on every run, so re-running
# this cell does not append duplicate rows.
(
    model_comparison.write.format("delta")
    .mode("overwrite")
    .saveAsTable("msme_risk_analytics.gold_model_comparison")
)

# Status glyph written as a real check mark (it was previously mis-encoded).
print("✅ Model comparison table created!")
spark.table("msme_risk_analytics.gold_model_comparison").show()

## 2. Feature Importance Table

In [None]:
# Feature importances of the GBT model, copied from the 09_Model_Evaluation
# results. Row layout: (feature_name, importance, rank); rows are listed in
# descending importance, so rank 1 is the most important feature.
feature_importance_data = [
    ("LTV", 0.255088, 1),
    ("loan_to_income_ratio", 0.192788, 2),
    ("dtir1", 0.173469, 3),
    ("loan_amount", 0.145198, 4),
    ("income", 0.102258, 5),
    ("Credit_Score", 0.075848, 6),
    ("risk_score", 0.055350, 7),
]

feature_importance = spark.createDataFrame(
    feature_importance_data,
    ["feature_name", "importance", "rank"],
)

# mode("overwrite") replaces the table contents on every run, so re-running
# this cell does not append duplicate rows.
(
    feature_importance.write.format("delta")
    .mode("overwrite")
    .saveAsTable("msme_risk_analytics.gold_feature_importance")
)

# Status glyph written as a real check mark (it was previously mis-encoded).
print("✅ Feature importance table created!")
spark.table("msme_risk_analytics.gold_feature_importance").show()

## 3. Confusion Matrix Table

In [None]:
# Confusion matrix of the GBT model, copied from the 09_Model_Evaluation results.
# Row layout: (actual_status, predicted_status, count, category, description).
# NOTE(review): the dashboard query maps 0 -> 'No Default' and 1 -> 'Default';
# confirm that matches the label encoding used during training.
confusion_matrix_data = [
    (0, 0, 2133, "True Negative", "Correctly predicted No Default"),
    (0, 1, 39, "False Positive", "Incorrectly predicted Default"),
    (1, 0, 684, "False Negative", "Missed Default (High Risk!)"),
    (1, 1, 173, "True Positive", "Correctly predicted Default"),
]

confusion_matrix = spark.createDataFrame(
    confusion_matrix_data,
    ["actual_status", "predicted_status", "count", "category", "description"],
)

# mode("overwrite") replaces the table contents on every run, so re-running
# this cell does not append duplicate rows.
(
    confusion_matrix.write.format("delta")
    .mode("overwrite")
    .saveAsTable("msme_risk_analytics.gold_confusion_matrix")
)

# Status glyph written as a real check mark (it was previously mis-encoded).
print("✅ Confusion matrix table created!")
# truncate=False keeps the long description column fully visible.
spark.table("msme_risk_analytics.gold_confusion_matrix").show(truncate=False)

## 4. Model Metrics Summary Table

In [None]:
# Detailed metrics for GBT (the model with the highest ROC AUC in the
# comparison table), derived from its confusion-matrix counts.
TP = 173   # actual Default, predicted Default
TN = 2133  # actual No Default, predicted No Default
FP = 39    # actual No Default, predicted Default
FN = 684   # actual Default, predicted No Default

# The evaluated (test) set size is the sum of the four confusion-matrix cells.
# Deriving it here keeps it in step with the counts instead of repeating the
# literal 3029.
test_records = TP + TN + FP + FN
train_records = 11758

accuracy = (TP + TN) / test_records
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)
# ROC AUC cannot be derived from a single confusion matrix; this is the value
# reported for GBT in the model comparison data.
roc_auc = 0.6955

metrics_data = [
    ("GBT", accuracy, precision, recall, f1_score, roc_auc, train_records, test_records)
]

metrics_summary = spark.createDataFrame(
    metrics_data,
    ["model", "accuracy", "precision", "recall", "f1_score", "roc_auc", "train_records", "test_records"],
)

# mode("overwrite") replaces the table contents on every run, so re-running
# this cell does not append duplicate rows.
(
    metrics_summary.write.format("delta")
    .mode("overwrite")
    .saveAsTable("msme_risk_analytics.gold_model_metrics_summary")
)

# Status glyph written as a real check mark (it was previously mis-encoded).
print("✅ Model metrics summary table created!")
spark.table("msme_risk_analytics.gold_model_metrics_summary").show()

## 5. Verify All Tables Created

In [None]:
# Final check: read back every Page 3 table and report its row count.
# Glyphs in the messages are written as real emoji (they were previously
# mis-encoded).
print("\n" + "="*60)
print("📊 PAGE 3 DASHBOARD TABLES SUMMARY")
print("="*60)

tables = [
    "gold_model_comparison",
    "gold_feature_importance",
    "gold_confusion_matrix",
    "gold_model_metrics_summary",
]

for table_name in tables:
    qualified_name = f"msme_risk_analytics.{table_name}"
    # spark.table() raises if the table does not exist, so reaching count()
    # already proves the table was created.
    row_count = spark.table(qualified_name).count()
    # An empty table would render a blank dashboard tile; stop here rather
    # than printing the success banner below.
    if row_count == 0:
        raise RuntimeError(f"{qualified_name} exists but contains no rows")
    print(f"✅ {qualified_name}: {row_count} rows")

print("="*60)
print("🎉 All tables ready for Page 3 Dashboard!")
print("="*60)

## SQL Queries for Dashboard Visualizations

Use these SQL queries in your Databricks Dashboard:

In [None]:
# %sql
# -- Model Performance Comparison
# SELECT 
#     model_name,
#     model_code,
#     ROUND(roc_auc * 100, 2) as roc_auc_pct,
#     ROUND(accuracy * 100, 2) as accuracy_pct
# FROM msme_risk_analytics.gold_model_comparison
# ORDER BY roc_auc DESC

In [None]:
# %sql
# -- Feature Importance
# SELECT 
#     feature_name,
#     ROUND(importance * 100, 2) as importance_pct,
#     rank
# FROM msme_risk_analytics.gold_feature_importance
# ORDER BY importance DESC

In [None]:
# %sql
# -- Confusion Matrix
# SELECT 
#     CASE WHEN actual_status = 0 THEN 'No Default' ELSE 'Default' END as actual,
#     CASE WHEN predicted_status = 0 THEN 'No Default' ELSE 'Default' END as predicted,
#     count,
#     category
# FROM msme_risk_analytics.gold_confusion_matrix
# ORDER BY actual_status, predicted_status

In [None]:
# %sql
# -- Model KPI Metrics
# SELECT 
#     ROUND(accuracy * 100, 1) as accuracy_pct,
#     ROUND(precision * 100, 1) as precision_pct,
#     ROUND(recall * 100, 1) as recall_pct,
#     ROUND(roc_auc * 100, 1) as roc_auc_pct
# FROM msme_risk_analytics.gold_model_metrics_summary
# WHERE model = 'GBT'