In [7]:
# 4_dashboard_export.ipynb (Shareholder Insights - Enhanced)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load scored portfolio
df = pd.read_csv("scored_credit_portfolio.csv")

# Fix column names to lowercase for consistent access
df.columns = df.columns.str.lower()

# === KEY METRICS ===
total_customers = len(df)
avg_pd = df["pd"].mean()
high_risk_count = (df["risk_tier"] == "High Risk").sum()
default_rate = df["default"].mean()

# === VISUALS ===
# Risk Tier Bar Chart
plt.figure(figsize=(6,4))
sns.countplot(data=df, x="risk_tier", order=["Low Risk", "Medium Risk", "High Risk"], palette="Set2")
plt.title("Customer Distribution by Risk Tier")
plt.xlabel("Risk Tier")
plt.ylabel("Number of Customers")
plt.tight_layout()
plt.savefig("risk_tier_distribution.png")
plt.close()

# PD Distribution
plt.figure(figsize=(6,4))
sns.histplot(df["pd"], bins=30, kde=True, color="darkblue")
plt.title("Probability of Default Distribution")
plt.xlabel("Probability of Default")
plt.ylabel("Frequency")
plt.tight_layout()
plt.savefig("pd_distribution.png")
plt.close()

# Insights by Demographics
# Age vs PD
if "age" in df.columns:
    plt.figure(figsize=(6,4))
    sns.scatterplot(data=df, x="age", y="pd", hue="risk_tier", palette="Set2", alpha=0.7)
    plt.title("Probability of Default by Age")
    plt.xlabel("Age")
    plt.ylabel("Probability of Default")
    plt.tight_layout()
    plt.savefig("age_vs_pd.png")
    plt.close()

# Sex vs Average PD
if "sex" in df.columns:
    plt.figure(figsize=(6,4))
    sns.barplot(data=df, x="sex", y="pd", estimator="mean", palette="pastel")
    plt.title("Average PD by Gender")
    plt.xlabel("Gender (1=Male, 2=Female)")
    plt.ylabel("Average Probability of Default")
    plt.tight_layout()
    plt.savefig("sex_vs_pd.png")
    plt.close()

# Marital Status vs Risk Tier
if "marriage" in df.columns:
    plt.figure(figsize=(6,4))
    sns.countplot(data=df, x="marriage", hue="risk_tier", palette="Set2")
    plt.title("Risk Tier by Marital Status")
    plt.xlabel("Marital Status (1=Married, 2=Single, 3=Other)")
    plt.ylabel("Number of Customers")
    plt.tight_layout()
    plt.savefig("marriage_vs_risk_tier.png")
    plt.close()

# Generate improved HTML summary
with open("credit_risk_dashboard_simple.html", "w") as f:
    f.write("""
    <html>
    <head>
        <title>Credit Risk Portfolio Summary</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 30px; }
            h1 { color: #1a237e; }
            ul { line-height: 1.8; }
            img { border: 1px solid #ccc; padding: 5px; margin-bottom: 20px; }
        </style>
    </head>
    <body>
    <h1>📊 Credit Risk Portfolio Summary</h1>
    <h2>Key Metrics</h2>
    <ul>
        <li><strong>Total Customers:</strong> """ + str(total_customers) + """</li>
        <li><strong>Average Probability of Default:</strong> """ + str(round(avg_pd, 3)) + """</li>
        <li><strong>High Risk Customers:</strong> """ + str(high_risk_count) + """</li>
        <li><strong>Actual Default Rate:</strong> """ + str(round(default_rate, 3)) + """</li>
    </ul>
    <h2>Visual Insights</h2>
    <h4>Customer Distribution by Risk Tier</h4>
    <img src='risk_tier_distribution.png' width='500'><br>

    <h4>Probability of Default (PD) Distribution</h4>
    <img src='pd_distribution.png' width='500'><br>

    """ + ("<h4>Probability of Default by Age</h4><img src='age_vs_pd.png' width='500'><br>" if "age" in df.columns else "") + 
    ("<h4>Average PD by Gender</h4><img src='sex_vs_pd.png' width='500'><br>" if "sex" in df.columns else "") + 
    ("<h4>Risk Tier by Marital Status</h4><img src='marriage_vs_risk_tier.png' width='500'><br>" if "marriage" in df.columns else "") + 
    """
    </body>
    </html>
    """)

print("✅ Updated HTML dashboard saved as 'credit_risk_dashboard_simple.html'")



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(data=df, x="risk_tier", order=["Low Risk", "Medium Risk", "High Risk"], palette="Set2")


ValueError: Could not interpret value `age` for `x`. An entry with this name does not appear in `data`.