In [4]:
# save as app.py
import streamlit as st
import pandas as pd
import joblib
import shap
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

# Configure the browser tab title and the wide layout before any other
# Streamlit output is produced.
st.set_page_config(page_title="Credit Risk Prediction", layout="wide")
# The credit-card emoji (U+1F4B3) is written as an ASCII escape so the title
# cannot be corrupted again if the file is saved or read with the wrong
# encoding (the previous literal was UTF-8 bytes decoded as cp1252).
st.title("\U0001F4B3 Credit Risk Prediction System")
st.write("Machine Learning + Explainable AI (SHAP)")

# ==============================
# LOAD MODEL AND FEATURES
# ==============================
# Both artifacts are deserialized with joblib each time the script runs.
model = joblib.load(r"C:\Users\admin\Desktop\creditscoreproject\models\model.pkl")
# NOTE(review): the column order is read from "feature_encoders.pkl" --
# confirm this file really holds the ordered training feature names.
feature_order = joblib.load(r"C:\Users\admin\Desktop\creditscoreproject\models\feature_encoders.pkl")

# Reference dataset; a short preview is echoed to stdout (not to the page).
data = pd.read_csv(r"C:\Users\admin\Desktop\creditscoreproject\data\credit_risk_1_lakh_dataset.csv")

print("Dataset Loaded Successfully")
print(data.head())

# STEP 4: Prepare Data

target_column = "credit_risk"   # change if needed

# Drop the label, then align the remaining columns with the training order;
# any feature absent from the CSV is created and filled with 0.
# NOTE(review): this X is replaced as soon as a file is uploaded below.
X = data.drop(target_column, axis=1).reindex(columns=feature_order, fill_value=0)


# ==============================
# FILE UPLOAD
# ==============================
def _select_class_explanation(shap_values, expected_value, class_index=1):
    """Return the ``(shap_values, expected_value)`` pair for one class.

    ``TreeExplainer.shap_values`` does not have a single return shape:

    * a list with one 2-D array per class (older SHAP releases),
    * a single 3-D array whose last axis indexes the class (newer SHAP
      releases, multi-output models),
    * a single 2-D array (single-output models).

    The first two forms are reduced to the 2-D slice for ``class_index``
    together with the matching base value; the last form is returned as is.
    """
    if isinstance(shap_values, list):
        return shap_values[class_index], expected_value[class_index]
    if getattr(shap_values, "ndim", 2) == 3:
        return shap_values[:, :, class_index], expected_value[class_index]
    return shap_values, expected_value


uploaded_file = st.file_uploader("Upload CSV file", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.write("Dataset Preview:", df.head())

    # Files submitted for scoring often have no label column, so a missing
    # target must not raise KeyError.
    X = df.drop(columns=[target_column], errors="ignore")

    # Encode categorical columns
    # NOTE(review): the encoder is fitted on the uploaded file, so the integer
    # codes depend on which categories happen to be present. They only match
    # training if the same categories occur -- ideally reuse the encoders
    # that were fitted at training time.
    for col in X.select_dtypes(include="object").columns:
        # astype(str) keeps codes unchanged for pure-string columns while
        # avoiding a TypeError when missing values (NaN) are mixed in.
        X[col] = LabelEncoder().fit_transform(X[col].astype(str))

    # Add missing columns (filled with 0), drop extras and reorder in one
    # step -- the same alignment used for the reference dataset above.
    X = X.reindex(columns=list(feature_order), fill_value=0)

    if X.empty:
        # Predicting and indexing the first row would both fail on no rows.
        st.warning("The uploaded file contains no rows to score.")
    else:
        # ==============================
        # PREDICTIONS
        # ==============================
        y_pred = model.predict(X)
        df['predicted_credit_risk'] = y_pred
        st.write("Predictions:", df[['predicted_credit_risk']])

        # ==============================
        # SHAP EXPLAINABILITY
        # ==============================
        explainer = shap.TreeExplainer(model)
        class_shap, class_base = _select_class_explanation(
            explainer.shap_values(X), explainer.expected_value
        )

        st.subheader("SHAP Summary Plot")
        plt.figure(figsize=(10, 6))
        shap.summary_plot(class_shap, X, show=False)
        summary_fig = plt.gcf()
        st.pyplot(summary_fig)
        # Close (not just clear) the figure so reruns do not accumulate
        # open matplotlib figures.
        plt.close(summary_fig)

        st.subheader("SHAP Force Plot for First Observation")
        force_plot = shap.force_plot(class_base, class_shap[0], X.iloc[0])
        # The force plot is a JavaScript visualization: embed SHAP's JS bundle
        # together with the plot's HTML. (`.data` is only the raw parameter
        # dict, and `shap.initjs()` only works inside a notebook.)
        st.components.v1.html(
            f"<head>{shap.getjs()}</head><body>{force_plot.html()}</body>",
            height=400,
        )




Dataset Loaded Successfully
   age   income     sex  credit_score  loan_amount employment_status  \
0   59  1000000    Male           471      1600000          Employed   
1   49  2800000  Female           516       900000          Employed   
2   35  1300000  Female           410      2400000          Employed   
3   63  1700000    Male           485      1300000     Self-Employed   
4   28  4800000  Female           347      2900000     Self-Employed   

  credit_history repayment_behavior  existing_liabilities credit_risk  
0           Good            On-Time                900000        High  
1      Excellent            On-Time                600000        High  
2        Average            On-Time                300000        High  
3      Excellent            On-Time                200000        High  
4        Average            On-Time                850000        High  
