In [3]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# ==============================
# 1. Load Dataset
# ==============================
df = pd.read_csv("Credit-Risk-Dataset.csv")

# Separate target and features
X = df.drop("loan_status", axis=1)  # target column name = loan_status
y = df["loan_status"]

# ==============================
# 2. Encode categorical variables
# ==============================
categorical_cols = ["person_home_ownership", "loan_intent", "loan_grade", "cb_person_default_on_file"]

# Replace each categorical column with integer codes. The fitted encoder for
# every column is kept in `label_encoders` so the same mapping can be reused
# (or inverted) later.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

# ==============================
# 3. Train-Test Split
# ==============================
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ==============================
# 4. Train Model
# ==============================
gbm = lgb.LGBMClassifier(
    boosting_type='gbdt',
    objective='binary',
    n_estimators=200,
    learning_rate=0.05,
    max_depth=6,
    random_state=42
)

# Baseline fit: the encoded columns are treated as ordinary numeric features.
gbm.fit(X_train, y_train)

# ==============================
# 5. Evaluate
# ==============================
y_pred = gbm.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# ==============================
# 6. Retrain with categorical features specified
# ==============================
# The integer-coded columns are passed by name through `categorical_feature`.
# No dtype cast is applied: casting columns of `X` at this point would not
# reach `X_train` / `X_test` (they were copied out by the split above) and
# would leave `X` with a different dtype than the data the model was fit on.
# After this call `gbm` holds the categorical-aware model.
gbm.fit(X_train, y_train, categorical_feature=categorical_cols)

# Score the refit model as well so the two variants can be compared; a
# separate name keeps `y_pred` pointing at the baseline predictions.
y_pred_cat = gbm.predict(X_test)
print("Accuracy (categorical features):", accuracy_score(y_test, y_pred_cat))



[LightGBM] [Info] Number of positive: 5663, number of negative: 20401
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001493 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 690
[LightGBM] [Info] Number of data points in the train set: 26064, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.217273 -> initscore=-1.281630
[LightGBM] [Info] Start training from score -1.281630
Accuracy: 0.9337118305969004
[LightGBM] [Info] Number of positive: 5663, number of negative: 20401
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000367 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 692
[LightGBM] [Info] Number of data points in the train set: 26064, number of used featur

In [None]:
# ==================================================
# Frontend + Backend Integration (Gradio App)
# ==================================================

import gradio as gr
import joblib
import pandas as pd

# ==============================
# 1. Load Trained Model & Scaler
# ==============================
# The three artifacts are read in a fixed order: estimator, scaler, then the
# list of feature names used as column labels at prediction time.
# SECURITY: joblib.load unpickles its input, which can execute arbitrary code.
# Only load files produced by a trusted training run.
# NOTE(review): the training cell in this notebook does not write any of these
# files and does not fit a scaler -- confirm where they are produced and that
# the saved model was trained on inputs scaled the same way.
model, scaler, feature_names = (
    joblib.load(artifact_file)
    for artifact_file in ("credit_model.pkl", "scaler.pkl", "feature_names.pkl")
)

# ==============================
# 2. Prediction Function
# ==============================
def predict_credit_risk(
    age, income, home_ownership, emp_length,
    loan_intent, loan_grade, loan_amnt,
    loan_percent_income, cb_default, cb_cred_hist_length
):
    """Score a single applicant with the loaded model.

    The ten arguments are placed, in the order given, into a one-row
    DataFrame whose columns are labelled with the module-level
    ``feature_names``. The row is cast to float, passed through the
    module-level ``scaler`` and then scored by the module-level ``model``.

    Args:
        age, income, emp_length, loan_amnt, loan_percent_income,
        cb_cred_hist_length: numeric applicant fields.
        home_ownership, loan_intent, loan_grade, cb_default: encoded
            category codes; numeric strings such as ``"2"`` are accepted
            because the UI dropdowns supply their choices as strings.

    Returns:
        dict with two keys:
            "Prediction": "❌ Default Risk" when the model predicts class 1,
                otherwise "✅ Approved".
            "Default Probability": probability of class 1 as a plain
                ``float`` rounded to three decimals.

    Raises:
        ValueError: if a value cannot be converted to a number.
    """
    # Create input DataFrame
    input_data = pd.DataFrame([[
        age, income, home_ownership, emp_length,
        loan_intent, loan_grade, loan_amnt,
        loan_percent_income, cb_default, cb_cred_hist_length
    ]], columns=feature_names)

    # Dropdown selections arrive as strings ("0", "1", ...); make every column
    # numeric explicitly instead of relying on the scaler to coerce them.
    input_data = input_data.astype(float)

    # Scale numeric values
    input_data = pd.DataFrame(scaler.transform(input_data), columns=feature_names)

    # Predict
    prediction = model.predict(input_data)[0]
    probability = model.predict_proba(input_data)[0][1]

    # Class 1 is the class whose probability is reported as the default
    # probability, so a class-1 prediction must be labelled as default risk.
    # float() turns the NumPy scalar into a builtin for the JSON output.
    return {
        "Prediction": "❌ Default Risk" if prediction == 1 else "✅ Approved",
        "Default Probability": round(float(probability), 3)
    }

# ==============================
# 3. Gradio Interface
# ==============================
# Page layout: a title, three rows of applicant fields, a submit button and a
# JSON pane that shows the dictionary returned by predict_credit_risk.
# NOTE(review): the dropdowns expose encoded category codes as strings; the
# offered ranges are assumed to cover every code the model was trained on --
# confirm against the encoders used at training time.
with gr.Blocks() as demo:
    gr.Markdown("## 🏦 Credit Risk Prediction App")
    gr.Markdown("Enter applicant details below:")

    # Row 1: applicant profile
    with gr.Row():
        age = gr.Number(label="Person Age")
        income = gr.Number(label="Person Income")
        home_ownership = gr.Dropdown(
            [str(code) for code in range(3)],
            label="Home Ownership (Encoded)",
        )
        emp_length = gr.Number(label="Employment Length (Years)")

    # Row 2: loan details
    with gr.Row():
        loan_intent = gr.Dropdown(
            [str(code) for code in range(6)],
            label="Loan Intent (Encoded)",
        )
        loan_grade = gr.Dropdown(
            [str(code) for code in range(6)],
            label="Loan Grade (Encoded)",
        )
        loan_amnt = gr.Number(label="Loan Amount")
        loan_percent_income = gr.Number(label="Loan Percent Income")

    # Row 3: credit bureau history
    with gr.Row():
        cb_default = gr.Dropdown(
            [str(code) for code in range(2)],
            label="Default on File (0=No, 1=Yes)",
        )
        cb_cred_hist_length = gr.Number(label="Credit History Length")

    submit = gr.Button("🔮 Predict Risk")
    output = gr.JSON(label="Prediction Result")

    # Components are listed in the positional order of predict_credit_risk's
    # parameters, because the click handler passes their values positionally.
    applicant_inputs = [
        age, income, home_ownership, emp_length,
        loan_intent, loan_grade, loan_amnt,
        loan_percent_income, cb_default, cb_cred_hist_length,
    ]
    submit.click(fn=predict_credit_risk, inputs=applicant_inputs, outputs=output)

# ==============================
# 4. Launch
# ==============================
demo.launch()
