In [None]:
import json
from pathlib import Path
import pandas as pd
import scorecardpy as sc

# Input files read by this cell.
DATA_PATH = Path("merged_applicant_and_bureau_cleaned.csv")
BIN_SPEC_PATH = Path("woe_iv_outputs/woe_bin_specs.json")

# === Step 1: Load data and split ===
df = pd.read_csv(DATA_PATH)
# split_df returns a mapping of data frames; its values unpack as
# (train, test) in that order.
split_frames = sc.split_df(df, y='BAD', ratio=0.7)
train, test = split_frames.values()
print(train.shape, test.shape)

# === Step 2: Load your manually created bin specs ===
# The spec file is UTF-8 encoded JSON keyed by variable name.
bin_specs = json.loads(BIN_SPEC_PATH.read_text(encoding="utf-8"))

# === Step 3: Convert JSON specs into a scorecardpy `breaks_list` ===
# sc.woebin_ply() needs complete bin tables that include a `woe` column, and
# those cannot be hand-built from bin edges alone. The manual edges/levels are
# therefore handed to sc.woebin() through `breaks_list`: the custom binning is
# kept, while the WOE statistics are computed from the training split.
breaks_list = {}

for var, spec in bin_specs.items():
    spec_type = spec["type"]
    if spec_type.startswith("numeric"):
        # Only the interior edges are break points; the outermost edges are
        # dropped so the first/last bins stay open-ended. Repeated quantiles
        # are collapsed because break points must be unique.
        interior_edges = sorted(set(spec["quantiles"][1:-1]))
        if not interior_edges:
            # A spec with no interior edge describes a single bin and carries
            # no information, so it is left out of the custom binning.
            print(f"Skipping {var}: no interior break points in its spec.")
            continue
        breaks_list[var] = interior_edges
    elif spec_type.startswith("categorical"):
        # Each listed level becomes its own bin.
        breaks_list[var] = list(spec["levels"])
    # Specs of any other type are ignored.

if not breaks_list:
    raise ValueError(f"No usable bin specs found in {BIN_SPEC_PATH}")

# Fit the WOE tables on the training split only, using the custom breaks.
bins = sc.woebin(train, y='BAD', x=list(breaks_list), breaks_list=breaks_list)

# === Step 4: Use your custom bins for WOE transformation ===
# The same fitted bins are applied to both splits so the test data is
# encoded with WOE values learned from the training data.
train_woe = sc.woebin_ply(train, bins)
test_woe = sc.woebin_ply(test, bins)

# The check-mark had been stored as mis-decoded UTF-8 bytes; it is restored
# to the intended character.
print("✅ Custom binning applied successfully to train and test datasets.")

# Optional: verify transformation worked by showing the first rows.
print(train_woe.head())


In [None]:
# build logistic regression model on WOE data
from sklearn.linear_model import LogisticRegression

# Separate the target column from the predictors of the WOE-encoded
# training split.
# NOTE(review): every column other than 'BAD' is used as a predictor --
# confirm train_woe contains only WOE features.
y_train = train_woe['BAD']
X_train = train_woe.drop(columns=['BAD'])

# fit() returns the estimator itself, so construction and training chain.
lr = LogisticRegression(max_iter=1000).fit(X_train, y_train)
