In [1]:
import os
import sys
from pathlib import Path


def find_repo_root(start: Path) -> Path:
    """Return the closest ancestor of ``start`` (``start`` included) holding ``src/MOBPY``.

    Parameters
    ----------
    start : Path
        Directory from which the upward search begins.

    Returns
    -------
    Path
        The first directory on the way up that contains ``src/MOBPY``; ``start``
        itself when no ancestor qualifies, so the caller never ends up changing
        into an unrelated directory.
    """
    for candidate in (start, *start.parents):
        if (candidate / "src" / "MOBPY").is_dir():
            return candidate
    return start


# Work from the repository root so the relative paths used below ("src",
# "data/...", "docs/...") resolve on any machine, not just one developer's
# checkout. Set MOBPY_REPO_ROOT to point at the checkout explicitly when the
# notebook is launched from outside the repository.
repo_root = Path(os.environ.get("MOBPY_REPO_ROOT") or find_repo_root(Path.cwd()))
os.chdir(repo_root)

# Make the in-repo package importable without installing it.
repo_src = Path("src").resolve()
if str(repo_src) not in sys.path:
    sys.path.insert(0, str(repo_src))

# Numerics and plotting (figures render inline in notebooks)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from MOBPY.binning.mob import MonotonicBinner
from MOBPY.core.constraints import BinningConstraints
from MOBPY.plot.csd_gcm import plot_csd_gcm
from MOBPY.plot.mob_plot import MOBPlot

In [2]:
# Load the German credit data (path is relative to the working directory).
df = pd.read_csv("data/german_data_credit_cat.csv")

# Shift the target down by one, then force it into {0, 1} as integers.
# NOTE(review): the raw column is presumably coded 1/2 — confirm against the
# data dictionary; the clip would silently hide any other code.
df["default"] = (df["default"] - 1).clip(lower=0, upper=1).astype(int)

display(df.head())

Unnamed: 0,Statusofexistingcheckingaccount,Durationinmonth,Credithistory,Purpose,Creditamount,Savingsaccountbonds,Presentemploymentsince,Installmentrate,Personalstatussex,Otherdebtors,...,Property,Age,installmentplans,Housing,existingcredits,Job,Numberofpeople,Telephone,foreignworker,default
0,A11,6,A34,A43,1169,A65,A75,4,A93,A101,...,A121,67,A143,A152,2,A173,1,A192,A201,0
1,A12,48,A32,A43,5951,A61,A73,2,A92,A101,...,A121,22,A143,A152,1,A173,1,A191,A201,1
2,A14,12,A34,A46,2096,A61,A74,2,A93,A101,...,A121,49,A143,A152,1,A172,2,A191,A201,0
3,A11,42,A32,A42,7882,A61,A74,2,A93,A103,...,A122,45,A143,A153,1,A173,2,A191,A201,0
4,A11,24,A33,A40,4870,A61,A73,3,A93,A101,...,A124,53,A143,A153,2,A173,2,A191,A201,1


### 1) Pick a numeric feature x to bin (auto-select the first “good” candidate)

In [3]:
# Target column; it is never eligible as the feature to bin.
y_col = "default"

# Eligible features: numeric columns with at least 5 distinct non-null values.
num_cols = df.select_dtypes(include=[np.number]).columns.tolist()
candidates = [
    name
    for name in num_cols
    if name != y_col and df[name].nunique(dropna=True) >= 5
]

if not candidates:
    # Nothing usable: synthesize a running-index feature so the rest of the
    # notebook can still execute (a real numeric column is preferable).
    x_col = "x_auto"
    df[x_col] = np.arange(len(df), dtype=float)
    print("No suitable numeric column found; using fallback feature:", x_col)
else:
    # Take the first eligible column in DataFrame order.
    x_col = candidates[0]

x_col, y_col


('Durationinmonth', 'default')

### 2) Fit the binner (mean-only) with constraints

In [4]:
# Constraints for the bin-merging stage.
cons = BinningConstraints(
    min_bins=2,          # aim for at least 2 bins in non-maximize paths
    max_bins=6,          # never more than 6 bins
    min_samples=0.05,    # per-bin floor, as a fraction of clean rows
    maximize_bins=True,
    initial_pvalue=0.4,  # see BinningConstraints for the exact role of this setting
)

# Build and fit in one expression; `binner` is whatever .fit() returns.
binner = MonotonicBinner(
    df=df,
    x=x_col,
    y=y_col,
    constraints=cons,
    metric="mean",        # only the mean is supported (median/quantiles: future work)
    sign="auto",          # direction inferred from grouped correlation
    strict=True,          # equal-mean plateaus are merged in PAVA
    exclude_values=None,
).fit()

print("Resolved sign:", binner.resolved_sign_)


Resolved sign: +


### 3) Inspect PAVA blocks before constraint-driven merges

In [5]:
# PAVA output (blocks monotone by mean), taken before merge-adjacent() runs.
pre_blocks = binner.pava_blocks_()

# One row per block; columns appear in this order.
block_fields = ("left", "right", "n", "sum", "mean", "std", "ymin", "ymax")

pd.DataFrame(
    [{field: getattr(blk, field) for field in block_fields} for blk in pre_blocks]
)


Unnamed: 0,left,right,n,sum,mean,std,ymin,ymax
0,4.0,6.0,7,0.0,0.0,0.0,0.0,0.0
1,6.0,8.0,80,9.0,0.1125,0.317974,0.0,1.0
2,8.0,9.0,7,1.0,0.142857,0.377964,0.0,1.0
3,9.0,12.0,86,17.0,0.197674,0.400581,0.0,1.0
4,12.0,16.0,251,62.0,0.247012,0.432135,0.0,1.0
5,16.0,27.0,340,109.0,0.320588,0.467391,0.0,1.0
6,27.0,36.0,59,20.0,0.338983,0.477427,0.0,1.0
7,36.0,45.0,100,42.0,0.42,0.496045,0.0,1.0
8,45.0,72.0,69,39.0,0.565217,0.49936,0.0,1.0
9,72.0,inf,1,1.0,1.0,0.0,1.0,1.0


### 5) Plot the classic “means vs x + step fit” (backward-compatible helper)

In [6]:
from MOBPY.plot.csd_gcm import plot_csd_gcm

# Fitted PAVA object held by the binner; it supplies both the grouped data
# and the exported blocks for the plot.
pava_fit = binner._pava

plot_csd_gcm(
    x_name=x_col,
    y_name=y_col,
    groups_df=pava_fit.groups_,
    blocks=pava_fit.export_blocks(as_dict=True),
    savepath=None,  # no output path given
)

  plt.show()


### 6) Plot the Cumulative Sum Diagram (CSD)

In [7]:
from MOBPY.plot.csd_gcm import plot_csd

# Grouped data produced by the binner's PAVA stage.
pava_groups = binner._pava.groups_

plot_csd(groups_df=pava_groups, savepath=None)  # no output path given

  plt.show()


### 7) Plot the Greatest Convex Minorant (GCM) over the CSD

In [8]:
from MOBPY.plot.csd_gcm import plot_gcm

# Fitted PAVA object held by the binner; the plot needs its grouped data and
# its exported blocks.
pava_fit = binner._pava

plot_gcm(
    savepath=None,  # no output path given
    groups_df=pava_fit.groups_,
    pava_blocks=pava_fit.export_blocks(as_dict=True),
)

  plt.show()


### 8) Plot the MOB summary (WoE bars + bad-rate line)

In [9]:
from MOBPY.plot.mob_plot import MOBPlot

# Create the demo output folder up front so this cell also works when run alone.
out_dir = Path("docs/demo_plots")
out_dir.mkdir(parents=True, exist_ok=True)

# Per-bin summary table from the fitted binner.
summary = binner.summary_()

# The summary plot needs these columns; per the original note they are only
# meaningful for a binary target.
required_cols = {"woe", "iv_grp", "bad_rate"}

if not required_cols <= set(summary.columns):
    print("Target appears non-binary; WoE/IV summary plot is skipped.")
else:
    MOBPlot.plot_bins_summary(summary, savepath=None)


  plt.show()


### 9) (Optional) Animate PAVA’s merge steps as a GIF

In [10]:
# GIF export depends on the optional `imageio` package (pip install imageio).
from MOBPY.plot.csd_gcm import animate_pava_steps

# Destination file inside the demo output folder.
gif_path = out_dir / "pava_steps.gif"

try:
    animate_pava_steps(
        pava=binner._pava,
        savepath=str(gif_path),
        x_name=x_col,
        y_name=y_col,
        fps=1,
    )
except ImportError as e:
    # Optional dependency missing: report and carry on.
    print("Skipping GIF (imageio not installed):", e)
except RuntimeError as e:
    # Raised when there is no recorded PAVA history to animate.
    print("Skipping GIF (no PAVA history):", e)
else:
    print("Saved:", gif_path)


Skipping GIF (imageio not installed): imageio is required for GIF creation. Install via `pip install imageio`.


### 10) Transform a few values to their assigned bins

In [11]:
# First ten feature values, used as a small demonstration input.
sample_vals = df[x_col].head(10)

# Ask the binner for each representation of the assigned bin in turn.
assigned_labels, assigned_lefts, assigned_rights = (
    binner.transform(sample_vals, assign=mode)
    for mode in ("interval", "left", "right")
)

# Side-by-side view: raw value, interval label, and both bin edges.
preview = pd.DataFrame({
    x_col: sample_vals.to_numpy(),
    "interval": assigned_labels.to_numpy(),
    "left": assigned_lefts.to_numpy(),
    "right": assigned_rights.to_numpy(),
})
display(preview)

Unnamed: 0,Durationinmonth,interval,left,right
0,6,"[4, 9)",4.0,9.0
1,48,"[45, inf)",45.0,inf
2,12,"[12, 16)",12.0,16.0
3,42,"[36, 45)",36.0,45.0
4,24,"[16, 36)",16.0,36.0
5,36,"[36, 45)",36.0,45.0
6,24,"[16, 36)",16.0,36.0
7,36,"[36, 45)",36.0,45.0
8,12,"[12, 16)",12.0,16.0
9,30,"[16, 36)",16.0,36.0
