# OpenFinGuard — Feature Refinement & Signal Audit

Week: 5  
Day: 1  
Objective:
- Improve feature signal quality
- Preserve explainability
- Avoid leakage


Load modeling data

In [1]:
import pandas as pd

# Read the processed modeling table; `cycle_end_date` is parsed to datetime
# so it can be used for the date-based split further down.
MODELING_TABLE_PATH = "../data/processed/modeling_table.csv"
DATE_COLUMNS = ["cycle_end_date"]

modeling = pd.read_csv(MODELING_TABLE_PATH, parse_dates=DATE_COLUMNS)


In [2]:
# Date-based split: the cutoff is the 75th percentile of `cycle_end_date`.
# Rows on or before the cutoff form the training set; strictly later rows
# form the validation set, so validation cycles never precede training cycles.
cutoff_date = modeling["cycle_end_date"].quantile(0.75)

# Take explicit copies: later cells add columns to `train` and `valid`, and
# writing into a boolean-mask selection of `modeling` raises
# SettingWithCopyWarning on pandas versions without copy-on-write.
train = modeling[modeling["cycle_end_date"] <= cutoff_date].copy()
valid = modeling[modeling["cycle_end_date"] > cutoff_date].copy()


Feature-by-feature signal check

In [3]:
# Candidate features whose class-conditional means are compared below.
features = [
    "credit_utilization", "min_due_ratio",
    "monthly_spend", "spend_volatility",
    "util_roll_3", "spend_roll_3",
    "spend_shock",
]

# Mean of each feature per `late_30` class on the training split only,
# transposed so features are rows and the two classes are columns.
per_class_means = train.groupby("late_30")[features].mean()
signal_summary = per_class_means.transpose()

signal_summary


late_30,0,1
credit_utilization,0.17038,0.178436
min_due_ratio,0.204548,0.168455
monthly_spend,25938.211382,25359.259259
spend_volatility,10571.607525,10865.287402
util_roll_3,0.164947,0.170129
spend_roll_3,26407.723577,27881.481481
spend_shock,0.081301,0.037037


Create ratio & interaction features (high value)
4.1 Payment stress ratio
High utilization + high minimum due = stress

In [4]:
# Payment stress: product of the minimum-due ratio and credit utilization.
# `.assign` returns a new frame rather than writing a column into `train` /
# `valid` in place; those frames are boolean-mask selections of `modeling`,
# and item assignment on such selections raises SettingWithCopyWarning on
# pandas versions without copy-on-write. It also keeps this cell idempotent.
train = train.assign(
    payment_stress=train["min_due_ratio"] * train["credit_utilization"]
)
valid = valid.assign(
    payment_stress=valid["min_due_ratio"] * valid["credit_utilization"]
)


4.2 Spend-to-limit ratio

In [5]:
# "Spend to limit": monthly spend divided by credit utilization. The 1e-6
# term keeps the denominator non-zero when utilization is exactly 0.
# `.assign` returns a new frame instead of writing into `train` / `valid`
# in place (they are selections of `modeling`), which avoids
# SettingWithCopyWarning on pandas versions without copy-on-write.
# NOTE(review): the denominator is utilization, not a credit-limit column,
# so the name may not match what is computed - confirm intent.
# NOTE(review): rows with utilization near 0 yield values on the order of
# spend * 1e6, which can dominate group means - consider capping or masking.
train = train.assign(
    spend_to_limit=train["monthly_spend"] / (train["credit_utilization"] + 1e-6)
)
valid = valid.assign(
    spend_to_limit=valid["monthly_spend"] / (valid["credit_utilization"] + 1e-6)
)


Evaluate new feature signals

In [6]:
# Per-class (`late_30`) means of the two engineered features on the training split.
engineered_features = ["payment_stress", "spend_to_limit"]
train.groupby("late_30")[engineered_features].mean()


Unnamed: 0_level_0,payment_stress,spend_to_limit
late_30,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.019184,305305.727394
1,0.018252,437047.186215


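CLASS IMBALANCE (BUSINESS-AWARE, NOT HACKY)

Before addressing class imbalance, add the engineered features to the full modeling table and save it so later steps can use them.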

In [7]:
# Add the two engineered features to the full modeling table, using the
# same formulas that were applied to the train / validation splits.
utilization = modeling["credit_utilization"]

modeling = modeling.assign(
    # Utilization multiplied by the minimum-due ratio.
    payment_stress=utilization.mul(modeling["min_due_ratio"]),
    # Monthly spend over utilization; the 1e-6 offset prevents division by zero.
    spend_to_limit=modeling["monthly_spend"].div(utilization.add(1e-6)),
)


In [8]:
import os

# Write the enriched modeling table back to disk without the index column.
# NOTE(review): this is the same path the notebook loads from in its first
# cell, so the input file is overwritten in place - confirm that is intended.
output_path = "../data/processed/modeling_table.csv"

# Create the target directory if it does not exist yet.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

modeling.to_csv(output_path, index=False)
