In [None]:
import joblib, os, numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler



In [None]:
# Columns kept for modelling plus `loan_status`, which is used later to build
# the target and the reward.
cols = [
    'loan_amnt','int_rate','term','emp_length','home_ownership','annual_inc',
    'purpose','dti','delinq_2yrs','inq_last_6mths','open_acc','pub_rec',
    'revol_bal','revol_util','total_acc','fico_range_low','fico_range_high',
    'loan_status'
]
# Parse only the columns that are actually used (first 500k rows); this avoids
# loading and type-inferring every other column of the CSV.
df = pd.read_csv("data/accepted_2007_to_2018.csv", usecols=cols, nrows=500000)
# `usecols` does not guarantee column order, so reorder explicitly. The .copy()
# makes the frame independent so the column assignments below do not write into
# a slice (SettingWithCopyWarning).
df = df[cols].copy()
# Accepts both numeric values and strings with a trailing percent sign.
df['int_rate'] = df['int_rate'].astype(str).str.rstrip('%').astype(float)
# Keep the first run of digits in `term`; raw string avoids the invalid "\d"
# escape, and expand=False returns a Series rather than a one-column DataFrame.
df['term'] = df['term'].astype(str).str.extract(r'(\d+)', expand=False).astype(float)
def parse_emp(x):
    """Convert an ``emp_length`` label into a numeric number of years.

    Parameters
    ----------
    x : str or missing value
        Label such as ``'10+ years'``, ``'< 1 year'`` or ``'3 years'``.
        Surrounding whitespace is ignored.

    Returns
    -------
    float or int
        ``10`` for ``'10+ years'``, ``0.5`` for ``'< 1 year'``, otherwise the
        leading number of the label as a float. ``np.nan`` is returned for
        missing values and for labels whose first token is not a number.
    """
    if pd.isna(x):
        return np.nan
    label = str(x).strip()
    if label == '10+ years':
        return 10
    if label == '< 1 year':
        return 0.5
    try:
        return float(label.split()[0])
    except (ValueError, IndexError):
        # ValueError: first token is not numeric; IndexError: empty label.
        return np.nan
df['emp_length'] = df['emp_length'].apply(parse_emp)

# Keep only loans with one of the two final outcomes. The .copy() makes the
# filtered frame independent so adding `target` below does not write into a
# slice of the unfiltered frame (SettingWithCopyWarning).
df = df[df['loan_status'].isin(['Fully Paid','Charged Off'])].copy()
# target: 0 = Fully Paid, 1 = anything else (only 'Charged Off' after the filter).
df['target'] = (df['loan_status'] != 'Fully Paid').astype(int)
df = pd.get_dummies(df, columns=['purpose','home_ownership'], drop_first=True)

X = df.drop(columns=['loan_status','target'])
y = df['target']
X_train_df, X_test_df, y_train_series, y_test_series = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Raw (unscaled) amount, rate and outcome for the test rows; these feed the
# reward computation. `loan_status` is looked up by index label because it was
# dropped from X.
loan_info_test = X_test_df[['loan_amnt','int_rate']].copy()
loan_info_test['loan_status'] = df.loc[X_test_df.index,'loan_status']
loan_amnt_test = loan_info_test['loan_amnt'].values
int_rate_test = loan_info_test['int_rate'].values
status_test = loan_info_test['loan_status'].values
# Deserialize the preprocessed artifact once instead of once per element.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts you trust.
preprocessed = joblib.load("preprocessed.joblib")
X_train = preprocessed[0]
X_test = preprocessed[1]
y_train = preprocessed[2]
y_test = preprocessed[3]

# Predictions made on X_test are later paired positionally with
# loan_amnt_test / int_rate_test / status_test. A row-count mismatch would be
# silently truncated by zip() and produce meaningless policy values, so fail
# loudly here instead.
n_test_rows = X_test.shape[0] if hasattr(X_test, "shape") else len(X_test)
if n_test_rows != len(loan_amnt_test):
    raise ValueError(
        f"preprocessed.joblib X_test has {n_test_rows} rows but the test split "
        f"built in this notebook has {len(loan_amnt_test)}; the two must come "
        "from the same split."
    )


In [None]:
# Obtain predicted default probabilities for the test set, trying in order:
#   1. cached predictions on disk,
#   2. saved models (stacked ensemble if present, otherwise LightGBM alone),
#   3. a freshly fitted logistic regression.
# Each stage runs only if the previous one produced nothing, so `preds_proba`
# is always set when this cell finishes. Previously, missing model files left
# it as None because the final fallback was reached only on an exception.
# NOTE: joblib.load unpickles arbitrary objects; only load artifacts you trust.
preds_proba = None

# 1) Cached predictions.
if os.path.exists("preds_proba_test.joblib"):
    try:
        preds_proba = joblib.load("preds_proba_test.joblib")
    except Exception as exc:
        print(f"Could not load cached predictions ({exc!r}); trying saved models.")
        preds_proba = None

# 2) Saved models.
if preds_proba is None:
    try:
        if os.path.exists("models/stacker.joblib"):
            stacker = joblib.load("models/stacker.joblib")
            lgbp = joblib.load("models/best_lgb.joblib")
            rfp = joblib.load("models/best_rf.joblib")
            lrp = joblib.load("models/best_lr.joblib")
            p1 = lgbp.predict_proba(X_test)[:,1]
            p2 = rfp.predict_proba(X_test)[:,1]
            p3 = lrp.predict_proba(X_test)[:,1]
            # One column per base model, in the order LightGBM, RF, LR.
            stack_input = np.vstack([p1,p2,p3]).T
            preds_proba = stacker.predict_proba(stack_input)[:,1]
        elif os.path.exists("models/best_lgb.joblib"):
            lgbp = joblib.load("models/best_lgb.joblib")
            preds_proba = lgbp.predict_proba(X_test)[:,1]
    except Exception as exc:
        print(f"Saved models could not be used ({exc!r}); fitting a fallback model.")
        preds_proba = None

# 3) Last resort: fit a simple class-balanced logistic regression.
if preds_proba is None:
    from sklearn.linear_model import LogisticRegression
    lr = LogisticRegression(max_iter=1000, class_weight='balanced')
    lr.fit(X_train, y_train)
    preds_proba = lr.predict_proba(X_test)[:,1]


In [None]:
def compute_reward(action, amnt, rate, status):
    """Reward of a single lending decision.

    Parameters
    ----------
    action : int
        0 means the loan is denied; any other value is treated as approved.
    amnt : number
        Loan amount.
    rate : number
        Interest rate expressed in percent (it is divided by 100).
    status : str
        Observed outcome of the loan.

    Returns
    -------
    number
        ``0.0`` when denied; ``amnt * rate / 100`` when approved and the
        status is ``'Fully Paid'``; ``-amnt`` for an approved loan with any
        other status.
    """
    if action == 0:
        return 0.0
    repaid = status == 'Fully Paid'
    return amnt * (rate/100.0) if repaid else -amnt


In [None]:
def policy_value(actions, amnts, rates, statuses):
    """Mean per-loan reward obtained by a vector of decisions.

    The four arguments are iterated in parallel, one element per loan, and
    each tuple is scored with ``compute_reward``. Returns the ``np.mean`` of
    those rewards.
    """
    per_loan = []
    for decision, amount, rate_pct, outcome in zip(actions, amnts, rates, statuses):
        per_loan.append(compute_reward(decision, amount, rate_pct, outcome))
    return np.mean(per_loan)


In [None]:
# Sweep 99 evenly spaced cut-offs in [0.01, 0.99]. For each cut-off a loan is
# approved (action 1) when its predicted probability is below the cut-off, and
# the resulting policy is scored on the test-split amounts, rates and outcomes.
thresholds = np.linspace(0.01, 0.99, 99)
values = [
    policy_value(
        (preds_proba < cutoff).astype(int),
        loan_amnt_test,
        int_rate_test,
        status_test,
    )
    for cutoff in thresholds
]

# Keep the cut-off with the highest mean reward (first one on ties).
best_idx = int(np.argmax(values))
best_threshold = float(thresholds[best_idx])
best_value = float(values[best_idx])
best_threshold, best_value


In [None]:
# Decision vectors for the four policies being compared (1 = approve).
actions_rl = (preds_proba < best_threshold).astype(int)     # tuned cut-off
actions_supervised_05 = (preds_proba < 0.5).astype(int)     # fixed 0.5 cut-off
actions_approve_all = np.ones_like(actions_rl)
actions_deny_all = np.zeros_like(actions_rl)

# Score every policy against the same test-split amounts, rates and outcomes.
eval_args = (loan_amnt_test, int_rate_test, status_test)
value_rl = policy_value(actions_rl, *eval_args)
value_supervised_05 = policy_value(actions_supervised_05, *eval_args)
value_approve_all = policy_value(actions_approve_all, *eval_args)
value_deny_all = policy_value(actions_deny_all, *eval_args)

results = {
    "best_threshold": best_threshold,
    "best_value": value_rl,
    "supervised_0.5_value": value_supervised_05,
    "approve_all_value": value_approve_all,
    "deny_all_value": value_deny_all,
}
results


In [None]:
# Positions of the first 20 test loans on which the tuned-threshold policy and
# the fixed 0.5 policy make different decisions.
diff_idx = np.flatnonzero(actions_rl != actions_supervised_05)[:20]

# One record per disagreement, including the reward approval would have earned.
rows = [
    {
        "index": int(pos),
        "p_default": float(preds_proba[pos]),
        "rl_action": int(actions_rl[pos]),
        "supervised_action": int(actions_supervised_05[pos]),
        "loan_amnt": float(loan_amnt_test[pos]),
        "int_rate": float(int_rate_test[pos]),
        "status": str(status_test[pos]),
        "reward_if_approve": float(
            compute_reward(1, loan_amnt_test[pos], int_rate_test[pos], status_test[pos])
        ),
    }
    for pos in diff_idx
]
pd.DataFrame(rows)


In [None]:
# Optional offline-RL experiment with d3rlpy's Conservative Q-Learning.
# The cell is best-effort: if d3rlpy is missing or training fails, the reason
# is printed and `policy_value_cql` stays None so the notebook keeps running.
policy_value_cql = None
try:
    import d3rlpy
    from d3rlpy.dataset import MDPDataset
    # Actions are discrete (0 = deny, 1 = approve), so the discrete variant is
    # required; plain `CQL` is the continuous-action algorithm.
    from d3rlpy.algos import DiscreteCQL
except ImportError as exc:
    print(f"Skipping CQL experiment: d3rlpy is not available ({exc}).")
else:
    try:
        states = X_test.astype('float32')
        # Logged actions are the decisions of the tuned-threshold policy.
        actions_hist = (preds_proba < best_threshold).astype('int32')
        rewards = np.array(
            [
                compute_reward(a, am, rt, st)
                for a, am, rt, st in zip(actions_hist, loan_amnt_test, int_rate_test, status_test)
            ],
            dtype='float32',
        )
        # NOTE(review): every step is marked non-terminal, so the rows form one
        # long trajectory. If each loan is meant to be an independent one-step
        # decision, the terminals should presumably all be True -- confirm.
        terminals = np.zeros(len(rewards), dtype='bool')
        # Positional arguments (observations, actions, rewards, terminals):
        # MDPDataset has no `states=` keyword.
        dataset = MDPDataset(states, actions_hist, rewards, terminals)
        cql = DiscreteCQL()
        # d3rlpy 1.x training API (epoch-based fit).
        cql.fit(dataset, n_epochs=20)
        # NOTE: the policy is trained and evaluated on the same test-set rows.
        policy = cql.predict(states)
        policy_value_cql = policy_value(
            (np.asarray(policy) >= 0.5).astype(int),
            loan_amnt_test, int_rate_test, status_test,
        )
    except Exception as exc:
        print(f"CQL experiment failed: {exc!r}")
        policy_value_cql = None
# Last expression of the cell so the notebook displays the value when computed.
policy_value_cql


In [None]:
# Persist the predictions, the selected cut-off and the comparison table so
# they can be reloaded without re-running the notebook.
os.makedirs("rl_outputs", exist_ok=True)
joblib.dump(
    {
        "preds_proba": preds_proba,
        "best_threshold": best_threshold,
        "best_value": best_value,
        "results": results,
    },
    "rl_outputs/rl_policy.joblib",
)
