In [1]:
!pip install -q scikit-learn pandas pyarrow joblib

from google.colab import drive
drive.mount('/content/drive')

import os
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import joblib
import warnings
warnings.filterwarnings('ignore')

SEED = 42
np.random.seed(SEED)


Mounted at /content/drive


In [2]:
# EDIT: point this to your processed dataset (parquet or csv) that contains numeric features and a 'target' column
DATA_PATH = '/content/drive/MyDrive/loan-data/processed_sample.parquet'  # change if needed

def load_df(path):
    if path.endswith('.parquet'):
        return pd.read_parquet(path)
    else:
        return pd.read_csv(path, low_memory=False)

df = load_df(DATA_PATH)
print("Loaded rows,cols:", df.shape)
df.head(3)


Loaded rows,cols: (113034, 15)


Unnamed: 0,loan_amnt,int_rate,annual_inc,dti,fico_range_low,fico_range_high,term_ 36 months,term_ 60 months,home_ownership_ANY,home_ownership_MORTGAGE,home_ownership_NONE,home_ownership_OTHER,home_ownership_OWN,home_ownership_RENT,target
0,-0.114653,0.525858,0.224567,-0.800913,-1.165317,-1.165286,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0
1,-0.54916,-0.640744,0.422585,-0.599756,-1.165317,-1.165286,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0
2,-1.092292,-0.080277,-0.369488,1.123446,-0.410514,-0.410508,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0


In [3]:
# Required: numeric features and 'target' column (0 = fully paid, 1 = defaulted)
if 'target' not in df.columns:
    raise ValueError("No 'target' column found. Create binary target: 0 fully paid, 1 defaulted.")

# Choose state features. These should be numeric. Edit the list to match your processed df.
# If your pipeline created one-hot columns, include them here. Keep this list focused to avoid overfitting.
SUGGESTED = [c for c in [
    'loan_amnt','int_rate','annual_inc','dti','fico_range_low','fico_range_high','term'
] if c in df.columns]

# Add any one-hot columns automatically (optional)
# We'll include columns that are numeric and not 'target'
auto_add = [c for c in df.columns if c not in SUGGESTED + ['target'] and np.issubdtype(df[c].dtype, np.number)]
# limit extra columns to avoid massive models; pick up to 30
auto_add = auto_add[:30]
STATE_FEATS = SUGGESTED + auto_add

print("Using features:", STATE_FEATS)
X_state = df[STATE_FEATS].fillna(0).astype(float).copy()
y_target = df['target'].astype(int).copy()
print("State matrix shape:", X_state.shape)


Using features: ['loan_amnt', 'int_rate', 'annual_inc', 'dti', 'fico_range_low', 'fico_range_high', 'term_ 36 months', 'term_ 60 months', 'home_ownership_ANY', 'home_ownership_MORTGAGE', 'home_ownership_NONE', 'home_ownership_OTHER', 'home_ownership_OWN', 'home_ownership_RENT']
State matrix shape: (113034, 14)


In [4]:
# Reward rules:
# - action == 0 (Deny): reward = 0
# - action == 1 (Approve) AND loan fully paid (target==0): reward = + (loan_amnt * (int_rate/100))
# - action == 1 (Approve) AND defaulted (target==1): reward = - loan_amnt

loan_amt_col = None
int_rate_col = None
for c in ['loan_amnt','loan_amount','funded_amnt']:
    if c in df.columns:
        loan_amt_col = c
        break
for c in ['int_rate','interest_rate']:
    if c in df.columns:
        int_rate_col = c
        break

if loan_amt_col is None or int_rate_col is None:
    raise ValueError("Dataset must have loan amount and interest rate columns (loan_amnt and int_rate).")

# Build Q dataset: for each state, include two rows: (s, a=0) and (s, a=1)
states = X_state.values.astype(float)
loan_amts = df[loan_amt_col].astype(float).values
int_rates = df[int_rate_col].astype(float).values / 100.0  # convert percent to fraction
targets = y_target.values

n = states.shape[0]
# Create arrays for two actions
S_list = np.vstack([states, states])                 # shape (2n, d)
A_list = np.concatenate([np.zeros(n), np.ones(n)])  # shape (2n,)
# Compute rewards for approve (1) and deny (0)
R_deny = np.zeros(n)
R_approve = np.where(targets == 0, loan_amts * int_rates, -loan_amts)

R_list = np.concatenate([R_deny, R_approve])       # shape (2n,)

# Create next_obs as zeros and terminals True (one-step episodes)
# We'll not use next_obs for this simple Q-fitting approach.
print("Built Q dataset shapes:", S_list.shape, A_list.shape, R_list.shape)


Built Q dataset shapes: (226068, 14) (226068,) (226068,)


In [5]:
# Concatenate action as an extra column to the state features
A_col = A_list.reshape(-1,1)
X_q = np.hstack([S_list, A_col])
y_q = R_list.astype(float)

print("X_q shape:", X_q.shape, "y_q shape:", y_q.shape)

# Train/test split for Q model (random). We'll keep a holdout to check regression fit.
Xq_train, Xq_test, yq_train, yq_test = train_test_split(X_q, y_q, test_size=0.2, random_state=SEED)


X_q shape: (226068, 15) y_q shape: (226068,)


In [6]:
# Simple, robust regressor. You can swap for GradientBoosting or NN later.
q_model = Pipeline([
    ('scaler', StandardScaler()),    # helps if you swap to linear/NN later
    ('rf', RandomForestRegressor(n_estimators=200, random_state=SEED, n_jobs=-1))
])

print("Training Q model...")
q_model.fit(Xq_train, yq_train)
print("Done. Train R^2:", q_model.score(Xq_train, yq_train), " Test R^2:", q_model.score(Xq_test, yq_test))

# Save model
os.makedirs('/content/drive/MyDrive/loan-data/models', exist_ok=True)
joblib.dump(q_model, '/content/drive/MyDrive/loan-data/models/q_model_rf.joblib')
print("Saved Q model to Drive.")


Training Q model...
Done. Train R^2: 0.878196797702235  Test R^2: 0.11874719496892683
Saved Q model to Drive.


In [7]:
# For each original state, compute Q_hat(s,0) and Q_hat(s,1)
n = states.shape[0]
X0 = np.hstack([states, np.zeros((n,1))])
X1 = np.hstack([states, np.ones((n,1))])

q0 = q_model.predict(X0)
q1 = q_model.predict(X1)

# Policy picks action with larger predicted Q
policy_actions = (q1 > q0).astype(int)  # 1 if approve chosen, 0 if deny

# Sanity counts
print("Policy chooses approve for", int(policy_actions.sum()), "out of", n, "applicants")


Policy chooses approve for 67903 out of 113034 applicants


In [8]:
# Compute reward when using policy: for each row, reward = reward_if_action(policy_action)
# Use same reward function logic as earlier
policy_rewards = np.where(policy_actions == 0, 0, np.where(targets == 0, loan_amts * int_rates, -loan_amts))
estimated_policy_value = policy_rewards.mean()
estimated_policy_value_sum = policy_rewards.sum()  # total reward across dataset
print("Estimated Policy Value (mean reward per applicant):", float(estimated_policy_value))
print("Estimated Policy Value (total over dataset):", float(estimated_policy_value_sum))

# Compare with simple baselines:
# - Always deny => mean reward = 0
# - Always approve => mean reward = mean(R_approve)
always_approve_mean = R_approve.mean()
print("Baseline: Always deny mean reward = 0")
print("Baseline: Always approve mean reward = {:.4f}".format(float(always_approve_mean)))


Estimated Policy Value (mean reward per applicant): 0.0429472986707429
Estimated Policy Value (total over dataset): 4854.504957948753
Baseline: Always deny mean reward = 0
Baseline: Always approve mean reward = -0.0072


In [9]:
out = df.copy().reset_index(drop=True)
out['policy_action'] = policy_actions
out['q0'] = q0
out['q1'] = q1
out['policy_reward'] = policy_rewards
out.to_parquet('/content/drive/MyDrive/loan-data/policy_results.parquet', index=False)
out.to_csv('/content/drive/MyDrive/loan-data/policy_results.csv', index=False)
print("Saved policy results to Drive: policy_results.parquet / .csv")


Saved policy results to Drive: policy_results.parquet / .csv


In [10]:
mask = (policy_actions == 0) & (R_approve > 0)
print("Denied but approving would have profited:", mask.sum())
out[mask].head(6)


Denied but approving would have profited: 18112


Unnamed: 0,loan_amnt,int_rate,annual_inc,dti,fico_range_low,fico_range_high,term_ 36 months,term_ 60 months,home_ownership_ANY,home_ownership_MORTGAGE,home_ownership_NONE,home_ownership_OTHER,home_ownership_OWN,home_ownership_RENT,target,policy_action,q0,q1,policy_reward
6,0.537106,1.64264,3.062829,-0.999257,0.94813,0.948091,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0,0,0.0,-0.068984,0.0
18,0.189501,0.720983,-0.151668,1.077025,-0.259554,-0.259553,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,-0.023449,0.0
33,0.224805,3.095704,-0.435494,-0.093345,-1.165317,-1.165286,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,-0.093985,0.0
34,0.645732,0.720983,0.092554,0.968709,0.193328,0.193314,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,-0.051136,0.0
38,0.439342,0.291292,-0.395891,-0.162977,-1.014357,-1.01433,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0.0,-0.024859,0.0
42,2.166504,0.021437,1.33611,0.605782,0.193328,0.193314,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0,0,0.0,-0.040056,0.0
