In [1]:
import pandas as pd
pd.__version__


'2.3.3'

In [4]:
import sys
print("python executable:", sys.executable)


python executable: /Users/anandghugare/Shodh.AI_Project/venv/bin/python3


In [1]:
import pandas as pd

df = pd.read_csv('data/accepted_2007_to_2018.csv', low_memory=False)
print("rows, cols:", df.shape)
df.head(3)


rows, cols: (2260701, 151)


Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,68407277,,3600.0,3600.0,3600.0,36 months,13.99,123.03,C,C4,...,,,Cash,N,,,,,,
1,68355089,,24700.0,24700.0,24700.0,36 months,11.99,820.28,C,C1,...,,,Cash,N,,,,,,
2,68341763,,20000.0,20000.0,20000.0,60 months,10.78,432.66,B,B4,...,,,Cash,N,,,,,,


In [2]:
# Select only rows with a final outcome: Fully Paid or Charged Off/Default
default_statuses = ['Charged Off', 'Default']

clean_df = df[df['loan_status'].isin(['Fully Paid'] + default_statuses)].copy()

# Create binary target: 0 = Fully Paid, 1 = Default (Charged Off)
clean_df['target'] = clean_df['loan_status'].apply(lambda x: 1 if x in default_statuses else 0)

# Select 4 simple numeric features (easy to start with)
features = ['loan_amnt', 'int_rate', 'annual_inc', 'dti']

# Keep only the selected columns and drop missing rows
clean_df = clean_df[features + ['target']].dropna().reset_index(drop=True)

print("Usable rows:", len(clean_df))
clean_df.head()


Usable rows: 1344976


Unnamed: 0,loan_amnt,int_rate,annual_inc,dti,target
0,3600.0,13.99,55000.0,5.91,0
1,24700.0,11.99,65000.0,16.06,0
2,20000.0,10.78,63000.0,10.78,0
3,10400.0,22.45,104433.0,25.37,0
4,11950.0,13.44,34000.0,10.2,0


In [3]:
from sklearn.model_selection import train_test_split

X = clean_df[features]
y = clean_df['target']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print("Train rows:", len(X_train))
print("Test rows:", len(X_test))

Train rows: 1075980
Test rows: 268996


In [4]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LogisticRegression(max_iter=2000))
])

pipeline.fit(X_train, y_train)

print("Model trained.")

Model trained.


In [5]:
from sklearn.metrics import roc_auc_score, f1_score

# Predicted probabilities (for AUC)
y_proba = pipeline.predict_proba(X_test)[:, 1]

# Predicted classes (for F1)
y_pred = pipeline.predict(X_test)

print("AUC:", roc_auc_score(y_test, y_proba))
print("F1 Score:", f1_score(y_test, y_pred))

AUC: 0.6908231179878868
F1 Score: 0.10749012710317592


In [6]:
import joblib
joblib.dump(pipeline, "supervised_pipeline.joblib")

print("Saved model as supervised_pipeline.joblib")

Saved model as supervised_pipeline.joblib


In [7]:
# Cell 1: create reward column
import numpy as np

def compute_reward(row):
    # target: 0 = Fully Paid, 1 = Defaulted (Charged Off)
    if row['target'] == 0:
        return row['loan_amnt'] * (row['int_rate'] / 100.0)   # interest profit
    else:
        return - row['loan_amnt']  # loss of principal

# apply to clean_df and show summary
clean_df['reward'] = clean_df.apply(compute_reward, axis=1)
print("Reward summary (min, mean, max):", clean_df['reward'].min(), clean_df['reward'].mean(), clean_df['reward'].max())
clean_df[['loan_amnt','int_rate','target','reward']].head()

Reward summary (min, mean, max): -40000.0 -1645.1146255156227 12396.0


Unnamed: 0,loan_amnt,int_rate,target,reward
0,3600.0,13.99,0,503.64
1,24700.0,11.99,0,2961.53
2,20000.0,10.78,0,2156.0
3,10400.0,22.45,0,2334.8
4,11950.0,13.44,0,1606.08


In [8]:
# Cell 2: compute p_default and expected profit, create profit baseline action
import joblib
import numpy as np

# load supervised pipeline if not in memory (if you already have pipeline, this is harmless)
pipeline = joblib.load("supervised_pipeline.joblib")

# features matrix for all examples
X_all = clean_df[features]

# predicted default probability from your classifier
p_default = pipeline.predict_proba(X_all)[:, 1]
clean_df['p_default'] = p_default

# expected profit if we approve (approx using plug-in p_default)
loan_amnt = clean_df['loan_amnt'].values
int_rate = clean_df['int_rate'].values / 100.0
expected_profit_if_approve = loan_amnt * int_rate * (1 - p_default) - loan_amnt * p_default
clean_df['expected_profit_if_approve'] = expected_profit_if_approve

# profit-based baseline decision: approve if expected_profit_if_approve > 0
clean_df['profit_baseline_action'] = (clean_df['expected_profit_if_approve'] > 0).astype(int)

# quick stats
approval_rate = clean_df['profit_baseline_action'].mean()
avg_expected_profit = (clean_df['profit_baseline_action'] * clean_df['expected_profit_if_approve']).mean()
print("Profit baseline approval rate:", approval_rate)
print("Average expected profit per applicant under baseline (direct mean):", avg_expected_profit)

# show a few examples
clean_df[['loan_amnt','int_rate','p_default','expected_profit_if_approve','profit_baseline_action']].sample(5)

Profit baseline approval rate: 0.017421128704155315
Average expected profit per applicant under baseline (direct mean): 6.601445022912803


Unnamed: 0,loan_amnt,int_rate,p_default,expected_profit_if_approve,profit_baseline_action
296075,7200.0,14.65,0.237512,-905.814087,0
211271,3000.0,16.99,0.237931,-325.366102,0
1162499,14400.0,20.49,0.367273,-3421.832859,0
1166317,6500.0,7.9,0.077984,-33.443482,0
1155119,7800.0,15.8,0.200374,-577.460932,0


In [9]:
# Cell 3: Direct Method OPE with reward regressor
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Train a reward model r_hat(s) to predict actual reward
r_model = RandomForestRegressor(
    n_estimators=100,
    max_depth=12,
    n_jobs=-1,
    random_state=42
)
r_model.fit(X_all, clean_df['reward'].values)

# Predict reward if approve for every sample
r_hat_approve = r_model.predict(X_all)

# helper: DM estimated policy value
def estimate_policy_value_dm(pi_approve_prob):
    return (pi_approve_prob * r_hat_approve).mean()

# supervised (approve if p_default < 0.5)
pi_supervised = (clean_df['p_default'] < 0.5).astype(float).values

# profit policy
pi_profit = clean_df['profit_baseline_action'].astype(float).values

value_supervised = estimate_policy_value_dm(pi_supervised)
value_profit = estimate_policy_value_dm(pi_profit)

print("DM: estimated expected reward per applicant")
print("Supervised (threshold 0.5):", value_supervised)
print("Profit-based policy:", value_profit)

DM: estimated expected reward per applicant
Supervised (threshold 0.5): -1460.3806348536928
Profit-based policy: -4.2982352196135425


In [10]:
# Cell 4: compare actions and show disagreement examples

clean_df['supervised_action'] = (clean_df['p_default'] < 0.5).astype(int)
clean_df['profit_action'] = clean_df['profit_baseline_action']

mask = clean_df['supervised_action'] != clean_df['profit_action']
num_disagree = mask.sum()

print("Number of disagreements between supervised and profit baseline:", num_disagree)

examples = clean_df[mask].sort_values('loan_amnt', ascending=False).head(10)
examples[['loan_amnt','int_rate','p_default','expected_profit_if_approve','supervised_action','profit_action']]

Number of disagreements between supervised and profit baseline: 1288906


Unnamed: 0,loan_amnt,int_rate,p_default,expected_profit_if_approve,supervised_action,profit_action
572928,40000.0,15.31,0.246183,-5230.94282,1,0
382976,40000.0,9.93,0.165891,-3322.576893,1,0
383012,40000.0,6.08,0.116745,-2521.705325,1,0
511726,40000.0,7.21,0.142037,-3207.109338,1,0
401770,40000.0,15.05,0.312929,-8380.996049,1,0
511735,40000.0,12.73,0.200377,-3943.421896,1,0
1206671,40000.0,13.99,0.123954,-55.799984,1,0
449022,40000.0,9.75,0.139695,-2232.605356,1,0
1206666,40000.0,8.99,0.143606,-2664.656793,1,0
1300296,40000.0,12.74,0.182324,-3126.077889,1,0


In [11]:
# Cell 5: Prepare offline RL dataset for d3rlpy (one-step MDP)

import numpy as np

states = clean_df[features].values.astype(np.float32)
actions = np.ones(len(clean_df), dtype=np.int64)  # all approved
rewards = clean_df['reward'].values.astype(np.float32)

# next_states and terminal: one-step environment
next_states = np.zeros_like(states)  # unused but required
terminals = np.ones(len(clean_df), dtype=np.float32)  # episode ends immediately

# summary
print("states:", states.shape)
print("actions:", actions.shape)
print("rewards:", rewards.shape)
print("next_states:", next_states.shape)
print("terminals:", terminals.shape)

states: (1344976, 4)
actions: (1344976,)
rewards: (1344976,)
next_states: (1344976, 4)
terminals: (1344976,)


In [14]:
# Fixed Cell A (version 2): create d3rlpy dataset and a prototyping sample (robust prints)
import numpy as np
from d3rlpy.dataset import MDPDataset

# ensure arrays are correct dtypes and shapes (we set these earlier)
states = states.astype(np.float32)         # shape (N, D)
actions = actions.astype(np.int64)         # shape (N,)
rewards = rewards.astype(np.float32)       # shape (N,)
terminals = terminals.astype(np.float32)   # shape (N,)

# Build the MDPDataset properly (observations, actions, rewards, terminals)
dataset = MDPDataset(states, actions, rewards, terminals)

# Instead of using dataset.observations (may not exist), print from arrays directly
print("Full dataset prepared. N (from states.shape[0]) =", states.shape[0], ", obs dim =", states.shape[1])
print("Actions shape:", actions.shape, "Rewards shape:", rewards.shape, "Terminals shape:", terminals.shape)

# create a prototyping sample (200k rows) to run faster on laptop
sample_n = 200_000
if states.shape[0] > sample_n:
    rng = np.random.RandomState(42)
    idx = rng.choice(states.shape[0], size=sample_n, replace=False)
    states_s = states[idx]
    actions_s = actions[idx]
    rewards_s = rewards[idx]
    terminals_s = terminals[idx]
    dataset_sample = MDPDataset(states_s, actions_s, rewards_s, terminals_s)
    print("Sample dataset prepared. N_sample =", states_s.shape[0])
else:
    dataset_sample = dataset
    idx = None
    print("Dataset small enough; sample = full dataset.")


[2m2025-12-11 17:22.27[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('int64')], shape=[(1,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(4,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2025-12-11 17:22.27[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.DISCRETE: 2>[0m
[2m2025-12-11 17:22.29[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m2[0m
Full dataset prepared. N (from states.shape[0]) = 1344976 , obs dim = 4
Actions shape: (1344976,) Rewards shape: (1344976,) Terminals shape: (1344976,)
[2m2025-12-11 17:22.30[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('int64')], sha

In [20]:
# Fallback BC with sklearn that imitates the profit baseline policy
from sklearn.linear_model import LogisticRegression

# sample training data (100k rows) for faster training
train_sample = clean_df.sample(n=100000, random_state=42)

X_train_bc = train_sample[features]
y_train_bc = train_sample['profit_baseline_action']  # imitate the profit-driven policy

# Train a simple classifier that tries to learn when to approve/deny
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_bc, y_train_bc)

print("Fallback BC (sklearn) training finished.")


Fallback BC (sklearn) training finished.


In [21]:
# Evaluate fallback BC using Direct Method (DM)
import numpy as np

try:
    eval_idx = idx   # from dataset_sample creation
    X_eval = clean_df[features].iloc[eval_idx]
    r_hat_eval = r_hat_approve[eval_idx]
except NameError:
    # fallback: use the first N rows of the full dataset
    N_eval = 200000
    X_eval = clean_df[features].iloc[:N_eval]
    r_hat_eval = r_hat_approve[:N_eval]

# BC decisions
pred_actions = clf.predict(X_eval).astype(float)

print("Fallback BC acceptance rate:", pred_actions.mean())
print("Fallback BC DM value:", (pred_actions * r_hat_eval).mean())


Fallback BC acceptance rate: 0.01597
Fallback BC DM value: -3.984822043641007


In [22]:
# Compare supervised and profit baseline on same eval sample
supervised_action_eval = (clean_df['p_default'].iloc[:len(X_eval)] < 0.5).astype(float)
profit_action_eval = clean_df['profit_baseline_action'].iloc[:len(X_eval)].astype(float)

value_supervised_eval = (supervised_action_eval * r_hat_eval).mean()
value_profit_eval = (profit_action_eval * r_hat_eval).mean()

print("DM - Supervised (0.5):", value_supervised_eval)
print("DM - Profit baseline:", value_profit_eval)

DM - Supervised (0.5): -1629.1599242890122
DM - Profit baseline: -30.141581412118335


In [23]:
# Save models and disagreement examples
import joblib

# save pipeline and sklearn BC
joblib.dump(pipeline, "supervised_pipeline.joblib")   # already saved earlier
joblib.dump(clf, "bc_fallback_clf.joblib")

# export top disagreement examples to CSV for the report
mask = clean_df['supervised_action'] != clean_df['profit_action']
examples = clean_df[mask].sort_values('loan_amnt', ascending=False).head(200)  # top 200 disagreements
examples.to_csv("disagreement_examples_top200.csv", index=False)
print("Saved bc_fallback_clf.joblib and disagreement_examples_top200.csv")


Saved bc_fallback_clf.joblib and disagreement_examples_top200.csv


In [24]:
# Bootstrap CI for DM estimates (uses r_hat_approve and eval indices)
import numpy as np

def bootstrap_dm_estimate(pi_prob, r_hat, n_boot=200, seed=0):
    rng = np.random.RandomState(seed)
    n = len(r_hat)
    vals = []
    for _ in range(n_boot):
        idx_bs = rng.randint(0, n, size=n)
        vals.append((pi_prob[idx_bs] * r_hat[idx_bs]).mean())
    arr = np.array(vals)
    return arr.mean(), np.percentile(arr, 2.5), np.percentile(arr, 97.5)

# choose eval set (same as you used)
try:
    eval_idx = idx
    r_hat_eval = r_hat_approve[eval_idx]
    pi_profit_eval = clean_df['profit_baseline_action'].values[eval_idx]
    pi_supervised_eval = (clean_df['p_default'].values[eval_idx] < 0.5).astype(float)
    pi_bc_eval = clf.predict(clean_df[features].iloc[eval_idx]).astype(float)
except NameError:
    N_eval = len(r_hat_approve)
    r_hat_eval = r_hat_approve[:N_eval]
    pi_profit_eval = clean_df['profit_baseline_action'].values[:N_eval]
    pi_supervised_eval = (clean_df['p_default'].values[:N_eval] < 0.5).astype(float)
    pi_bc_eval = clf.predict(clean_df[features].iloc[:N_eval]).astype(float)

print("Bootstrap DM - profit baseline:", bootstrap_dm_estimate(pi_profit_eval, r_hat_eval))
print("Bootstrap DM - supervised:", bootstrap_dm_estimate(pi_supervised_eval, r_hat_eval))
print("Bootstrap DM - BC fallback:", bootstrap_dm_estimate(pi_bc_eval, r_hat_eval))


Bootstrap DM - profit baseline: (np.float64(-4.290175151069416), np.float64(-4.955790551868244), np.float64(-3.739426404399698))
Bootstrap DM - supervised: (np.float64(-1461.8135234674812), np.float64(-1470.1151431190267), np.float64(-1453.477254292464))
Bootstrap DM - BC fallback: (np.float64(-4.016478307397215), np.float64(-4.6433883535336244), np.float64(-3.3766996225430597))


In [25]:
# Cell 1 — prepare PyTorch and data
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, f1_score
import joblib

# device (use GPU if available, otherwise CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

# Use the same feature list used earlier
# features = ['loan_amnt','int_rate','annual_inc','dti']  # should already exist

# Fit scaler on training data and transform both train and test
scaler = StandardScaler()
X_train_np = X_train.values.astype(np.float32)
X_test_np = X_test.values.astype(np.float32)
scaler.fit(X_train_np)
X_train_s = scaler.transform(X_train_np)
X_test_s = scaler.transform(X_test_np)

y_train_np = y_train.values.astype(np.float32)
y_test_np = y_test.values.astype(np.float32)

# Save scaler for later use
joblib.dump(scaler, "scaler_for_mlp.joblib")

# PyTorch Dataset wrapper
class TabularDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y).unsqueeze(1)  # shape (N,1)
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = TabularDataset(X_train_s, y_train_np)
test_dataset  = TabularDataset(X_test_s, y_test_np)

print("Train rows:", len(train_dataset), "Test rows:", len(test_dataset))


Using device: cpu
Train rows: 1075980 Test rows: 268996


In [26]:
# Cell 2 — define model, dataloaders, optimizer
import torch.nn as nn
from torch.utils.data import DataLoader

batch_size = 1024  # reduce to 512 if you see memory/slow issues

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
test_loader  = DataLoader(test_dataset,  batch_size=batch_size, shuffle=False)

class MLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Linear(32, 1)   # single logit output
        )
    def forward(self, x):
        return self.net(x)

input_dim = X_train_s.shape[1]
model = MLP(input_dim).to(device)

criterion = nn.BCEWithLogitsLoss()   # expects raw logits
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

print("Model parameters:", sum(p.numel() for p in model.parameters()))


Model parameters: 2625


In [27]:
# Cell 3 — training loop
n_epochs = 8   # reduce to 3 for a faster quick run if you prefer
model.train()
for epoch in range(1, n_epochs+1):
    epoch_loss = 0.0
    for Xb, yb in train_loader:
        Xb = Xb.to(device)
        yb = yb.to(device)
        optimizer.zero_grad()
        logits = model(Xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item() * Xb.size(0)
    epoch_loss /= len(train_loader.dataset)
    print(f"Epoch {epoch:02d}/{n_epochs} — loss: {epoch_loss:.6f}")

# save model weights
import torch
torch.save(model.state_dict(), "mlp_model.pt")
print("Training finished and model saved to mlp_model.pt")


Epoch 01/8 — loss: 0.475654
Epoch 02/8 — loss: 0.460996
Epoch 03/8 — loss: 0.460830
Epoch 04/8 — loss: 0.460659
Epoch 05/8 — loss: 0.460547
Epoch 06/8 — loss: 0.460473
Epoch 07/8 — loss: 0.460419
Epoch 08/8 — loss: 0.460414
Training finished and model saved to mlp_model.pt


In [28]:
# Cell 4 — evaluation: AUC & F1
model.eval()
all_probs = []
all_preds = []
all_targets = []

with torch.no_grad():
    for Xb, yb in test_loader:
        Xb = Xb.to(device)
        logits = model(Xb)
        probs = torch.sigmoid(logits).cpu().numpy().flatten()
        preds = (probs >= 0.5).astype(int)
        all_probs.append(probs)
        all_preds.append(preds)
        all_targets.append(yb.numpy().flatten())

import numpy as np
from sklearn.metrics import roc_auc_score, f1_score

y_proba = np.concatenate(all_probs)
y_pred  = np.concatenate(all_preds)
y_true  = np.concatenate(all_targets)

auc = roc_auc_score(y_true, y_proba)
f1 = f1_score(y_true, y_pred)

print(f"AUC: {auc:.6f}")
print(f"F1 Score: {f1:.6f}")

# Save full model and scaler artifacts
import joblib, torch
torch.save({'model_state_dict': model.state_dict()}, "mlp_model_full.pt")
joblib.dump({"scaler": scaler}, "mlp_artifacts.joblib")
print("Saved mlp_model_full.pt and mlp_artifacts.joblib")


AUC: 0.695755
F1 Score: 0.055261
Saved mlp_model_full.pt and mlp_artifacts.joblib
