In [5]:
import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
import d3rlpy
from d3rlpy.algos import DiscreteCQLConfig
from d3rlpy.dataset import MDPDataset

# Printed after the imports above so a visible message confirms the cell ran to completion.
print("Libraries loaded.")

Libraries loaded.


In [7]:
# Load the preprocessed artifacts bundle.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so this file
# must come from a trusted source (e.g. this project's own preprocessing run).
data = joblib.load('artifacts/preprocessed_data.joblib')
X_test = data['X_test']
y_test = data['y_test']
test_data_rl = data['test_data_rl']  # The raw test data
preprocessor = data['preprocessor']

print("✅ Data loaded successfully.")

# Load DL model
dl_model = tf.keras.models.load_model('artifacts/dl_model.keras')
print("✅ Deep Learning model loaded.")

# --- Build the RL model ---
# Create configuration (discrete version)
cql_config = DiscreteCQLConfig()
cql = cql_config.create(device="cpu")

# Create a small placeholder dataset, used only to build the network
# architecture before the saved weights are loaded. It is fully deterministic
# so that re-running the cell always builds the same architecture.
dummy_rng = np.random.default_rng(0)
dummy_observations = dummy_rng.random((10, X_test.shape[1]))
# d3rlpy logs that it determines the action size from the dataset, so the
# placeholder actions must always contain both action 0 and action 1.
# A random draw could (rarely) contain a single action only, which would
# presumably build a network of the wrong size for the saved weights.
dummy_actions = np.arange(10) % 2
dummy_rewards = np.zeros(10)
dummy_terminals = np.zeros(10)
dummy_terminals[-1] = 1  # ✅ mark last sample as terminal (required)

dummy_dataset = MDPDataset(
    observations=dummy_observations,
    actions=dummy_actions,
    rewards=dummy_rewards,
    terminals=dummy_terminals
)

# Build model architecture before loading weights
cql.build_with_dataset(dummy_dataset)
print("✅ RL model architecture built successfully.")

# Load pretrained weights
cql.load_model('artifacts/discrete_cql_model.d3')  # or 'cql_model.d3'
print("✅ RL model weights loaded.")

✅ Data loaded successfully.
✅ Deep Learning model loaded.
2025-10-29 22:09.40 [info     ] Signatures have been automatically determined. action_signature=Signature(dtype=[dtype('int32')], shape=[(1,)]) observation_signature=Signature(dtype=[dtype('float64')], shape=[(37,)]) reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)])
2025-10-29 22:09.40 [info     ] Action-space has been automatically determined. action_space=<ActionSpace.DISCRETE: 2>
2025-10-29 22:09.40 [info     ] Action size has been automatically determined. action_size=2
✅ RL model architecture built successfully.
✅ RL model weights loaded.


In [8]:
# --- 2. Generate Policies from Both Models ---

# Model 1: Deep Learning Policy
# The DL model predicts P(default = 1), so a HIGH score means HIGH risk.
# Policy: "Approve" (1) when the predicted default probability is below
# THRESHOLD, otherwise "Deny" (0).
THRESHOLD = 0.5
y_pred_prob = dl_model.predict(X_test).flatten()
dl_policy_action = (y_pred_prob < THRESHOLD).astype(int)  # 1 = Approve, 0 = Deny

print(f"DL Policy: {np.sum(dl_policy_action == 1)} approvals, {np.sum(dl_policy_action == 0)} denials")

# Model 2: Reinforcement Learning Policy
# The RL agent directly outputs the chosen action (0 or 1) for each observation.
# NOTE(review): in the recorded run this policy approved every test sample
# (0 denials) -- confirm that X_test matches the observation format the agent
# was trained on and that the loaded weights are the intended ones.
rl_policy_action = cql.predict(X_test)
print(f"RL Policy: {np.sum(rl_policy_action == 1)} approvals, {np.sum(rl_policy_action == 0)} denials")

559/559 ━━━━━━━━━━━━━━━━━━━━ 28s 51ms/st ━━━━━━━━━━━━━━━━━━━━ 0s 641us/st ━━━━━━━━━━━━━━━━━━━━ 0s 608us/st ━━━━━━━━━━━━━━━━━━━━ 0s 574us/st ━━━━━━━━━━━━━━━━━━━━ 0s 539us/st ━━━━━━━━━━━━━━━━━━━━ 0s 523us/st ━━━━━━━━━━━━━━━━━━━━ 0s 547us/st ━━━━━━━━━━━━━━━━━━━━ 0s 571us/step
DL Policy: 10109 approvals, 7777 denials
RL Policy: 17886 approvals, 0 denials


In [9]:
# --- 3. Find and Analyze Disagreements (Task 4.3) ---
# Central comparison for the report: where do the two policies diverge?

# Work on a fresh frame derived from the raw test data, extended with both
# policies, the DL risk score and the observed outcome.
analysis_df = test_data_rl.assign(
    dl_policy_action=dl_policy_action,
    rl_policy_action=rl_policy_action,
    dl_default_prob=y_pred_prob,
    actual_outcome=y_test.values,
)

# Keep only the rows on which the two policies chose different actions.
disagreements_df = analysis_df.loc[
    lambda frame: frame['dl_policy_action'].ne(frame['rl_policy_action'])
]

print(f"Found {len(disagreements_df)} disagreements out of {len(analysis_df)} test samples.")

# Most interesting subset: the DL policy denies (action 0, i.e. its default
# probability did not fall below the threshold) while the RL agent approves
# (action 1).
interesting_cases = disagreements_df.loc[
    lambda frame: frame['dl_policy_action'].eq(0) & frame['rl_policy_action'].eq(1)
]

print(f"\nFound {len(interesting_cases)} cases where DL denies but RL approves.")
print("Here are a few examples:")

# Key loan features shown next to each model's decision and the real outcome.
report_columns = [
    'loan_amnt',
    'int_rate',
    'annual_inc',
    'grade',
    'dl_default_prob',   # The DL model's risk score
    'dl_policy_action',
    'rl_policy_action',
    'actual_outcome',    # What actually happened
]
print(interesting_cases[report_columns].head(10))

Found 7777 disagreements out of 17886 test samples.

Found 7777 cases where DL denies but RL approves.
Here are a few examples:
        loan_amnt  int_rate  annual_inc grade  dl_default_prob  \
76399     15000.0     15.59     90000.0     D         0.643804   
143331    27950.0     18.06     69000.0     D         0.676681   
88699     23000.0     14.64     70000.0     C         0.525788   
50657     40000.0     11.99    220000.0     C         0.511253   
139122    35000.0     10.42     55000.0     B         0.603592   
23490      7850.0     16.55     38980.0     D         0.569693   
114486    14000.0     19.20     64000.0     D         0.725955   
115718     5000.0     17.76     60000.0     D         0.508849   
43680     16000.0     23.32     51000.0     E         0.869769   
9938      20000.0     13.33     48000.0     C         0.573668   

        dl_policy_action  rl_policy_action  actual_outcome  
76399                  0                 1               0  
143331                 