In [10]:
"""
Task 4: Analysis, Comparison, and Future Steps

Loads the pre-trained MLP model (Task 2) and CQL agent (Task 3).
Loads the preprocessed test data (Task 1).
Compares the models on two fronts:
1.  Predictive Performance (MLP): AUC, F1-Score.
2.  Decision-Making Value (CQL): Estimated Policy Value ($) vs. baselines.

Prints a final report comparing the approaches.
"""

'\nTask 4: Analysis, Comparison, and Future Steps\n\nLoads the pre-trained MLP model (Task 2) and CQL agent (Task 3).\nLoads the preprocessed test data (Task 1).\nCompares the models on two fronts:\n1.  Predictive Performance (MLP): AUC, F1-Score.\n2.  Decision-Making Value (CQL): Estimated Policy Value ($) vs. baselines.\n\nPrints a final report comparing the approaches.\n'

In [11]:
!pip install d3rlpy



In [12]:
import numpy as np
import pandas as pd
import d3rlpy
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import roc_auc_score, f1_score, classification_report
import joblib
import os
import warnings

# Silence every warning category for the rest of the session to keep cell
# output readable. NOTE(review): this also hides deprecation and numerical
# warnings from the libraries imported above — consider narrowing the filter.
warnings.filterwarnings('ignore')

# --- 0. Configuration ---
# Seed shared by the NumPy and TensorFlow RNG calls below.
RANDOM_SEED = 42
# Directory (relative to the working directory) that main() reads the
# preprocessed data artifacts (*.pkl) from.
DATA_DIR = 'data'
# Directory (relative to the working directory) that main() reads the saved
# MLP model, MLP test predictions and CQL agent weights from.
MODEL_DIR = 'models'

# Set Random Seed
# Seeds NumPy's global RNG and TensorFlow's global seed.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [13]:
def _load_cql_agent(X_train_final):
    """Rebuild the Discrete CQL agent and load its saved weights.

    The agent is re-created from the same config values used for training,
    built against a two-row dummy dataset (so d3rlpy can infer the observation
    shape and an action size of 2), and then populated from
    ``MODEL_DIR/cql_agent.d3``.

    Args:
        X_train_final: Training features exposing ``.to_numpy()``; only the
            first two rows are used, purely to provide the observation
            signature.

    Returns:
        The loaded d3rlpy agent, or ``None`` if loading failed (an explanatory
        message is printed in that case).
    """
    # Defined before the ``try`` so the error handler can always reference it.
    model_path = os.path.join(MODEL_DIR, 'cql_agent.d3')
    try:
        dummy_obs = X_train_final.to_numpy()[:2]

        # A single-row training set is duplicated so two transitions exist.
        if len(dummy_obs) < 2:
            dummy_obs = np.vstack([dummy_obs, dummy_obs])

        # One transition per action (0 = Deny, 1 = Approve) so both actions
        # are present when the action space is inferred.
        dummy_actions = np.array([0, 1], dtype=np.int32)

        dataset = d3rlpy.dataset.MDPDataset(
            observations=dummy_obs,
            actions=dummy_actions,
            rewards=np.zeros(2),
            terminals=np.ones(2)
        )

        # 1. Re-instantiate the same config as in Task 3
        cql_config = d3rlpy.algos.DiscreteCQLConfig(
            batch_size=128,
            learning_rate=6.25e-5,
            alpha=1.0,
        )
        cql_agent = cql_config.create(device=False)

        # 2. Build the agent with the dummy dataset's signature
        cql_agent.build_with_dataset(dataset)

        # 3. Load the saved model weights
        cql_agent.load_model(model_path)

        print(f"CQL agent weights loaded manually from {model_path}")
        return cql_agent

    except FileNotFoundError as e:
        print(f"Error loading CQL model: {e}")
        print(f"Please ensure `task_3_cql.py` has run and {model_path} exists.")
        return None
    except Exception as e:
        print(f"An unexpected error occurred while loading CQL model: {e}")
        return None


def _best_policy_message(rl_value, historical_value, deny_value):
    """Return the report line naming the most profitable policy.

    A policy is declared the winner only when it is *strictly* better than
    both alternatives. Ties for first place (for example an RL policy that
    approves every loan and therefore matches the historical value) are
    reported as ties instead of being attributed to "Always Deny".

    Args:
        rl_value: Average profit per loan under the RL policy.
        historical_value: Average profit per loan when every loan is approved.
        deny_value: Average profit per loan when every loan is denied.

    Returns:
        A message string, starting with a newline, ready to be printed.
    """
    candidates = [
        ("RL (CQL)", rl_value),
        ("Historical (Approve All)", historical_value),
        ("Always Deny", deny_value),
    ]

    # NaN compares False against everything, so no ranking would be meaningful.
    if any(np.isnan(value) for _, value in candidates):
        return "\n  ⚠️ Policy values contain NaN; the policies cannot be ranked."

    if rl_value > historical_value and rl_value > deny_value:
        return "\n  ✅ RL Agent Policy is the most profitable on the test set."
    if historical_value > rl_value and historical_value > deny_value:
        return "\n  ⚠️ Historical (Approve All) Policy remains the most profitable."
    if deny_value > rl_value and deny_value > historical_value:
        return "\n  ⚠️ Always Deny Policy is the most profitable (this indicates a poor-quality loan book)."

    best_value = max(value for _, value in candidates)
    tied = [name for name, value in candidates if value == best_value]
    return (
        f"\n  ⚠️ No single best policy: {' / '.join(tied)} "
        f"tie at ${best_value:,.2f} avg profit/loan."
    )


def main():
    """Compare the supervised MLP and the offline-RL (CQL) agent on the test set.

    Steps:
        1. Load the test data, reward source frame, and saved MLP artifacts
           from ``DATA_DIR`` / ``MODEL_DIR``.
        2. Rebuild the CQL agent and load its weights.
        3. Report MLP AUC, F1 and a classification report at threshold 0.5.
        4. Derive the agent's approve/deny decision per test loan from its
           Q-values and compare the resulting average profit per loan with the
           "approve all" and "deny all" baselines.
        5. Print a breakdown of the agent's decisions by actual loan outcome.

    Returns:
        None. All results are printed. On a missing artifact or inconsistent
        input sizes a message is printed and the function returns early.
    """

    # --- Load Data and Models ---
    print("Step 1: Loading all data and trained models...")
    try:
        # Load test data
        X_test_final = joblib.load(os.path.join(DATA_DIR, 'X_test_final.pkl'))
        y_test = joblib.load(os.path.join(DATA_DIR, 'y_test.pkl'))

        # Load data for reward calculation
        df_model = joblib.load(os.path.join(DATA_DIR, 'df_model_for_rewards.pkl'))
        test_indices = joblib.load(os.path.join(DATA_DIR, 'test_indices.pkl'))

        # Load data signature for building CQL
        X_train_final = joblib.load(os.path.join(DATA_DIR, 'X_train_final.pkl'))

        # Load trained MLP model. The model object itself is not used below
        # (metrics come from the saved predictions); loading it only confirms
        # that the artifact exists and deserializes.
        keras.models.load_model(os.path.join(MODEL_DIR, 'mlp_model.keras'))

        # Load MLP predictions
        y_pred_probs_mlp = np.load(os.path.join(MODEL_DIR, 'mlp_test_pred_probs.npy'))

    except FileNotFoundError as e:
        print(f"Error loading file: {e}")
        print("Please ensure Tasks 1 and 2 have been run successfully.")
        return

    print("Data and MLP model loaded successfully.")

    # --- 2. Load CQL Agent (Robust Method) ---
    print("Loading CQL agent using robust method...")
    cql_agent = _load_cql_agent(X_train_final)
    if cql_agent is None:
        return

    # Convert test data to NumPy
    observations_test = X_test_final.to_numpy()
    y_test_np = y_test.to_numpy()
    n_test = len(observations_test)

    # Saved predictions may be stored as a column vector; the metrics below
    # expect one score per test row.
    y_pred_probs_mlp = np.ravel(y_pred_probs_mlp)
    if len(y_pred_probs_mlp) != len(y_test_np):
        print(
            f"Error: {len(y_pred_probs_mlp)} saved MLP predictions do not match "
            f"{len(y_test_np)} test labels. Please re-run Task 2."
        )
        return

    # --- Evaluate Model 1 (MLP) ---
    print("\n--- 📊 Model 1: Supervised DL (MLP) Evaluation ---")
    print("Objective: Predict default probability.")

    y_pred_classes_mlp = (y_pred_probs_mlp > 0.5).astype(int)
    auc_mlp = roc_auc_score(y_test_np, y_pred_probs_mlp)
    f1_mlp = f1_score(y_test_np, y_pred_classes_mlp)

    print(f"  > Test Set AUC: {auc_mlp:.4f}")
    print(f"  > Test Set F1-Score (threshold 0.5): {f1_mlp:.4f}")
    print("\n  Classification Report (threshold 0.5):")
    print(classification_report(y_test_np, y_pred_classes_mlp, target_names=['  Fully Paid (0)', '  Defaulted (1)']))
    print("  MLP Conclusion: The model provides a probabilistic risk score (AUC).")
    print("  Its F1-score shows its ability to balance precision/recall for default detection.")

    # --- Evaluate Model 2 (CQL Agent) ---
    print("\n--- 📊 Model 2: Offline RL (CQL) Evaluation ---")
    print("Objective: Learn a policy to maximize financial return.")

    # Get Q-values for both actions on the test set
    print("  Predicting Q-values for 'Deny' (0) and 'Approve' (1)...")
    q_values_deny = cql_agent.predict_value(observations_test, np.zeros(n_test, dtype=np.int32))
    q_values_approve = cql_agent.predict_value(observations_test, np.ones(n_test, dtype=np.int32))

    # RL Agent's policy: Choose action with higher Q-value
    agent_policy_actions = (q_values_approve > q_values_deny).astype(int)  # 1 if Approve > Deny, 0 otherwise

    # Calculate raw rewards for the test set for analysis:
    # interest received for repaid loans, payments minus principal for defaults.
    test_rows = df_model.loc[test_indices]
    rewards_test_raw = np.where(
        test_rows['is_default'] == 0,
        test_rows['total_rec_int'],
        test_rows['total_pymnt'] - test_rows['loan_amnt']
    )

    # Rewards are combined positionally with the agent's actions below, so the
    # two must describe the same number of loans.
    # NOTE(review): row *order* is assumed to match X_test_final — confirm in Task 1.
    if len(rewards_test_raw) != n_test:
        print(
            f"Error: {len(rewards_test_raw)} reward rows do not match "
            f"{n_test} test observations. Please re-run Task 1."
        )
        return

    # --- Calculate and Compare Policy Values (OPE) ---
    print("\n--- 📈 Estimated Policy Value (EPV) Comparison ---")

    # Baseline 1: Historical Policy (Always Approve)
    historical_policy_value = rewards_test_raw.mean()

    # Baseline 2: Always Deny Policy
    always_deny_policy_value = 0.0

    # Policy 3: RL Agent's Policy (a denied loan contributes 0)
    rl_policy_rewards = np.where(agent_policy_actions == 1, rewards_test_raw, 0)
    rl_policy_value = rl_policy_rewards.mean()

    print(f"  > Baseline (Historical - Approve All): ${historical_policy_value:,.2f} avg profit/loan")
    print(f"  > Baseline (Always Deny):              ${always_deny_policy_value:,.2f} avg profit/loan")
    print(f"  > New RL (CQL) Policy Value:           ${rl_policy_value:,.2f} avg profit/loan")

    print(_best_policy_message(rl_policy_value, historical_policy_value, always_deny_policy_value))

    # Detect "Always Approve" from the actions themselves rather than from
    # exact floating-point equality of two means.
    if agent_policy_actions.all():
        print("  NOTE: RL agent policy value is identical to historical, suggesting it learned to 'Always Approve'.")
        print("  This is a common result of selection bias in accepted-only datasets.")

    # --- Policy Behavior Breakdown ---
    policy_comparison = pd.DataFrame({
        'RL_Policy': pd.Series(agent_policy_actions, index=y_test.index).map({0: 'Deny', 1: 'Approve'}),
        'Historical_Status': y_test.map({0: 'Paid', 1: 'Defaulted'}),
        'Actual_Profit_Loss': rewards_test_raw
    })

    print("\n--- RL Policy Decision Breakdown (Test Set) ---")
    print(policy_comparison.groupby(['RL_Policy', 'Historical_Status']).agg(
        count=('Actual_Profit_Loss', 'size'),
        avg_profit_loss=('Actual_Profit_Loss', 'mean')
    ).round(2))

    print("\n--- End of Analysis ---")

# Run the full analysis when this code is executed directly (as a script or
# as a notebook cell) rather than imported as a module.
if __name__ == "__main__":
    main()

Step 1: Loading all data and trained models...
Data and MLP model loaded successfully.
Loading CQL agent using robust method...
[2m2025-10-30 09:41.58[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('int32')], shape=[(1,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(132,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(1,)])[0m
[2m2025-10-30 09:41.58[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.DISCRETE: 2>[0m
[2m2025-10-30 09:41.58[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m2[0m
CQL agent weights loaded manually from models/cql_agent.d3

--- 📊 Model 1: Supervised DL (MLP) Evaluation ---
Objective: Predict default probability.
  > Test Set AUC: 0.7165
  > Test Set F1-Score (