In [None]:
import os
!pip install -q kaggle

if not os.path.exists("/root/.kaggle/kaggle.json"):
    from google.colab import files
    print("Upload your kaggle.json file from your Kaggle account")
    files.upload()
    !mkdir -p ~/.kaggle
    !mv kaggle.json ~/.kaggle/
    !chmod 600 ~/.kaggle/kaggle.json
else:
    print("kaggle.json already configured")

if not os.path.exists("accepted_2007_to_2018Q4.csv.gz"):
    print("Downloading Lending Club dataset...")
    !kaggle datasets download -d wordsforthewise/lending-club -q
    !unzip -q lending-club.zip
    print("Dataset downloaded & extracted")
else:
    print("Dataset already available")

✅ kaggle.json already configured
✅ Dataset already available


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    roc_auc_score,
    f1_score,
    classification_report,
    confusion_matrix
)


import warnings

# Globally silence every subsequent Python warning for the rest of the
# session (deprecation notices from the imported libraries included).
warnings.filterwarnings('ignore')

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read only the first 200,000 rows of the gzipped accepted-loans export.
df = pd.read_csv(
    "accepted_2007_to_2018Q4.csv.gz",
    nrows=200000,
    low_memory=False,
)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,id,member_id,loan_amnt,funded_amnt,funded_amnt_inv,term,int_rate,installment,grade,sub_grade,...,hardship_payoff_balance_amount,hardship_last_payment_amount,disbursement_method,debt_settlement_flag,debt_settlement_flag_date,settlement_status,settlement_date,settlement_amount,settlement_percentage,settlement_term
0,68407277,,3600.0,3600.0,3600.0,36 months,13.99,123.03,C,C4,...,,,Cash,N,,,,,,
1,68355089,,24700.0,24700.0,24700.0,36 months,11.99,820.28,C,C1,...,,,Cash,N,,,,,,
2,68341763,,20000.0,20000.0,20000.0,60 months,10.78,432.66,B,B4,...,,,Cash,N,,,,,,
3,66310712,,35000.0,35000.0,35000.0,60 months,14.85,829.9,C,C5,...,,,Cash,N,,,,,,
4,68476807,,10400.0,10400.0,10400.0,60 months,22.45,289.91,F,F1,...,,,Cash,N,,,,,,


In [None]:
# (rows, columns) of the loaded sample.
df.shape

(200000, 151)

In [None]:
# Call the method: a bare `df.info` only echoes the bound-method repr instead
# of printing the per-column dtypes, non-null counts and memory usage.
df.info()

In [None]:
import d3rlpy
from d3rlpy.dataset import MDPDataset
from d3rlpy.algos import DiscreteCQLConfig

from d3rlpy.metrics import InitialStateValueEstimationEvaluator
from d3rlpy.metrics import DiscreteActionMatchEvaluator

# Same blanket warning filter as in the main imports cell, applied again after
# the d3rlpy imports (NOTE(review): likely redundant -- confirm before removing).
warnings.filterwarnings('ignore')

In [None]:
print("Step 1: Loading data from 'task_3_data.npz'...")
# NOTE: allow_pickle=True lets NumPy unpickle object arrays, which can execute
# arbitrary code -- only load archives that this project produced itself.
# The context manager closes the archive's file handle as soon as every array
# has been read into memory, instead of leaving it open for the whole session.
with np.load('task_3_data.npz', allow_pickle=True) as data:
    X_train_processed = data['X_train_processed']
    y_train = data['y_train']
    X_train_reward_features = data['X_train_reward_features']
    X_test_processed = data['X_test_processed']
    y_test = data['y_test']
    X_test_reward_features = data['X_test_reward_features']
    feature_names = data['feature_names']
print(f"Loaded training states: {X_train_processed.shape}")



print("\nStep 2: Engineering the offline RL dataset...")

def create_rl_dataset(states, outcomes, reward_features):
    """Build a one-step offline RL dataset with an approve and a deny transition per loan.

    Every row of ``states`` is emitted twice with the same observation: once
    with action 1 (approve) and the reward derived from the loan's outcome,
    and once with action 0 (deny) and a reward of zero. All transitions are
    flagged terminal. The approve transitions come first, followed by the
    deny transitions.

    Args:
        states: 2-D array-like of observations, one row per loan.
        outcomes: Array-like of labels, one per loan. A value of 0 earns the
            interest reward (``loan_amnt * int_rate / 100``); any other value
            costs the full loan amount. May be 1-D or an ``(n, 1)`` column.
        reward_features: 2-D array-like whose column 0 is read as the loan
            amount and column 1 as the interest rate (divided by 100 in the
            reward).

    Returns:
        An ``MDPDataset`` holding ``2 * len(states)`` transitions.

    Raises:
        ValueError: If the three inputs do not have the same number of rows.
    """
    states = np.asarray(states)
    reward_features = np.asarray(reward_features)
    # Flatten the labels: an (n, 1) column would otherwise broadcast against
    # the (n,) reward columns inside np.where and produce an (n, n) array.
    outcomes = np.asarray(outcomes).reshape(-1)

    num_samples = len(states)
    if len(outcomes) != num_samples or len(reward_features) != num_samples:
        raise ValueError(
            "states, outcomes and reward_features must have the same number of rows; "
            f"got {num_samples}, {len(outcomes)} and {len(reward_features)}"
        )

    loan_amnt = reward_features[:, 0]
    int_rate = reward_features[:, 1]

    # Action 1 (approve): interest earned when the outcome is 0, otherwise
    # the whole loan amount is lost.
    approved_actions = np.ones(num_samples, dtype=int)
    approved_rewards = np.where(
        outcomes == 0,
        loan_amnt * int_rate / 100.0,
        -loan_amnt
    ).astype(float)

    # Action 0 (deny): synthetic counterfactual with zero reward.
    denied_actions = np.zeros(num_samples, dtype=int)
    denied_rewards = np.zeros(num_samples, dtype=float)

    # Stack the approve block on top of the deny block; every transition ends
    # its episode, so all terminal flags are 1.
    all_states = np.concatenate([states, states])
    all_actions = np.concatenate([approved_actions, denied_actions])
    all_rewards = np.concatenate([approved_rewards, denied_rewards])
    all_terminals = np.ones(2 * num_samples, dtype=float)

    return MDPDataset(
        observations=all_states.astype(np.float32),
        actions=all_actions.astype(np.uint8),
        rewards=all_rewards.astype(np.float32),
        terminals=all_terminals.astype(np.float32)
    )

# Build the offline RL datasets for the training and the held-out split.
train_dataset = create_rl_dataset(
    states=X_train_processed,
    outcomes=y_train,
    reward_features=X_train_reward_features,
)
test_dataset = create_rl_dataset(
    states=X_test_processed,
    outcomes=y_test,
    reward_features=X_test_reward_features,
)

# Report the size of the training dataset.
n_train_transitions = train_dataset.size()
print(f"Total training transitions (real + synthetic): {n_train_transitions}")

Step 1: Loading data from 'task_3_data.npz'...
Loaded training states: (140866, 76)

Step 2: Engineering the offline RL dataset...
[2m2025-10-26 09:59.09[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(1,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(76,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float32')], shape=[(1,)])[0m
[2m2025-10-26 09:59.09[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.DISCRETE: 2>[0m
[2m2025-10-26 09:59.11[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m2[0m
[2m2025-10-26 09:59.12[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('uint8')], shape=[(1,)])[0m [36m

In [None]:
print("\nStep 3: Defining the Conservative Q-Learning (CQL) agent...")

# d3rlpy runs on PyTorch, so the device has to be chosen from what PyTorch can
# see. A GPU that is visible to TensorFlow says nothing about whether the
# installed torch build can use CUDA (and vice versa).
import torch

use_gpu = torch.cuda.is_available()
device = 'cuda:0' if use_gpu else 'cpu'
print(f"Using device: {device}")


# Rewards and actions are passed to the algorithm unscaled.
cql_config = DiscreteCQLConfig(
    reward_scaler=None,
    action_scaler=None
)

cql_agent = cql_config.create(device=device)



Step 3: Defining the Conservative Q-Learning (CQL) agent...
Using device: cpu


In [None]:
print("\nStep 4: Training the agent...")

# NOTE(review): no episodes are passed to the evaluators, so they are
# presumably scored on the dataset given to fit() -- confirm; passing
# test_dataset.episodes would give a held-out estimate instead.
value_estimator = InitialStateValueEstimationEvaluator()
action_matcher = DiscreteActionMatchEvaluator()

# fit() runs n_steps // n_steps_per_epoch epochs. The previous combination of
# n_steps=10 and n_steps_per_epoch=10000 therefore ran zero epochs: no gradient
# step was taken, no evaluator was called and no checkpoint was written.
# Deriving n_steps from an epoch count guarantees at least one full epoch.
# To shorten a run, lower N_EPOCHS or N_STEPS_PER_EPOCH here.
N_EPOCHS = 10
N_STEPS_PER_EPOCH = 10000

cql_agent.fit(
    dataset=train_dataset,
    n_steps=N_EPOCHS * N_STEPS_PER_EPOCH,
    n_steps_per_epoch=N_STEPS_PER_EPOCH,
    evaluators={
        'policy_value': value_estimator,
        'action_match': action_matcher
    },
    show_progress=True,
    save_interval=1
)

print("Training complete!")


Step 4: Training the agent...
[2m2025-10-26 09:59.13[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float32')], shape=[(76,)]), action_signature=Signature(dtype=[dtype('uint8')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=2)[0m
[2m2025-10-26 09:59.13[0m [[32m[1mdebug    [0m] [1mBuilding models...            [0m
[2m2025-10-26 09:59.13[0m [[32m[1mdebug    [0m] [1mModels have been built.       [0m
[2m2025-10-26 09:59.13[0m [[32m[1minfo     [0m] [1mDirectory is created at d3rlpy_logs/DiscreteCQL_20251026095913[0m
[2m2025-10-26 09:59.13[0m [[32m[1minfo     [0m] [1mParameters                    [0m [36mparams[0m=[35m{'observation_shape': [76], 'action_size': 2, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 32, 'gamma': 0.99, 'observation_scaler': {'t

In [None]:
# --- 5. Save the policy and prepare the data for Task 4 ---

print("\nStep 5: Saving policy...")
cql_agent.save_model('cql_policy.pt')

# Actions the RL agent picks for each test-set state.
rl_policy_actions = cql_agent.predict(X_test_processed.astype(np.float32))

# Columns needed to compute the realised reward of each possible action.
test_loan_amnt, test_int_rate = (
    X_test_reward_features[:, 0],
    X_test_reward_features[:, 1],
)

# Reward for approving (action 1): interest earned when the label is 0
# (fully paid), otherwise the loan amount is lost (default).
loan_repaid = (y_test == 0)
reward_if_approve = np.where(
    loan_repaid,
    test_loan_amnt * test_int_rate / 100.0,
    -test_loan_amnt,
).astype(float)

# Reward for denying (action 0): always zero.
reward_if_deny = np.zeros(reward_if_approve.shape, dtype=float)

# Score the test set with the Task 2 deep-learning model; if it cannot be
# loaded or run, warn and fall back to an all-zero placeholder.
try:
    dl_model = tf.keras.models.load_model('dl_model.h5')
    dl_model_probas = dl_model.predict(X_test_processed).flatten()
except Exception as e:
    print(f"\nWarning: Could not load 'dl_model.h5'. (Error: {e})")
    print("Using placeholder (zeros) for DL model probabilities.")
    dl_model_probas = np.zeros_like(y_test, dtype=float)

# Bundle everything the final analysis needs into one compressed archive.
analysis_arrays = {
    'y_test': y_test,
    'rl_policy_actions': rl_policy_actions,
    'dl_model_probas': dl_model_probas,
    'reward_if_approve': reward_if_approve,
    'reward_if_deny': reward_if_deny,
    'X_test': X_test_processed,
    'feature_names': feature_names,
}
np.savez_compressed('task_4_analysis_data.npz', **analysis_arrays)

print("Data for Task 4 saved to 'task_4_analysis_data.npz'")


Step 5: Saving policy...




[1m1101/1101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Data for Task 4 saved to 'task_4_analysis_data.npz'
