In [1]:

import minari
import numpy as np
import gymnasium as gym
from PIL import Image
from minari import DataCollector
import torch
import torch.nn as nn
import tqdm
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
# from torchvision.models import resnet50 # No longer using resnet50
from torchvision.models import resnet18, ResNet18_Weights # Import ResNet-18
from sklearn.model_selection import train_test_split
from copy import deepcopy
import d3rlpy

import numpy as np
import pandas as pd
from copy import deepcopy

from simulation import cancer
from generate import simulate_blackwell_glynn
from nsmm import nsmm_lag1, nsmm_lag1_cate
from msm import MarginalStructuralModel


In [2]:

np.random.seed(100)

num_time_steps = 60  # 6 month followup
num_patients = 1000

simulation_params = cancer.get_confounding_params(num_patients, chemo_coeff=10.0, radio_coeff=10.0)
simulation_params['window_size'] = 15

outputs = cancer.simulate(simulation_params, num_time_steps)

  if recovery_rvs[i, t] < np.exp(-cancer_volume[i, t] * tumour_cell_density):


In [3]:
# Assuming df is your dataframe
def prepare_data_for_outcome(df, drug_half_life = 1):
    df = df.copy()
    df['chemo_dosage'] = 0.0
    df['previous_cancer_volume'] = df['cancer_volume']
    for pid, group in df.groupby('Patient_ID'):
        group = group.sort_values('Time_Point')
        chemo_instant_dosage = group['chemo_instant_dosage']
        previous_chemo_dose = group['chemo_instant_dosage'].shift(1)
        previous_cancer_volume = group['cancer_volume'].shift(1)
        chemo_dosages = cancer.get_chemo_dosage(chemo_instant_dosage, previous_chemo_dose, drug_half_life)
        

        df.loc[group.index, 'chemo_dosage'] = chemo_dosages
        df.loc[group.index, 'previous_cancer_volume'] = previous_cancer_volume

        df['termination'] = 0
        df.loc[group.index[-1], 'termination'] = 1

    return df.dropna().reset_index(drop=True)

# Example usage
df = prepare_data_for_outcome(outputs)

# drop some row since it need lag data
n_time = int(len(df)/num_patients)
df['category'] = (df['chemo_application'] * 2 + df['radio_application'])



In [4]:
from d3rlpy.constants import ActionSpace
from d3rlpy.algos import DQNConfig
state = df[['previous_cancer_volume',  'chemo_instant_dosage','radio_dosage']].to_numpy()
# action = df[['chemo_application', 'radio_application']].to_numpy()
action = df[['category']].to_numpy().astype(int)
reward = df[['previous_cancer_volume']].to_numpy()-df[['cancer_volume']].to_numpy()
next_state = df[['cancer_volume']].to_numpy()
terminations = df[['termination']].to_numpy()


dataset = d3rlpy.dataset.MDPDataset(
    observations=state,
    actions=action,
    rewards=reward,
    terminals=terminations,
    action_space = ActionSpace.DISCRETE,
    action_size  = 4
)

# if you don't use GPU, set device=None instead.
dqn = DQNConfig(gamma = 0).create(device="cpu")

# initialize neural networks with the given observation shape and action size.
# this is not necessary when you directly call fit or fit_online method.
dqn.build_with_dataset(dataset)

dqn.fit(
    dataset,
    n_steps=10000,
    
)

2025-07-28 23:20.04 [info     ] Signatures have been automatically determined. action_signature=Signature(dtype=[dtype('int32')], shape=[(1,)]) observation_signature=Signature(dtype=[dtype('float64')], shape=[(3,)]) reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)])
2025-07-28 23:20.04 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], shape=[(3,)]), action_signature=Signature(dtype=[dtype('int32')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=4)
2025-07-28 23:20.04 [info     ] Directory is created at d3rlpy_logs\DQN_20250728232004
2025-07-28 23:20.04 [info     ] Parameters                     params={'observation_shape': [3], 'action_size': 4, 'config': {'type': 'dqn', 'params': {'batch_size': 32, 'gamma': 0, 'observation_scaler': {'type': 'none', 'params': {}}, 'action_scaler': {'type': 'none', 'params': 

Epoch 1/1:   0%|          | 0/10000 [00:00<?, ?it/s]

2025-07-28 23:20.32 [info     ] DQN_20250728232004: epoch=1 step=10000 epoch=1 metrics={'time_sample_batch': 0.0005855859279632568, 'time_algorithm_update': 0.002180792236328125, 'loss': 3.3211084084954114, 'time_step': 0.002828042411804199} step=10000
2025-07-28 23:20.32 [info     ] Model parameters are saved to d3rlpy_logs\DQN_20250728232004\model_10000.d3


[(1,
  {'time_sample_batch': 0.0005855859279632568,
   'time_algorithm_update': 0.002180792236328125,
   'loss': 3.3211084084954114,
   'time_step': 0.002828042411804199})]

In [15]:
baseline = dqn.predict_value(state, np.array([0]*len(state)))

radio = dqn.predict_value(state, np.array([2]*len(state)))

(radio - baseline).mean()

1.2960536

In [70]:
dqn.predict(state[:200])

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1], dtype=int64)