In [1]:
# Load IPython's autoreload extension and switch it to mode 2 so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings

# Install an "ignore" filter for DeprecationWarning for the rest of the session.
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Load

In [None]:
import numpy as np
import pandas as pd
# Display up to 100 rows, and never truncate columns, when rendering DataFrames.
pd.set_option("display.max_rows", 100)
# Use the full option name: the shortened "display.max_column" only resolves via
# pandas' partial-name matching, which can break if a similarly named option is added.
pd.set_option("display.max_columns", None)
import tensorflow as tf

import os

from dice_rl.data.dataset import Dataset, convert_to_tfagents_timestep
from dice_rl.scripts.run_neural_dice import run_neural_dice_loaded
from dice_rl.scripts.run_neural_dual_dice import run_neural_dual_dice_loaded
from dice_rl.scripts.run_neural_coin_dice import run_neural_coin_dice_loaded
from dice_rl_plugins.estimators.estimator import get_fullbatch_average

from plugins.medical_rl.sepsis_mimic_iv.dataset import TFOffpolicyDatasetGeneratorSepsisMimicIV
from plugins.medical_rl.sepsis_mimic_iv.policy import TFPolicySepsisMimicIV
from dice_rl_plugins.utils import get_logs, plot_log, print_namedtuple

In [129]:
# `n_pads` is encoded into the hyper-parameter string, and therefore into the
# dataset directory name built below.
n_pads = 10

# Root folder used for the dataset here and for the "outputs" folder in later cells.
data_dir = os.path.join("data", "RRT_mimic_iv")

# e.g. "n_pads_10" -> data/RRT_mimic_iv/datasets/n_pads_10
hparam_str_dataset = "n_pads_{}".format(n_pads)
dataset_dir = os.path.join(data_dir, "datasets", hparam_str_dataset)

# Prepare

## Dataset

In [159]:
# Load the dataset cached in `dataset_dir` if possible; otherwise generate it
# from scratch and save it to `dataset_dir` so later runs can load it.
try:
    dataset = Dataset.load(dataset_dir)

except Exception as load_error:
    # `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate, and the reason for the fallback is reported rather
    # than silently discarded.
    print(f"Could not load dataset from {dataset_dir} ({load_error!r}); generating it instead.")

    generator = TFOffpolicyDatasetGeneratorSepsisMimicIV(
        clustered=False,
        # Use the configured value so the generated data always matches the
        # `n_pads_*` directory name it is saved under.
        n_pads=n_pads,
    )

    generator.add_episodes_to_dataset(verbosity=1)

    dataset = generator.dataset

    if not tf.io.gfile.isdir(dataset_dir):
        tf.io.gfile.makedirs(dataset_dir)

    dataset.save(dataset_dir)

data/RRT_mimic_iv/datasets/n_pads_10/dataset-ctr.pkl


# Run

## DICE

In [136]:
# Call the NeuralDICE entry point on the dataset prepared above and keep its
# return value. Keyword arguments are grouped by theme; values are unchanged.
neural_dice_estimator_evaluation_policy = run_neural_dice_loaded(
    # --- data and output location ---
    dataset=dataset,
    hparam_str=hparam_str_dataset,
    save_dir=os.path.join(data_dir, "outputs"),
    # --- training loop ---
    num_steps=100000,
    batch_size=2048,
    nu_learning_rate=1e-4,
    zeta_learning_rate=1e-4,
    # --- regularisation ---
    nu_regularizer=0.0,
    zeta_regularizer=0.0,
    regularizer_primal=0.0,
    regularizer_dual=1.0,
    regularizer_norm=1.0,
    # --- objective ---
    gamma=1,
    f_exponent=2,
    primal_form=False,
    zeta_pos=True,
    # --- reward handling ---
    zero_reward=False,
    scale_reward=1.0,
    shift_reward=0.0,
    transform_reward=None,
)

num loaded steps 
 tf.Tensor(70697, shape=(), dtype=int64)
num loaded total steps 
 tf.Tensor(74139, shape=(), dtype=int64)
num loaded episodes 
 tf.Tensor(3442, shape=(), dtype=int64)
num loaded total episodes 
 tf.Tensor(3442, shape=(), dtype=int64)
min reward 
 tf.Tensor(-100.0, shape=(), dtype=float32)
max reward 
 tf.Tensor(100.0, shape=(), dtype=float32)
behavior per-step 
 tf.Tensor(74.700485, shape=(), dtype=float32)



  0%|          | 42/100000 [00:04<2:58:08,  9.35it/s]


KeyboardInterrupt: 

## DualDICE

In [None]:
# Call the NeuralDualDICE entry point on the dataset prepared above and keep
# its return value. Keyword arguments are grouped by theme; values are unchanged.
neural_dual_dice_estimator = run_neural_dual_dice_loaded(
    # --- data and output location ---
    dataset=dataset,
    hparam_str=hparam_str_dataset,
    save_dir=os.path.join(data_dir, "outputs"),
    # --- training loop ---
    num_steps=300000,
    batch_size=4096,
    nu_learning_rate=1e-4,
    zeta_learning_rate=1e-4,
    # --- regularisation ---
    nu_regularizer=1.0,
    zeta_regularizer=1.0,
    # --- objective ---
    gamma=0.99,
    f_exponent=2,
    primal_form=False,
    solve_for_state_action_ratio=True,
)

num loaded steps 
 tf.Tensor(27662, shape=(), dtype=int64)
num loaded total steps 
 tf.Tensor(29116, shape=(), dtype=int64)
num loaded episodes 
 tf.Tensor(1454, shape=(), dtype=int64)
num loaded total episodes 
 tf.Tensor(1454, shape=(), dtype=int64)
min reward 
 tf.Tensor(0.0, shape=(), dtype=float32)
max reward 
 tf.Tensor(1.0, shape=(), dtype=float32)
behavior per-step 
 tf.Tensor(0.12401538, shape=(), dtype=float32)



 74%|███████▎  | 220871/300000 [14:04:31<3:26:09,  6.40it/s]     