In [3]:
import numpy as np
import pandas as pd

from utils.utils import load_data, extract_trajectories
from policy.policy import GreedyPolicy, RandomPolicy, StochasticPolicy
from policy.custom_policy import get_physician_policy
from mdp.builder import make_mdp
from mdp.solver import Q_value_iteration
from irl.max_margin import run_max_margin
from irl.irl import  make_state_centroid_finder, make_phi, make_initial_state_sampler
from constants import NUM_STATES, NUM_ACTIONS, TERMINAL_STATE_ALIVE, TERMINAL_STATE_DEAD, NUM_PURE_STATES
# ---------------------------------------------------------------------------
# Hyperparameters (tune here). Their consumers are not in this cell.
# ---------------------------------------------------------------------------
num_iterations = 2
num_trials = 2
svm_penalty = 300.0
svm_epsilon = 0.01
verbose = True

# NOTE(review): the saved output of this cell is
# "NameError: name 'df_train' is not defined". No `df_train` is referenced in
# this cell, so the error presumably originates inside one of the imported
# helpers (or from an older revision of the cell) — confirm before relying on
# the variables defined below.

# ---------------------------------------------------------------------------
# Data and expert trajectories
# ---------------------------------------------------------------------------
df, df_cleansed, df_centroids = load_data()
feature_columns = df_centroids.columns
trajectories = extract_trajectories(df_cleansed, NUM_PURE_STATES)
# Column 0 of `trajectories` is treated as the trajectory identifier.
trajectory_ids = trajectories[:, 0]
num_exp_trajectories = len(np.unique(trajectory_ids))

# ---------------------------------------------------------------------------
# MDP built from the trajectories
# ---------------------------------------------------------------------------
transition_matrix, reward_matrix = make_mdp(trajectories, NUM_STATES, NUM_ACTIONS)

# Pin the terminal rewards to +/- sqrt(number of features); per the original
# author's note this rescaling of rmax / rmin is meant to keep
# w^T phi(s) <= 1.
terminal_reward = np.sqrt(len(feature_columns))
reward_matrix[TERMINAL_STATE_ALIVE] = terminal_reward
reward_matrix[TERMINAL_STATE_DEAD] = -terminal_reward
# Collapse r(s, a) into r(s) by averaging over axis 1, i.e.
# r(s) = E_{a ~ uniform}[r(s, a)].
reward_matrix = np.mean(reward_matrix, axis=1)

# Sanity report (see irl/max_margin for the IRL implementation itself).
if verbose:
    # Everything except the last two entries — presumably the two terminal
    # states sit at the end of the state index; TODO confirm.
    intermediate_rewards = reward_matrix[:-2]
    print('number of features', len(feature_columns))
    print('transition_matrix size', transition_matrix.shape)
    print('reward_matrix size', reward_matrix.shape)
    print('max rewards: ', reward_matrix.max())
    print('min rewards: ', reward_matrix.min())
    print('max intermediate rewards: ', intermediate_rewards.max())
    print('min intermediate rewards: ', intermediate_rewards.min())
    print('')

# ---------------------------------------------------------------------------
# Building blocks for max-margin IRL
# ---------------------------------------------------------------------------
sample_initial_state = make_initial_state_sampler(df_cleansed)
get_state = make_state_centroid_finder(df_centroids, feature_columns)
phi = make_phi(df_centroids)

# Empirical expert (physician) policy derived from the trajectories.
pi_expert = get_physician_policy(trajectories)



NameError: name 'df_train' is not defined

In [None]:
# Estimate two quantities for the expert policy — presumably its feature
# expectation (mu) and a value estimate (v), going by the result names.
# NOTE(review): `estimate_feature_expectation` is not imported in any cell
# visible here; confirm which module provides it and import it in the top
# import cell, otherwise this cell raises NameError on a fresh kernel.
mu_pi_expert, v_pi_expert = estimate_feature_expectation(
    transition_matrix,
    sample_initial_state,
    get_state,
    phi,
    pi_expert,
)


In [7]:
import pandas as pd
# List the columns of the training CSV and of the full sepsis CSV so the two
# schemas can be compared by eye. Paths are relative to the notebook's
# working directory.
TRAIN_CSV_PATH = 'data/Sepsis_imp_train.csv'
FULL_CSV_PATH = 'data/sepsis.csv'

train_df = pd.read_csv(TRAIN_CSV_PATH)
print(train_df.columns)

full_df = pd.read_csv(FULL_CSV_PATH)
print(full_df.columns)

Index(['bloc', 'icustayid', 'charttime', 'gender', 'age', 'elixhauser',
       're_admission', 'SOFA', 'SIRS', 'Weight_kg', 'GCS', 'HR', 'SysBP',
       'MeanBP', 'DiaBP', 'Shock_Index', 'RR', 'SpO2', 'Temp_C', 'FiO2_1',
       'Potassium', 'Sodium', 'Chloride', 'Glucose', 'BUN', 'Creatinine',
       'Magnesium', 'Calcium', 'Ionised_Ca', 'CO2_mEqL', 'SGOT', 'SGPT',
       'Total_bili', 'Albumin', 'Hb', 'WBC_count', 'Platelets_count', 'PTT',
       'PT', 'INR', 'Arterial_pH', 'paO2', 'paCO2', 'Arterial_BE',
       'Arterial_lactate', 'HCO3', 'PaO2_FiO2', 'median_dose_vaso',
       'max_dose_vaso', 'input_total_tev', 'input_4hourly_tev', 'output_total',
       'output_4hourly', 'cumulated_balance_tev', 'sedation', 'mechvent',
       'rrt', 'died_in_hosp', 'mortality_90d'],
      dtype='object')
Index(['bloc', 'icustayid', 'charttime', 'gender', 'age', 'elixhauser',
       're_admission', 'SOFA', 'SIRS', 'Weight_kg', 'GCS', 'HR', 'SysBP',
       'MeanBP', 'DiaBP', 'Shock_Index', 'RR', 'Sp