In [23]:
import numpy as np
from scipy.special import expit
from evaluator_Linear import evaluator
from probLearner import PMLearner, RewardLearner, PALearner
from ratioLearner import  RatioLinearLearner as RatioLearner
from qLearner_Linear import Qlearner

## STEP0: Define the Target Policy and the Control Policy

Modify the `Policy.py` file accordingly: it must provide the `target_policy` and `control_policy` functions imported below.

In [24]:
from Policy import target_policy, control_policy

## STEP1: prepare the dataset

The following is an example of a proper input dataset with 2 trajectories and 3 observations per trajectory. The dataset is a dictionary with the following keys:
- s0: stacked initial states of all the trajectories, 2d-array
- state: stacked states of all the trajectories at all time points, 2d-array
- action: stacked sequence of actions for all trajectories at all time points, 1d-array
- mediator: stacked mediators of all the trajectories at all time points, 2d-array
- reward: stacked sequence of rewards for all trajectories at all time points, 1d-array
- next_state: stacked next states of all the trajectories at all time points, 2d-array

In [25]:
# NOTE(review): every line in this cell is commented out, so it defines nothing.
# Later cells read a variable named `dataset`; on a fresh kernel it must be
# created first, either by uncommenting the simulator example below or by
# loading your own data into `dataset`.
#from Simulator import Simulator
#dim_state=3; dim_mediator = 2
#simulator = Simulator(model_type='Gaussian_semi', dim_state=dim_state, dim_mediator = dim_mediator)
#simulator.sample_trajectory(num_trajectory=30, num_time=30, seed=0)
#simulator.trajectory2iid()
#sim_iid_dataset = simulator.iid_dataset
#dataset = sim_iid_dataset
#dataset

## STEP2: Modify the hyper-parameters

In [26]:
# Fixed hyper-parameters -- no need to modify.
expectation_MCMC_iter = 100
expectation_MCMC_iter_Q3 = expectation_MCMC_iter_Q_diff = 100
truncate = 50
# Candidate settings passed to `evaluator` as `problearner_parameters`.
# This has to be a plain dict: a trailing comma after the closing brace would
# silently turn it into a 1-tuple wrapping the dict.
# NOTE(review): presumably consumed as a scikit-learn style parameter grid --
# confirm in probLearner.
problearner_parameters = {"splitter": ["best", "random"], "max_depth": range(1, 50)}

# Hyper-parameters that need modification:
#   dim_state    = the dimension of the state variable
#   dim_mediator = the dimension of the mediator variable
#   ratio_ndim   = number of features used to learn the ratio model; it can be
#                  varied to see how it affects the estimation performance
dim_state = 3
dim_mediator = 2

ratio_ndim = 15


## STEP3: Causal Effect Estimation (target policy = behavior policy)

In [28]:
def target_policy_2(state, dim_state = 1, action=None, matrix_based=False):
    """Binary policy that assigns probability 0.5 to each of the actions 0 and 1.

    Parameters
    ----------
    state : array-like
        Only read when ``matrix_based`` is True, where it is reshaped to
        ``(-1, dim_state)`` to obtain the number of rows.
    dim_state : int
        Number of columns used when reshaping ``state``.
    action : scalar or array-like, optional
        Scalar mode: if None an action is sampled, otherwise the probability
        of this action is looked up. Matrix mode: array of 0/1 actions.
    matrix_based : bool
        Selects the vectorised (matrix) mode.

    Returns
    -------
    numpy.ndarray
        Scalar mode: a length-1 array holding either the sampled action or the
        probability of ``action``. Matrix mode: a 1d array with the
        probability of each supplied action.
    """
    if matrix_based:
        # One probability per row of the reshaped state matrix.
        n_rows = state.reshape((-1, dim_state)).shape[0]
        flat_actions = np.copy(action).flatten()
        prob_one = .5 * np.ones(n_rows)
        return prob_one * flat_actions + (1 - prob_one) * (1 - flat_actions)

    prob_one = .5
    # Index 0 -> P(action = 0), index 1 -> P(action = 1).
    probs = np.array([1 - prob_one, prob_one])
    if action is not None:
        return np.array([probs[int(action)]])
    return np.random.choice([0, 1], 1, p=probs)

# Settings for this single run: the ratio-model feature count, plus the `d`
# and `min_L` values forwarded to the Q-learner through Q_settings.
ratio_ndim = 15
d = 3
L = 7

est_obj1 = evaluator(
    dataset,
    Qlearner, PMLearner, RewardLearner, PALearner, RatioLearner,
    problearner_parameters=problearner_parameters,
    ratio_ndim=ratio_ndim,
    truncate=truncate,
    l2penalty=1.0,
    target_policy=target_policy_2,
    control_policy=control_policy,
    dim_state=dim_state,
    dim_mediator=dim_mediator,
    Q_settings={
        'scaler': 'Identity',
        'product_tensor': True,
        'beta': 3/7,
        'include_intercept': False,
        'expectation_MCMC_iter_Q3': expectation_MCMC_iter_Q3,
        'expectation_MCMC_iter_Q_diff': expectation_MCMC_iter_Q_diff,
        'penalty': 10**(-9),
        'd': d,
        'min_L': L,
    },
    expectation_MCMC_iter=expectation_MCMC_iter,
    seed=10,
)

# Run the estimation, then pull the point estimates and their variances.
est_obj1.estimate_DE_ME_SE()
est_value1, var_value1 = est_obj1.est_DEMESE, est_obj1.var_DEMESE

# The estimates of interest, unpacked from est_value1 by position:

# 1. entries 0-2: the proposed triply robust estimator
DE_TR, ME_TR, SE_TR = est_value1[0:3]

# 2. entries 3-5: the direct estimator of etas
DE_Direct, ME_Direct, SE_Direct = est_value1[3:6]

# 3. entries 6-7: the baseline method (only two values are unpacked here,
#    so SE is recorded as NaN)
DE_base, ME_base = est_value1[6:8]
SE_base = np.nan


reward {'max_depth': 3, 'splitter': 'best'}
2.064634207711125


# Try different dimensions

In [31]:
import pandas as pd

# Sweep over the ratio-model feature count, `d`, and `min_L`, re-running the
# estimator for every combination and collecting one result row per run.
records = []
NT = len(dataset['state'])
max_L = int(np.sqrt((NT)**(3/7)))
for ration_ndim in np.arange(5,25,2):
    for d in [1,2,3]:
        for L in np.arange(d+2, d+7):
            est_obj1 = evaluator(dataset,
                                 Qlearner, PMLearner,
                                 RewardLearner, PALearner, RatioLearner,
                                 problearner_parameters = problearner_parameters,
                                 # Use the loop value. Passing the global
                                 # `ratio_ndim` here would pin every run to the
                                 # same feature count and make the outer loop
                                 # a no-op. Cast to a plain int to match the
                                 # type used in the single-run cell.
                                 ratio_ndim = int(ration_ndim), truncate = truncate, l2penalty = 1.0,
                                 target_policy=target_policy_2, control_policy = control_policy,
                                 dim_state = dim_state, dim_mediator = dim_mediator,
                                 Q_settings = {'scaler': 'Identity','product_tensor': True, 'beta': 3/7,
                                               'include_intercept': False, 'expectation_MCMC_iter_Q3': expectation_MCMC_iter_Q3,
                                               'expectation_MCMC_iter_Q_diff':expectation_MCMC_iter_Q_diff,
                                               'penalty': 10**(-9),'d': d, 'min_L': L},
                                 expectation_MCMC_iter = expectation_MCMC_iter,
                                 seed = 10)

            est_obj1.estimate_DE_ME_SE()
            est_value1 = est_obj1.est_DEMESE
            var_value1 = est_obj1.var_DEMESE

            # The estimates of interest, unpacked from est_value1 by position:

            # 1. the proposed triply robust estimator
            DE_TR, ME_TR, SE_TR = est_value1[:3]

            # 2. the direct estimator of etas
            DE_Direct, ME_Direct, SE_Direct = est_value1[3:6]

            # 3. the baseline method (only two values are unpacked, so SE is NaN)
            DE_base, ME_base = est_value1[6:8]
            SE_base = np.nan

            # Progress trace for the current grid point.
            print(ration_ndim, d, L)
            # NOTE(review): presumably mirrors how the Q-learner derives its
            # effective L from `min_L` -- confirm against qLearner_Linear.
            real_L = max(L, max_L+d)
            records.append([ration_ndim, d, real_L, DE_TR, ME_TR, SE_TR, DE_Direct, ME_Direct, SE_Direct, DE_base, ME_base,SE_base])

# Build the result table once, after the loop. The column names (including the
# historical 'ration_ndim' spelling) are kept unchanged for downstream code.
out = pd.DataFrame(records, columns = ['ration_ndim', 'd', 'real_L', 'DE_TR', 'ME_TR', 'SE_TR',
                                       'DE_Direct', 'ME_Direct', 'SE_Direct', 'DE_base', 'ME_base','SE_base'])