In [28]:
import tensorflow as tf
import numpy as np
import tensorflow.keras.backend as tfkb
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Concatenate
from tensorflow.keras import regularizers
from tensorflow.keras import Model
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.metrics import binary_accuracy
from tensorflow.keras.losses import Loss
from tensorflow.keras.layers import Layer
class EpsilonLayer(Layer):

    def __init__(self):
        super(EpsilonLayer, self).__init__()

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.epsilon = self.add_weight(name='epsilon',
                                       shape=[1, 1],
                                       initializer='RandomNormal',
                                       #  initializer='ones',
                                       trainable=True)
        super(EpsilonLayer, self).build(input_shape)  # Be sure to call this at the end

    def call(self, inputs, **kwargs):
        #note there is only one epsilon were just duplicating it for conformability
        return self.epsilon * tf.ones_like(inputs)[:, 0:1]

def make_dragonnet(input_dim, reg_l2):

    x = Input(shape=(input_dim,), name='input')
    # representation
    phi = Dense(units=200, activation='elu', kernel_initializer='RandomNormal',name='phi_1')(x)
    phi = Dense(units=200, activation='elu', kernel_initializer='RandomNormal',name='phi_2')(phi)
    phi = Dense(units=200, activation='elu', kernel_initializer='RandomNormal',name='phi_3')(phi)

    # HYPOTHESIS
    y0_hidden = Dense(units=100, activation='elu', kernel_regularizer=regularizers.l2(reg_l2),name='y0_hidden_1')(phi)
    y1_hidden = Dense(units=100, activation='elu', kernel_regularizer=regularizers.l2(reg_l2),name='y1_hidden_1')(phi)

    # second layer
    y0_hidden = Dense(units=100, activation='elu', kernel_regularizer=regularizers.l2(reg_l2),name='y0_hidden_2')(y0_hidden)
    y1_hidden = Dense(units=100, activation='elu', kernel_regularizer=regularizers.l2(reg_l2),name='y1_hidden_2')(y1_hidden)

    # third
    y0_predictions = Dense(units=1, activation=None, kernel_regularizer=regularizers.l2(reg_l2), name='y0_predictions')(y0_hidden)
    y1_predictions = Dense(units=1, activation=None, kernel_regularizer=regularizers.l2(reg_l2), name='y1_predictions')(y1_hidden)

    #propensity prediction
    #Note that the activation is actually sigmoid, but we will squish it in the loss function for numerical stability reasons
    t_predictions = Dense(units=1,activation=None,name='t_prediction')(phi)
    #Although the epsilon layer takes an input, it really just houses a free parameter. 
    epsilons = EpsilonLayer()(t_predictions)
    concat_pred = Concatenate(1)([y0_predictions, y1_predictions,t_predictions,epsilons,phi])
    model = Model(inputs=x, outputs=concat_pred)
    return model

class Base_Loss(Loss):
    #initialize instance attributes
    def __init__(self, alpha=1.0):
        super().__init__()
        self.alpha = alpha
        self.name='standard_loss'

    def split_pred(self,concat_pred):
        #generic helper to make sure we dont make mistakes
        preds={}
        preds['y0_pred'] = concat_pred[:, 0]
        preds['y1_pred'] = concat_pred[:, 1]
        preds['t_pred'] = concat_pred[:, 2]
        preds['phi'] = concat_pred[:, 3:]
        return preds

    #for logging purposes only
    def treatment_acc(self,concat_true,concat_pred):
        t_true = concat_true[:, 1]
        p = self.split_pred(concat_pred)
        #Since this isn't used as a loss, I've used tf.reduce_mean for interpretability
        return tf.reduce_mean(binary_accuracy(t_true, tf.math.sigmoid(p['t_pred']), threshold=0.5))

    def treatment_bce(self,concat_true,concat_pred):
        t_true = concat_true[:, 1]
        p = self.split_pred(concat_pred)
        lossP = tf.reduce_sum(binary_crossentropy(t_true,p['t_pred'],from_logits=True))
        return lossP
    
    def regression_loss(self,concat_true,concat_pred):
        y_true = concat_true[:, 0]
        t_true = concat_true[:, 1]
        p = self.split_pred(concat_pred)
        loss0 = tf.reduce_sum((1. - t_true) * tf.square(y_true - p['y0_pred']))
        loss1 = tf.reduce_sum(t_true * tf.square(y_true - p['y1_pred']))
        return loss0+loss1

    def standard_loss(self,concat_true,concat_pred):
        lossR = self.regression_loss(concat_true,concat_pred)
        lossP = self.treatment_bce(concat_true,concat_pred)
        return lossR + self.alpha * lossP

    #compute loss
    def call(self, concat_true, concat_pred):        
        return self.standard_loss(concat_true,concat_pred)
        
class TarReg_Loss(Base_Loss):
    #initialize instance attributes
    def __init__(self, alpha=1,beta=1):
        super().__init__()
        self.alpha = alpha
        self.beta=beta
        self.name='tarreg_loss'

    def split_pred(self,concat_pred):
        #generic helper to make sure we dont make mistakes
        preds={}
        preds['y0_pred'] = concat_pred[:, 0]
        preds['y1_pred'] = concat_pred[:, 1]
        preds['t_pred'] = concat_pred[:, 2]
        preds['epsilon'] = concat_pred[:, 3] #we're moving epsilon into slot three
        preds['phi'] = concat_pred[:, 4:]
        return preds

    def calc_hstar(self,concat_true,concat_pred):
        #step 2 above
        p=self.split_pred(concat_pred)
        y_true = concat_true[:, 0]
        t_true = concat_true[:, 1]

        t_pred = tf.math.sigmoid(concat_pred[:, 2])
        t_pred = (t_pred + 0.001) / 1.002 # a little numerical stability trick implemented by Shi
        y_pred = t_true * p['y1_pred'] + (1 - t_true) * p['y0_pred']

        #calling it cc for "clever covariate" as in SuperLearner TMLE literature
        cc = t_true / t_pred - (1 - t_true) / (1 - t_pred)
        h_star = y_pred + p['epsilon'] * cc
        return h_star

    def call(self,concat_true,concat_pred):
        y_true = concat_true[:, 0]

        standard_loss=self.standard_loss(concat_true,concat_pred)
        h_star=self.calc_hstar(concat_true,concat_pred)
        #step 3 above
        targeted_regularization = tf.reduce_sum(tf.square(y_true - h_star))

        # final
        loss = standard_loss + self.beta * targeted_regularization
        return loss

In [29]:
from evaluation import TarReg_Metrics

In [30]:
#@title First load the data! (Click Play)
import numpy as np
from sklearn.preprocessing import StandardScaler
!wget -nc http://www.fredjo.com/files/ihdp_npci_1-100.train.npz
!wget -nc http://www.fredjo.com/files/ihdp_npci_1-100.test.npz 

def load_IHDP_data(training_data,testing_data,i=7):
    with open(training_data,'rb') as trf, open(testing_data,'rb') as tef:
        train_data=np.load(trf); test_data=np.load(tef)
        y=np.concatenate(   (train_data['yf'][:,i],   test_data['yf'][:,i])).astype('float32') #most GPUs only compute 32-bit floats
        t=np.concatenate(   (train_data['t'][:,i],    test_data['t'][:,i])).astype('float32')
        x=np.concatenate(   (train_data['x'][:,:,i],  test_data['x'][:,:,i]),axis=0).astype('float32')
        mu_0=np.concatenate((train_data['mu0'][:,i],  test_data['mu0'][:,i])).astype('float32')
        mu_1=np.concatenate((train_data['mu1'][:,i],  test_data['mu1'][:,i])).astype('float32')

        data={'x':x,'t':t,'y':y,'t':t,'mu_0':mu_0,'mu_1':mu_1}
        data['t']=data['t'].reshape(-1,1) #we're just padding one dimensional vectors with an additional dimension 
        data['y']=data['y'].reshape(-1,1)
        
        #rescaling y between 0 and 1 often makes training of DL regressors easier
        data['y_scaler'] = StandardScaler().fit(data['y'])
        data['ys'] = data['y_scaler'].transform(data['y'])

    return data

data_train=load_IHDP_data(training_data='./ihdp_npci_1-100.train.npz',testing_data='./ihdp_npci_1-100.train.npz')
data_valid=load_IHDP_data(training_data='./ihdp_npci_1-100.test.npz',testing_data='./ihdp_npci_1-100.test.npz')
np.shape(data_train['x'])

文件 “ihdp_npci_1-100.train.npz” 已经存在；不获取。

文件 “ihdp_npci_1-100.test.npz” 已经存在；不获取。



(1344, 25)

In [31]:
import tensorflow as tf
import numpy as np
import datetime
%load_ext tensorboard

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau, TerminateOnNaN
from tensorflow.keras.optimizers import SGD, Adam

val_split=0.2
batch_size=64
verbose=1
i = 0
tf.random.set_seed(i)
np.random.seed(i)

# Clear any logs from previous runs
!rm -rf ./logs/ 
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0)

sgd_callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor='val_loss', patience=40, min_delta=0.),
        ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, verbose=verbose, mode='auto',
                          min_delta=0., cooldown=0, min_lr=0),
        tensorboard_callback,
        TarReg_Metrics(data_train,'train', verbose=verbose),
        TarReg_Metrics(data_valid,'valid', verbose=verbose) 
   ]

sgd_lr = 1e-5
momentum = 0.9

dragonnet_model=make_dragonnet(data_train['x'].shape[1],.01)
tarreg_loss=TarReg_Loss(alpha=1)

dragonnet_model.compile(optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True),
                      loss=tarreg_loss,
                 metrics=[tarreg_loss,tarreg_loss.regression_loss,tarreg_loss.treatment_acc])

dragonnet_model.fit(x=data_train['x'],y=np.concatenate([data_train['ys'], data_train['t']], 1),
                 callbacks=sgd_callbacks,
                  validation_data=[data_valid['x'],np.concatenate([data_valid['ys'], data_valid['t']], 1)],
                  epochs=300,
                  batch_size=batch_size,
                  verbose=verbose)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


  super(SGD, self).__init__(name, **kwargs)


Epoch 1/300
 — ate_err: 0.5727  — aipw_err: 0.6116 — tarreg_err: 0.5560 — cate_err: 2.3910 — cate_nn_err: 2.6645 
Epoch 2/300
 — ate_err: 0.3425  — aipw_err: 2.6843 — tarreg_err: 0.4764 — cate_err: 2.9520 — cate_nn_err: 2.3503 
Epoch 3/300
 — ate_err: 0.1305  — aipw_err: 2.8372 — tarreg_err: 0.2011 — cate_err: 3.1398 — cate_nn_err: 2.2443 
Epoch 4/300
 — ate_err: 0.2954  — aipw_err: 0.1946 — tarreg_err: 0.3351 — cate_err: 3.1047 — cate_nn_err: 2.1290 
Epoch 5/300
 — ate_err: 0.4474  — aipw_err: 5.3614 — tarreg_err: 0.4769 — cate_err: 3.1112 — cate_nn_err: 2.1453 
Epoch 6/300
 — ate_err: 0.3336  — aipw_err: 1.1989 — tarreg_err: 0.3513 — cate_err: 3.1075 — cate_nn_err: 2.1363 
Epoch 7/300
 — ate_err: 0.2025  — aipw_err: 0.9579 — tarreg_err: 0.2128 — cate_err: 3.0959 — cate_nn_err: 2.1521 
Epoch 8/300
 — ate_err: 0.3416  — aipw_err: 8.2712 — tarreg_err: 0.3506 — cate_err: 3.0490 — cate_nn_err: 2.1112 
Epoch 9/300
 — ate_err: 0.4464  — aipw_err: 2.5224 — tarreg_err: 0.4568 — cate_err: 3.08

<keras.callbacks.History at 0x160bec4c0>

In [32]:
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6007 (pid 45269), started 2 days, 2:45:33 ago. (Use '!kill 45269' to kill it.)