In [1]:
import datetime
import numpy as np
import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow_probability as tfp
import tensorflow.keras.layers as tfkl
tfd,tfpl = tfp.distributions,tfp.layers
import tensorflow.keras.backend as tfkb
from tensorflow.keras.callbacks import Callback
from sklearn.preprocessing import StandardScaler
# from networks import fc_net, p_x_z, p_t_z, p_y_tz, q_t_x, q_y_tx, q_z_txy
from evaluation import Evaluator, pdist2sq, Full_Metrics
#################################IHDP Data
# data information 
t_bin_dim = 1
y_dim, default_y_scale = 1,tf.exp(0.)
M = None        # batch size during training
z_dim = 20          # latent z dimension
lamba = 1e-4    # weight decay
nh, h = 3, 200  # number and size of hidden layers
binfeats = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
numfeats = [i for i in range(25) if i not in binfeats]
x_bin_dim = len(binfeats)
x_num_dim = len(numfeats)
x_bin_dim, x_num_dim, t_bin_dim, y_dim, z_dim

2022-03-07 20:33:57.434901: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


(19, 6, 1, 1, 20)

In [2]:
!wget -nc http://www.fredjo.com/files/ihdp_npci_1-100.train.npz
!wget -nc http://www.fredjo.com/files/ihdp_npci_1-100.test.npz 

def load_IHDP_data(training_data,testing_data,i=7):
    with open(training_data,'rb') as trf, open(testing_data,'rb') as tef:
        train_data=np.load(trf); test_data=np.load(tef)
        y=np.concatenate(   (train_data['yf'][:,i],   test_data['yf'][:,i])).astype('float32') #most GPUs only compute 32-bit floats
        t=np.concatenate(   (train_data['t'][:,i],    test_data['t'][:,i])).astype('float32')
        x=np.concatenate(   (train_data['x'][:,:,i],  test_data['x'][:,:,i]),axis=0).astype('float32')
        mu_0=np.concatenate((train_data['mu0'][:,i],  test_data['mu0'][:,i])).astype('float32')
        mu_1=np.concatenate((train_data['mu1'][:,i],  test_data['mu1'][:,i])).astype('float32')
        ycf=np.concatenate((train_data['ycf'][:,i],  test_data['ycf'][:,i])).astype('float32')

        data={'x':x,'t':t,'y':y,'t':t,'mu_0':mu_0,'mu_1':mu_1}
        data['t']=data['t'].reshape(-1,1) #we're just padding one dimensional vectors with an additional dimension 
        data['y']=data['y'].reshape(-1,1)
        data['ycf'] = ycf.reshape(-1,1)
        
        #rescaling y between 0 and 1 often makes training of DL regressors easier
        data['y_scaler'] = StandardScaler().fit(data['y'])
        data['ys'] = data['y_scaler'].transform(data['y'])

    return data

data=load_IHDP_data(training_data='./ihdp_npci_1-100.train.npz',testing_data='./ihdp_npci_1-100.test.npz')

文件 “ihdp_npci_1-100.train.npz” 已经存在；不获取。

文件 “ihdp_npci_1-100.test.npz” 已经存在；不获取。



In [3]:
#Colab command to allow us to run Colab in TF2
!rm -rf ./logs/ 
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
%reload_ext tensorboard 

activation_global 

def fc_net(input_shape, layers, out_layers = [], activation = activation_global, lamba = 1e-4):
    net = tfk.Sequential([tfkl.InputLayer([input_shape])])
    for hidden in layers:
        net.add(tfkl.Dense(
            hidden, 
            activation = activation,
            kernel_regularizer = tf.keras.regularizers.l2(lamba) 
            )
        )
    if len(out_layers) > 0:
        [outdim, activation_out] = out_layers
        net.add(tfkl.Dense(outdim, activation = activation_out))
    return net

class q_y_tx(tf.keras.Model):
    def __init__(self, x_bin_dim, x_num_dim, y_dim, t_dim, nh, h):
        super(q_y_tx, self).__init__()
        self.t_dim = t_dim
        self.q_y_xt_shared_hqy = fc_net(x_bin_dim + x_num_dim, (nh - 1) * [h], [])
        self.q_y_xt0_mu = fc_net(h, [h], [y_dim, None])
        self.q_y_xt1_mu = fc_net(h, [h], [y_dim, None])

    def call(self, tx_input, training=False, serving=False):
        t = tx_input[...,:self.t_dim] 
        x = tx_input[...,self.t_dim:]
        hqy = self.q_y_xt_shared_hqy(x)
        
        qy_t0_mu = self.q_y_xt0_mu(hqy)
        qy_t1_mu = self.q_y_xt1_mu(hqy)
        
        y_loc =  t * qy_t1_mu + (1-t) * qy_t0_mu
        return tfd.Normal(
            loc =  y_loc, 
            scale = tf.ones_like(y_loc),
            ) 

NameError: name 'activation_global' is not defined

In [None]:
class CEVAE(tf.keras.Model):
    def __init__(self):
        super(CEVAE, self).__init__()
        ########################################
        # networks
        self.activation = 'elu'
        # CEVAE Model (decoder)
        self.q_y_tx = q_y_tx(x_bin_dim, x_num_dim, y_dim, t_bin_dim, 3, 100)

    def call(self, data, training=False):
        if training:
            x_train,t_train,y_train = data
            ## q(y|x,t)
            y_infer = self.q_y_tx( tf.concat([t_train, x_train],-1) )
            return y_infer
        else:
            # when training need x,y,t
            x_train = data
            ## q(t|x)
            t = tf.ones([tf.shape(x_train)[0],t_bin_dim])
            t0 = tf.cast(tf.zeros_like(t), tf.float32)
            t1 = tf.cast(tf.ones_like(t), tf.float32)
            ## q(y|x,t)
            t0x = tf.concat([t0, x_train],-1)
            t1x = tf.concat([t1, x_train],-1)
            y0 = self.q_y_tx(t0x).sample()
            y1 = self.q_y_tx(t1x).sample()
            return 0,y0,y1

    def cevae_loss(self, data, pred):
        # read labels
        _, t_train, y_train = data[0],data[1],data[2]
        # get preds
        y_infer = pred
        l7 = tfkb.sum(y_infer.log_prob(y_train),-1)
        # layer_loss
        l8 = tfkb.sum(self.losses)
        
        return l7,l8

    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        # 这里data[0]因为会自动在外面拼接一层
        data = data[0]
        x,t,y = data
        with tf.GradientTape() as tape:
            pred = self(data, training=True)  # Forward pass
            # loss = self.cevae_loss(data,pred)
            l7,l8 = self.cevae_loss(data,pred)
            loss = -tfkb.mean(l7 - l8)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        metrics = {
            "loss": loss,
            # "loss_t_aux":tfkb.mean(l6),
            "loss_y_aux":tfkb.mean(l7),
        }
        return metrics

    def test_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        data = data[0]
        x,t,y = data
        with tf.GradientTape() as tape:
            pred = self(x, training=False)  # Forward pass
            t_infer,y0,y1 = pred
        metrics = {"y0": tfkb.mean(y0),"y1": tfkb.mean(y1)}
        return metrics


In [None]:
model = CEVAE()
### MAIN CODE ####
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
from tensorflow.keras.optimizers import SGD
from evaluation import *
 
val_split=0.2
batch_size=64
verbose=True
i = 0
tf.random.set_seed(i)
np.random.seed(i)
yt = np.concatenate([data['ys'], data['t']], 1)
 
sgd_callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor='val_loss', patience=40, min_delta=0), 
        #40 is Shi's recommendation patience for this dataset, but you should tune for your data 
        ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, verbose=verbose, mode='auto',
                          min_delta=0, cooldown=0, min_lr=0),
        #This learning rate scheduling is quite agressive which seems good for this dataset
        # Full_Metrics(data,verbose),
        metrics_for_cevae(data,verbose),
        tensorboard_callback
    ]
    
#optimizer hyperparameters
sgd_lr = 1e-5
momentum = 0.9
model.compile(
    optimizer=SGD(
        learning_rate=sgd_lr, 
        momentum=momentum, 
        nesterov=True
        )
    )

model.fit(
    [data['x'],data['t'],data['y']],
    callbacks=sgd_callbacks,
    validation_split=val_split,
    epochs=30,
    batch_size=batch_size,
    verbose=verbose
    )
print("Done!")

In [None]:
%tensorboard --logdir logs/fit