In [1]:
import datetime
import numpy as np
import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow_probability as tfp
import tensorflow.keras.layers as tfkl
tfd,tfpl = tfp.distributions,tfp.layers
import tensorflow.keras.backend as tfkb
from tensorflow.keras.callbacks import Callback
from sklearn.preprocessing import StandardScaler
from networks import fc_net, p_x_z, p_t_z, p_y_tz, q_t_x, q_y_tx, q_z_txy
from evaluation import Evaluator, pdist2sq, Full_Metrics
#################################IHDP Data
# data information 
t_bin_dim = 1
y_dim, default_y_scale = 1,tf.exp(0.)
M = None        # batch size during training
z_dim = 20          # latent z dimension
lamba = 1e-4    # weight decay
nh, h = 3, 200  # number and size of hidden layers
binfeats = [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
numfeats = [i for i in range(25) if i not in binfeats]
x_bin_dim = len(binfeats)
x_num_dim = len(numfeats)
x_bin_dim, x_num_dim, t_bin_dim, y_dim, z_dim

2022-03-07 18:53:59.920058: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


(19, 6, 1, 1, 20)

In [2]:
!wget -nc http://www.fredjo.com/files/ihdp_npci_1-100.train.npz
!wget -nc http://www.fredjo.com/files/ihdp_npci_1-100.test.npz 

def load_IHDP_data(training_data,testing_data,i=7):
    with open(training_data,'rb') as trf, open(testing_data,'rb') as tef:
        train_data=np.load(trf); test_data=np.load(tef)
        y=np.concatenate(   (train_data['yf'][:,i],   test_data['yf'][:,i])).astype('float32') #most GPUs only compute 32-bit floats
        t=np.concatenate(   (train_data['t'][:,i],    test_data['t'][:,i])).astype('float32')
        x=np.concatenate(   (train_data['x'][:,:,i],  test_data['x'][:,:,i]),axis=0).astype('float32')
        mu_0=np.concatenate((train_data['mu0'][:,i],  test_data['mu0'][:,i])).astype('float32')
        mu_1=np.concatenate((train_data['mu1'][:,i],  test_data['mu1'][:,i])).astype('float32')
        ycf=np.concatenate((train_data['ycf'][:,i],  test_data['ycf'][:,i])).astype('float32')

        data={'x':x,'t':t,'y':y,'t':t,'mu_0':mu_0,'mu_1':mu_1}
        data['t']=data['t'].reshape(-1,1) #we're just padding one dimensional vectors with an additional dimension 
        data['y']=data['y'].reshape(-1,1)
        data['ycf'] = ycf.reshape(-1,1)
        
        #rescaling y between 0 and 1 often makes training of DL regressors easier
        data['y_scaler'] = StandardScaler().fit(data['y'])
        data['ys'] = data['y_scaler'].transform(data['y'])

    return data

data=load_IHDP_data(training_data='./ihdp_npci_1-100.train.npz',testing_data='./ihdp_npci_1-100.test.npz')

文件 “ihdp_npci_1-100.train.npz” 已经存在；不获取。

文件 “ihdp_npci_1-100.test.npz” 已经存在；不获取。



In [3]:
#Colab command to allow us to run Colab in TF2
!rm -rf ./logs/ 
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
file_writer = tf.summary.create_file_writer(log_dir + "/metrics")
file_writer.set_as_default()
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
%reload_ext tensorboard 

In [4]:
class CEVAE(tf.keras.Model):
    def __init__(self):
        super(CEVAE, self).__init__()
        ########################################
        # networks
        self.activation = 'elu'
        # CEVAE Model (decoder)
        self.p_x_z = p_x_z(x_bin_dim, x_num_dim, z_dim, 3, 100)
        self.p_t_z = p_t_z(t_bin_dim, z_dim, 1, 100)
        self.p_y_tz = p_y_tz(y_dim, t_bin_dim, z_dim, 3, 100)
        # CEVAE Model (encoder)
        self.q_t_x = q_t_x(x_bin_dim, x_num_dim, t_bin_dim, 1, 20)
        self.q_y_tx = q_y_tx(x_bin_dim, x_num_dim, y_dim, t_bin_dim, 3, 100)
        self.q_z_txy = q_z_txy(x_bin_dim, x_num_dim, y_dim, t_bin_dim, z_dim, 3, 100) 

    def call(self, data, training=False):
        if training:
            x_train,t_train,y_train = data
            # CEVAE
            ## q(z|t,x,y)
            z_infer = self.q_z_txy( tf.concat([t_train,x_train,y_train], axis=-1) )
            z_infer_sample = z_infer.sample()
            ## p(x|z)
            [x_bin,x_con] = self.p_x_z(z_infer_sample)
            ## p(t|z)
            t = self.p_t_z(z_infer_sample)
            ## p(y|t,z)
            y = self.p_y_tz( tf.concat([t_train,z_infer_sample],-1) )
            ## q(t|x)
            t_infer = self.q_t_x(x_train)
            ## q(y|x,t)
            y_infer = self.q_y_tx( tf.concat([t_train,x_train],-1) )
            output = [x_bin, x_con, t, y, z_infer, t_infer, y_infer]
            return output
        else:
            # when training need x,y,t
            x_train = data
            ## q(t|x)
            t_infer = self.q_t_x(x_train).sample()
            t0 = tf.cast(tf.zeros_like(t_infer), tf.float32)
            t1 = tf.cast(tf.ones_like(t_infer), tf.float32)
            ## q(y|x,t)
            t0x = tf.concat([t0, x_train],-1)
            t1x = tf.concat([t1, x_train],-1)
            y0 = self.q_y_tx(t0x).sample()
            y1 = self.q_y_tx(t1x).sample()
            return t_infer,y0,y1

    def cevae_loss(self, data, pred):
        # read labels
        x_train, t_train, y_train = data[0],data[1],data[2]
        # get preds
        [x_bin, x_con, t, y, z_infer, t_infer, y_infer] = pred
        
        # Reconstruction loss
        l1 = tfkb.sum(x_bin.log_prob(x_train[:, :x_bin_dim]),-1)
        l2 = tfkb.sum(x_con.log_prob(x_train[:, x_bin_dim:]),-1)
        l3 = tfkb.sum(t.log_prob(t_train),-1)
        l4 = tfkb.sum(y.log_prob(y_train),-1)
        
        # REGULARIZATION LOSS
        # p(z) - q(z|x,t,y)
        # approximate KL
        z_infer_sample = z_infer.sample()
        z = tfd.Normal(loc = [0] * 20, scale = [1]*20)
        l5 = tfkb.sum((z.log_prob(z_infer_sample) - z_infer.log_prob(z_infer_sample)), -1)

        # AUXILIARY LOSS
        l6 = t_infer.log_prob(t_train)
        l7 = tfkb.sum(y_infer.log_prob(y_train),-1)

        # layer_loss
        l8 = tfkb.sum(self.losses)
        
        return l1,l2,l3,l4,l5,l6,l7,l8

    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        # 这里data[0]因为会自动在外面拼接一层
        data = data[0]
        x,t,y = data
        with tf.GradientTape() as tape:
            pred = self(data, training=True)  # Forward pass
            # loss = self.cevae_loss(data,pred)
            l1,l2,l3,l4,l5,l6,l7,l8 = self.cevae_loss(data,pred)
            # loss = -tfkb.mean(l1 + l2 + l3 + l4 + l5 +  l6 + l7 + l8)
            loss = -tfkb.mean(l1 + l2 + l3 + l4 + l5 +  l6 + l7)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        metrics = {
            "loss": loss,
            "loss_bin":tfkb.mean(l1),
            "loss_con":tfkb.mean(l2),
            "loss_t":tfkb.mean(l3),
            "loss_y":tfkb.mean(l4),
            "kl_z":tfkb.mean(l5),
            "loss_t_aux":tfkb.mean(l6),
            "loss_y_aux":tfkb.mean(l7),
        }
        return metrics

    def test_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        data = data[0]
        x,t,y = data
        with tf.GradientTape() as tape:
            pred = self(x, training=False)  # Forward pass
            t_infer,y0,y1 = pred
        metrics = {"y0": tfkb.mean(y0),"y1": tfkb.mean(y1)}
        return metrics


In [5]:
model = CEVAE()
### MAIN CODE ####
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TerminateOnNaN
from tensorflow.keras.optimizers import SGD
from evaluation import *
 
val_split=0.2
batch_size=64
verbose=True
i = 0
tf.random.set_seed(i)
np.random.seed(i)
yt = np.concatenate([data['ys'], data['t']], 1)
 
sgd_callbacks = [
        TerminateOnNaN(),
        EarlyStopping(monitor='val_loss', patience=40, min_delta=0), 
        #40 is Shi's recommendation patience for this dataset, but you should tune for your data 
        ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, verbose=verbose, mode='auto',
                          min_delta=0, cooldown=0, min_lr=0),
        #This learning rate scheduling is quite agressive which seems good for this dataset
        # Full_Metrics(data,verbose),
        metrics_for_cevae(data,verbose),
        tensorboard_callback
    ]
    
#optimizer hyperparameters
sgd_lr = 1e-5
momentum = 0.9
model.compile(
    optimizer=SGD(
        learning_rate=sgd_lr, 
        momentum=momentum, 
        nesterov=True
        )
    )

model.fit(
    [data['x'],data['t'],data['y']],
    callbacks=sgd_callbacks,
    validation_split=val_split,
    epochs=300,
    batch_size=batch_size,
    verbose=verbose
    )
print("Done!")

Epoch 1/300
 — ite: 8.3615  — ate: 3.5300 — pehe: 4.1293 
Epoch 2/300
 — ite: 8.3363  — ate: 3.6093 — pehe: 4.1924 
Epoch 3/300
 — ite: 8.3243  — ate: 3.6450 — pehe: 4.2316 
Epoch 4/300
 — ite: 8.3280  — ate: 3.7390 — pehe: 4.2972 
Epoch 5/300
 — ite: 8.1756  — ate: 3.7409 — pehe: 4.2943 
Epoch 6/300
 — ite: 8.2009  — ate: 3.7601 — pehe: 4.3178 
Epoch 7/300
 — ite: 8.1628  — ate: 3.8446 — pehe: 4.3718 
Epoch 8/300
 — ite: 8.1066  — ate: 3.8808 — pehe: 4.4262 
Epoch 9/300
 — ite: 8.0381  — ate: 3.8928 — pehe: 4.4269 
Epoch 10/300
 — ite: 8.0523  — ate: 3.9597 — pehe: 4.4875 
Epoch 11/300
 — ite: 7.9392  — ate: 4.0030 — pehe: 4.5271 
Epoch 12/300
 — ite: 7.9066  — ate: 3.9556 — pehe: 4.4670 
Epoch 13/300
 — ite: 7.9407  — ate: 4.0278 — pehe: 4.5707 
Epoch 14/300
 — ite: 7.8049  — ate: 4.0400 — pehe: 4.5691 
Epoch 15/300

Epoch 00015: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-06.
 — ite: 7.7938  — ate: 4.2019 — pehe: 4.7210 
Epoch 16/300
 — ite: 7.7981  — ate: 4.1910 

In [6]:
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6006 (pid 11635), started 2:46:59 ago. (Use '!kill 11635' to kill it.)

In [7]:
np.shape(data['x'])

(747, 25)

In [8]:
data['x']

array([[-0.65613806, -1.0024741 , -0.360898  , ...,  1.        ,
         0.        ,  0.        ],
       [-1.7153288 , -1.0024741 , -0.733261  , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.5543657 ,  0.19681813, -0.360898  , ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [-0.24543142, -0.20294595,  0.38382798, ...,  0.        ,
         0.        ,  1.        ],
       [ 0.53274953,  0.5965822 , -1.105624  , ...,  0.        ,
         0.        ,  0.        ],
       [-1.2830061 , -1.0024741 , -0.360898  , ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)