In [1]:
import tensorflow
print(tensorflow.__version__)

1.15.0


In [2]:
import lifelines
import numpy as np
import pandas

import tensorflow as tf
import tensorflow.contrib.slim as slim

import logging
import math
import os
import threading
import time
from datetime import timedelta

from lifelines.utils import concordance_index
from scipy.stats.stats import spearmanr

import pprint
import sys

seed = 31415
np.random.seed(seed)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [3]:
### C(t)-INDEX CALCULATION
def c_index(Prediction, Time_survival, Death, Time):
    '''
        Raiber: okay, so it does not make a diffrence if the predictions are scores or risks(softmax probabilities) 
        This is a cause-specific c(t)-index
        - Prediction      : risk at Time (higher --> more risky) <class 'numpy.ndarray'> 
          shape (7997,)
        - Time_survival   : survival/censoring time   <class 'numpy.ndarray'>
        shape (7997, 1)
        - Death           :   <class 'numpy.ndarray'>
            > 1: death
            > 0: censored (including death from other cause)
        shape (7997,)
        - Time            : time of evaluation (time-horizon when evaluating C-index)   <class 'int'>
        scalar value 
    '''
    N = len(Prediction)# N = 7997 
    #A.shape (7997, 7997) for validation 
    A = np.zeros((N,N))
    Q = np.zeros((N,N))
    N_t = np.zeros((N,N))
    Num = 0
    Den = 0
    for i in range(N):
        # np.where return the index of the values that fullfil the condition 
        # let assume i = 0, then np.where(Time_survival[i] < Time_survival) will return the indexes 
        # from 0 until 7996 (because the length is 7996) that have a bigger time then the index 0
        # and the assign to A[0, list of biiger times that the value in 0 ] the value 1 
        A[i, np.where(Time_survival[i] < Time_survival)] = 1
        Q[i, np.where(Prediction[i] > Prediction)] = 1
  
        if (Time_survival[i]<=Time and Death[i]==1):
            N_t[i,:] = 1

    Num  = np.sum(((A)*N_t)*Q)
    Den  = np.sum((A)*N_t)

    if Num == 0 and Den == 0:
        result = -1 # not able to compute c-index!
    else:
        result = float(Num/Den)

    return result


##### WEIGHTED C-INDEX & BRIER-SCORE
def CensoringProb(Y, T):

    T = T.reshape([-1]) # (N,) - np array
    Y = Y.reshape([-1]) # (N,) - np array

    kmf = KaplanMeierFitter()
    kmf.fit(T, event_observed=(Y==0).astype(int))  # censoring prob = survival probability of event "censoring"
    G = np.asarray(kmf.survival_function_.reset_index()).transpose()
    G[1, G[1, :] == 0] = G[1, G[1, :] != 0][-1]  #fill 0 with ZoH (to prevent nan values)
    
    return G

def weighted_brier_score(T_train, Y_train, Prediction, T_test, Y_test, Time):
    G = CensoringProb(Y_train, T_train)
    N = len(Prediction)

    W = np.zeros(len(Y_test))
    Y_tilde = (T_test > Time).astype(float)

    for i in range(N):
        tmp_idx1 = np.where(G[0,:] >= T_test[i])[0]
        tmp_idx2 = np.where(G[0,:] >= Time)[0]

        if len(tmp_idx1) == 0:
            G1 = G[1, -1]
        else:
            G1 = G[1, tmp_idx1[0]]

        if len(tmp_idx2) == 0:
            G2 = G[1, -1]
        else:
            G2 = G[1, tmp_idx2[0]]
        W[i] = (1. - Y_tilde[i])*float(Y_test[i])/G1 + Y_tilde[i]/G2

    y_true = ((T_test <= Time) * Y_test).astype(float)

    return np.mean(W*(Y_tilde - (1.-Prediction))**2)

# 1. Preprocessing

In [4]:
# raiber used 
def f_get_Normalization(X, norm_mode): # raiber added functions 
    num_Patient, num_Feature = np.shape(X)

    if norm_mode == 'standard': #zero mean unit variance
        for j in range(num_Feature):
            if np.std(X[:,j]) != 0:
                X[:,j] = (X[:,j] - np.mean(X[:, j]))/np.std(X[:,j])
            else:
                X[:,j] = (X[:,j] - np.mean(X[:, j]))
    elif norm_mode == 'normal': #min-max normalization
        for j in range(num_Feature):
            X[:,j] = (X[:,j] - np.min(X[:,j]))/(np.max(X[:,j]) - np.min(X[:,j]))
    else:
        print("INPUT MODE ERROR!")

    return X
 
def formatted_data(x, t, e, idx):
    death_time = np.array(t[idx], dtype=float)
    censoring = np.array(e[idx], dtype=float)
    covariates = np.array(x[idx])

    print("observed fold:{}".format(sum(e[idx]) / len(e[idx])))
    survival_data = {'x': covariates, 't': death_time, 'e': censoring}
    return survival_data


def risk_set(data_t):
    size = len(data_t)
    risk_set = np.zeros(shape=(size, size))
    for idx in range(size):
        temp = np.zeros(shape=size)
        t_i = data_t[idx]
        at_risk = data_t > t_i
        temp[at_risk] = 1
        # temp[idx] = 0
        risk_set[idx] = temp
    return risk_set

def one_hot_encoder(data, encode):
    print("Encoding data:{}".format(data.shape))
    data_encoded = data.copy()
    encoded = pandas.get_dummies(data_encoded, prefix=encode, columns=encode)
    print("head of data:{}, data shape:{}".format(data_encoded.head(), data_encoded.shape))
    print("Encoded:{}, one_hot:{}{}".format(encode, encoded.shape, encoded[0:5]))
    return encoded

def get_train_median_mode(x, categorial):
    categorical_flat = flatten_nested(categorial)
    print("categorical_flat:{}".format(categorical_flat))
    imputation_values = []
    print("len covariates:{}, categorical:{}".format(x.shape[1], len(categorical_flat)))
    median = np.nanmedian(x, axis=0)
    mode = []
    for idx in np.arange(x.shape[1]):
        a = x[:, idx]
        (_, idx, counts) = np.unique(a, return_index=True, return_counts=True)
        index = idx[np.argmax(counts)]
        mode_idx = a[index]
        mode.append(mode_idx)
    for i in np.arange(x.shape[1]):
        if i in categorical_flat:
            imputation_values.append(mode[i])
        else:
            imputation_values.append(median[i])
    print("imputation_values:{}".format(imputation_values))
    return imputation_values


def missing_proportion(dataset):
    missing = 0
    columns = np.array(dataset.columns.values)
    for column in columns:
        missing += dataset[column].isnull().sum()
    return 100 * (missing / (dataset.shape[0] * dataset.shape[1]))


def one_hot_indices(dataset, one_hot_encoder_list):
    indices_by_category = []
    for colunm in one_hot_encoder_list:
        values = dataset.filter(regex="{}_.*".format(colunm)).columns.values
        # print("values:{}".format(values, len(values)))
        indices_one_hot = []
        for value in values:
            indice = dataset.columns.get_loc(value)
            # print("column:{}, indice:{}".format(colunm, indice))
            indices_one_hot.append(indice)
        indices_by_category.append(indices_one_hot)
    # print("one_hot_indices:{}".format(indices_by_category))
    return indices_by_category

def flatten_nested(list_of_lists):
    flattened = [val for sublist in list_of_lists for val in sublist]
    return flattened

def get_missing_mask(data, imputation_values=None):
    copy = data
    for i in np.arange(len(data)):
        row = data[i]
        indices = np.isnan(row)
        # print("indices:{}, {}".format(indices, np.where(indices)))
        if imputation_values is None:
            copy[i][indices] = 0
        else:
            for idx in np.arange(len(indices)):
                if indices[idx]:
                    # print("idx:{}, imputation_values:{}".format(idx, np.array(imputation_values)[idx]))
                    copy[i][idx] = imputation_values[idx]
    # print("copy;{}".format(copy))
    return copy

# 2. Metrics

In [11]:
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns

fontsize = 18
SMALL_SIZE = 8
MEDIUM_SIZE = 10
BIGGER_SIZE = 12

plt.rc('font', size=MEDIUM_SIZE)  # controls default text sizes
plt.rc('axes', titlesize=MEDIUM_SIZE)  # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)  # fontsize of the x and y labels
plt.rc('xtick', labelsize=MEDIUM_SIZE)  # fontsize of the tick labels
plt.rc('ytick', labelsize=MEDIUM_SIZE)  # fontsize of the tick labels
plt.rc('legend', fontsize=MEDIUM_SIZE)  # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE)  # fontsize of the figure title
plt.rc('xtick', labelsize=20)
plt.rc('ytick', labelsize=20)

font = {'family': 'normal',
        'weight': 'bold',
        'size': 24}

plt.rc('font', **font)
params = {'legend.fontsize': 'x-large',
          # 'figure.figsize': (15, 5),
          'axes.labelsize': 'x-large',
          'axes.titlesize': 'x-large',
          'xtick.labelsize': 'x-large',
          'ytick.labelsize': 'x-large'}
plt.rcParams.update(params)

# We'll hack a bit with the t-SNE code in sklearn 0.15.2.

sns.set_style('white')
sns.set_context('paper')
sns.set()
title_fontsize = 18
label_fontsize = 18


def plot_cost(training, validation, name, model, epochs, best_epoch):
    x = np.arange(start=0, stop=len(training), step=1).tolist()
    constant = 1e-10
    plt.figure()
    plt.xlim(min(x), max(x))
    plt.ylim(min(min(training), min(validation), 0) - constant, max(max(training), max(validation)) + constant)
    plt.plot(x, training, color='blue', linestyle='-', label='training')
    plt.plot(x, validation, color='green', linestyle='-', label='validation')
    plt.axvline(x=best_epoch, color='red')
    title = 'Training {} {}: epochs={}, best epoch={} '.format(model, name, epochs, best_epoch)
    plt.title(title, fontsize=title_fontsize)
    plt.ylabel(name)
    plt.xlabel('Epoch')
    plt.legend(loc='best', fontsize=10)
    plt.savefig('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\plots\\mort_p\\{}_{}'.format(model, name))


def box_plots(empirical, predicted, name='data', time='days', log_domain=True):
    plt.figure()
    if log_domain:
        plt.yscale('log')
    plt.boxplot(x=predicted, sym='o', notch=0, whis='range')
    plt.scatter(x=np.arange(start=1, stop=len(predicted) + 1), y=empirical, color='purple', label='empirical')
    plt.legend(loc='best', fontsize=10)
    plt.xticks(fontsize=5)
    plt.ylabel('t ({})'.format(time))
    plt.xlabel('Observation index')
    plt.savefig('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\plots\\mort_p\\{}_box_plot'.format(name))


def hist_plots(samples, name, xlabel, empirical=None):
    plt.figure()
    plt.axvline(x=np.mean(samples), color='grey', label='mean', linestyle='--', )
    if empirical:
        plt.axvline(x=empirical, color='purple', label='empirical', linestyle='--', )
    plt.legend(loc='best', fontsize=10)
    plt.hist(samples, bins=25)
    plt.xlabel(xlabel)
    plt.savefig("C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\plots\\mort_p\\{}_hist".format(name))
    plt.figure()
    plt.boxplot(x=samples, sym='o', notch=0, whis='range')
    plt.scatter(x=1, y=np.mean(samples), color='purple', label='mean')
    plt.legend(loc='best', fontsize=10)
    plt.savefig('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\plots\\mort_p\\{}_box_plot'.format(name))

# 3. Distribution

In [12]:
#raiber: we draw form the uniform distribution a tensor with shape batch_size (128) and dim(feature dim 17) and values between zero and 1
# shape: (128, 17) 
def uniform(dim, batch_size):
    ones = np.ones(shape=dim, dtype=np.float32)
    noise = tf.distributions.Uniform(low=0 * ones, high=ones).sample(sample_shape=[batch_size])
    return noise

# 4. tf_helpers 

In [13]:
def show_all_variables():
    model_vars = tf.trainable_variables()
    slim.model_analyzer.analyze_vars(model_vars, print_info=True)


def mlp_neuron(layer_input, weights, biases, activation=True): # if activation is set to false from the called function, we stay false
    mlp = tf.add(tf.matmul(layer_input, weights), biases)
    if activation:
        return tf.nn.relu(mlp)
    else:
        return mlp


def normalized_mlp(layer_input, weights, biases, is_training, batch_norm, layer, activation=tf.nn.relu):
    mlp = tf.add(tf.matmul(layer_input, weights), biases)
    if batch_norm:
        norm = batch_norm_wrapper(mlp, is_training, layer=layer)
        # norm = tf_batch_norm(is_training=is_training, inputs=mlp, layer=layer)
        return activation(norm)
    else:
        return activation(mlp)


def dropout_normalised_mlp(layer_input, weights, biases, is_training, batch_norm, layer, keep_prob=1,
                           activation=tf.nn.relu):
    mlp = normalized_mlp(layer_input, weights, biases, is_training, batch_norm,
                         layer=layer, activation=activation)  # apply DropOut to hidden layer
    drop_out = tf.cond(is_training, lambda: tf.nn.dropout(mlp, keep_prob), lambda: mlp)
    return drop_out


def create_nn_weights(layer, network, shape): # layer: the name of the layer, exp: h0_z hiddem layer 0 
                                              # network: string (in this example would be the "decoder")
                                              # shape: [input_shape, hidden_dim] in one of the examples ][34,50]
    # raiber: create the name of the weights and variables
    h_vars = {}
    w_h = 'W_' + network + '_' + layer
    b_h = 'b_' + network + '_' + layer
    # get the values of the weights and variables 
    h_vars[w_h] = create_weights(shape=shape, name=w_h)
    h_vars[b_h] = create_biases([shape[1]], name=b_h)
    variable_summaries(h_vars[w_h], w_h)
    variable_summaries(h_vars[b_h], b_h)

    return h_vars[w_h], h_vars[b_h]


def create_biases(shape, name):
    print("name:{}, shape{}".format(name, shape))
    return tf.Variable(tf.constant(shape=shape, value=0.0), name=name)


def create_weights(shape, name):
    print("name:{}, shape{}".format(name, shape))
    # initialize weights using Glorot and Bengio(2010) scheme
    a = tf.sqrt(6.0 / (shape[0] + shape[1]))
    # return tf.Variable(tf.random_normal(shape, stddev=tf.square(0.0001)), name=name)
    return tf.Variable(tf.random_uniform(shape, minval=-a, maxval=a, dtype=tf.float32), name=name)


def variable_summaries(var, summary_name):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
    with tf.name_scope(summary_name):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))


def batch_norm_wrapper(inputs, is_training, layer):
    # http://r2rt.com/implementing-batch-normalization-in-tensorflow.html
    # raiber: get_shape()[-1]] gives the dim of the cols 
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False, name='{}_batch_norm_mean'.format(layer))
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False, name='{}_batch_norm_var'.format(layer))
    print("batch inputs {}, shape for var{}".format(inputs.get_shape(), inputs.get_shape()[-1]))

    offset = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), name='{}_batch_norm_offset'.format(layer))
    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]), name='{}_batch_norm_scale'.format(layer))
    epsilon = 1e-5
    alpha = 0.9  # use numbers closer to 1 if you have more data

    def batch_norm():
        batch_mean, batch_var = tf.nn.moments(inputs, [0])
        print("batch mean {}, var {}".format(batch_mean.shape, batch_var.shape))
        train_mean = tf.assign(pop_mean,
                                pop_mean * alpha + batch_mean * (1 - alpha))
        train_var = tf.assign(pop_var,
                              pop_var * alpha + batch_var * (1 - alpha))
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(inputs, mean=batch_mean, variance=batch_var, offset=offset, scale=scale,
                                              variance_epsilon=epsilon)

    def pop_norm():
        return tf.nn.batch_normalization(inputs, pop_mean, pop_var, offset=offset, scale=scale,
                                          variance_epsilon=epsilon)

    return tf.cond(is_training, batch_norm, pop_norm)


def hidden_mlp_layers(batch_norm, hidden_dim, is_training, keep_prob, layer_input, size):
    tmp = layer_input
    for i in np.arange(size):
        input_shape = tmp.get_shape().as_list()[1]
        print("layer input shape:{}".format(input_shape))
        w_hi, b_hi = create_nn_weights('h{}_z'.format(i), 'decoder', [input_shape, hidden_dim[i]])
        h_i = dropout_normalised_mlp(layer_input=tmp, weights=w_hi, biases=b_hi,
                                     is_training=is_training,
                                     batch_norm=batch_norm, keep_prob=keep_prob,
                                     layer='h{}_z_decoder'.format(i))

        tmp = h_i
    return tmp


def hidden_mlp_layers_noise(batch_norm, hidden_dim, is_training, keep_prob, layer_input, noise_alpha, size,
                            batch_size):
    # hidden_dim = [50,50]
    # layer input shape is (?,34) 34 = 17 + 17 (x + noise)
    # size = len(hiiden_dim) = 2
    tmp = layer_input
    for i in np.arange(size):
        input_shape = tmp.get_shape().as_list()[1] # in the second loop input_shape would be 100
        print("layer input shape:{}".format(input_shape))
        w_hi, b_hi = create_nn_weights('h{}_z'.format(i), 'decoder', [input_shape, hidden_dim[i]])
        h_i = dropout_normalised_mlp(layer_input=tmp, weights=w_hi, biases=b_hi,
                                     is_training=is_training,
                                     batch_norm=batch_norm, keep_prob=keep_prob,
                                     layer='h{}_z_decoder'.format(i)) # h_i shape is (?,50)
                                                                      # second loop also (?,50)

        # noise = standard_gaussian(dim=hidden_dim[i], batch_size=batch_size) * tf.gather(noise_alpha, i + 1)
        noise = uniform(dim=hidden_dim[i], batch_size=batch_size) * tf.gather(noise_alpha, i + 1) # noise shape (350 batch_size),50), the same for the second loop
        tmp = tf.concat([h_i, noise], axis=1) # the shape become (?, 100) because 50 (h_i) and 50 (noise)
    return tmp

# 5. Generated_times

In [14]:
def plot_predicted_distribution(predicted, empirical, data, time='days', cens=False):
    predicted_samples = np.transpose(predicted)
    print("observed_samples:{}, empirical_observed:{}".format(predicted_samples.shape,
                                                              empirical.shape))

    best_samples, diff, worst_samples = get_best_worst_indices(cens, empirical, predicted_samples)

    predicted_best = predicted_samples[best_samples]
    predicted_worst = predicted_samples[worst_samples]
    hist_plots(samples=diff, name='{}_absolute_error'.format(data), xlabel=r'|\tilde{t}-t|')

    box_plots(empirical=empirical[best_samples], predicted=list(predicted_best), name=('%s_best' % data),
              time=time)
    box_plots(empirical=empirical[worst_samples], predicted=list(predicted_worst), name=('%s_worst' % data),
              time=time)


def get_best_worst_indices(cens, empirical, predicted, size=50):
    diff = compute_relative_error(cens=cens, empirical=empirical, predicted=predicted)
    indices = sorted(range(len(abs(diff))), key=lambda k: diff[k])
    best_samples = indices[0:size]
    worst_samples = indices[len(indices) - size - 1: len(indices) - 1]
    return best_samples, diff, worst_samples


def compute_relative_error(cens, empirical, predicted, relative=False):
    predicted_median = np.median(predicted, axis=1)
    if cens:
        diff = np.minimum(0, predicted_median - empirical)
    else:
        diff = predicted_median - empirical
    if relative:
        return diff
    else:
        return abs(diff)

# 6. Cost 

In [15]:
def batch_metrics(e, risk_set, predicted, batch_size, empirical):
    partial_likelihood = tf.constant(0.0, shape=())
    rel_abs_err = tf.constant(0.0, shape=())
    total_cens_loss = tf.constant(0.0, shape=())
    total_obs_loss = tf.constant(0.0, shape=())
    predicted = tf.squeeze(predicted)
    observed = tf.reduce_sum(e) # compute sum across the tensor
    censored = tf.subtract(tf.cast(batch_size, dtype=tf.float32), observed) # what is left from the batch which is not bserved, it is then censored 

    def condition(i, likelihood, rae, recon_loss, obs_recon_loss):
        return i < batch_size

    def body(i, likelihood, rae, cens_recon_loss, obs_recon_loss):
        # get edges for observation i
        pred_t_i = tf.gather(predicted, i)
        emp_t_i = tf.gather(empirical, i)
        e_i = tf.gather(e, i)
        censored = tf.equal(e_i, 0)
        obs_at_risk = tf.gather(risk_set, i)
        print("obs_at_risk:{}, g_theta:{}".format(obs_at_risk.shape, predicted.shape))
        risk_hazard_list = tf.multiply(predicted, obs_at_risk)
        num_adjacent = tf.reduce_sum(obs_at_risk)
        # calculate partial likelihood
        risk = tf.subtract(pred_t_i, risk_hazard_list)
        activated_risk = tf.nn.sigmoid(risk)
        # logistic = map((lambda ele: log(1 + exp(ele * -1)) * 1 / log(2)), x)
        constant = 1e-8
        log_activated_risk = tf.div(tf.log(activated_risk + constant), tf.log(2.0))
        obs_likelihood = tf.add(log_activated_risk, num_adjacent)
        uncensored_likelihood = tf.cond(censored, lambda: tf.constant(0.0), lambda: obs_likelihood)
        cumulative_likelihood = tf.reduce_sum(uncensored_likelihood)
        updated_likelihood = tf.add(cumulative_likelihood, likelihood)

        # RElative absolute error
        abs_error_i = tf.abs(tf.subtract(pred_t_i, emp_t_i))
        pred_great_empirical = tf.greater(pred_t_i, emp_t_i)
        min_rea_i = tf.minimum(tf.div(abs_error_i, pred_t_i), tf.constant(1.0))
        rea_i = tf.cond(tf.logical_and(censored, pred_great_empirical), lambda: tf.constant(0.0), lambda: min_rea_i)
        cumulative_rae = tf.add(rea_i, rae)

        # Censored generated t loss
        diff_time = tf.subtract(pred_t_i, emp_t_i)
        # logistic = map((lambda ele: log(1 + exp(ele * -1)) * 1 / log(2)), x)
        # logistic = tf.div(tf.nn.sigmoid(diff_time) + constant, tf.log(2.0))
        # hinge = map(lambda ele: max(0, 1 - ele), x)
        hinge = tf.nn.relu(1.0 - diff_time)
        censored_loss_i = tf.cond(censored, lambda: hinge, lambda: tf.constant(0.0))
        # Sum over all edges and normalize by number of edges
        # L1 recon
        observed_loss_i = tf.cond(censored, lambda: tf.constant(0.0),
                                  lambda: tf.losses.absolute_difference(labels=emp_t_i, predictions=pred_t_i))
        # add observation risk to total risk
        cum_cens_loss = tf.add(cens_recon_loss, censored_loss_i)
        cum_obs_loss = tf.add(obs_recon_loss, observed_loss_i)
        return [i + 1, tf.reshape(updated_likelihood, shape=()), tf.reshape(cumulative_rae, shape=()),
                tf.reshape(cum_cens_loss, shape=()), tf.reshape(cum_obs_loss, shape=())]

    # Relevant Functions
    idx = tf.constant(0, shape=())
    _, total_likelihood, total_rel_abs_err, batch_cens_loss, batch_obs_loss = \
        tf.while_loop(condition, body,
                      loop_vars=[idx,
                                 partial_likelihood,
                                 rel_abs_err,
                                 total_cens_loss,
                                 total_obs_loss],
                      shape_invariants=[
                          idx.get_shape(),
                          partial_likelihood.get_shape(),
                          rel_abs_err.get_shape(),
                          total_cens_loss.get_shape(),
                          total_obs_loss.get_shape()])
    square_batch_size = tf.pow(batch_size, tf.constant(2))

    def normarlize_loss(cost, size):
        cast_size = tf.cast(size, dtype=tf.float32)
        norm = tf.cond(tf.greater(cast_size, tf.constant(0.0)), lambda: tf.div(cost, cast_size), lambda: 0.0)
        return norm

    total_recon_loss = tf.add(normarlize_loss(batch_cens_loss, size=censored),
                              normarlize_loss(batch_obs_loss, size=observed))
    normalized_log_likelihood = normarlize_loss(total_likelihood, size=square_batch_size)
    return normalized_log_likelihood, normarlize_loss(total_rel_abs_err, size=batch_size), total_recon_loss


def l2_loss(scale):
    l2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    return l2 * scale


def l1_loss(scale):
    l1_regularizer = tf.contrib.layers.l1_regularizer(
        scale=scale, scope=None
    )
    weights = tf.trainable_variables()  # all vars of your graph
    l1 = tf.contrib.layers.apply_regularization(l1_regularizer, weights)
    return l1

# 7. Data Extraction 

In [16]:
def generate_data():
    import pdb
    np.random.seed(31415)
    data_frame = pandas.read_csv('C:\\Users\\raibe\\Desktop\\Thesis Code\\datasets\\mortgage\\WideFormatMortgageAfterRemovingNull.csv', sep=',')
    print("head of data:{}, data shape:{}".format(data_frame.head(), data_frame.shape))
    # x_data = data_frame[['age', 'sex', 'kappa', 'lambda', 'flc.grp', 'creatinine', 'mgus']]
    # Preprocess
    to_drop = ['id','time', 'duration','first_time', 'default_time', 'payoff_time', 'status_time']
    print("missing:{}".format(missing_proportion(data_frame.drop(labels=to_drop, axis=1))))
    one_hot_encoder_list = ['REtype_CO_orig_time', 'REtype_PU_orig_time', 'REtype_SF_orig_time', 'investor_orig_time']
    #data_frame = one_hot_encoder(data_frame, encode=one_hot_encoder_list)
    data_frame = data_frame[['id', 'time', 'duration', 'orig_time', 'first_time', 'mat_time',
           'balance_time', 'LTV_time', 'interest_rate_time', 'hpi_time',
           'gdp_time', 'uer_time', 'balance_orig_time',
           'FICO_orig_time', 'LTV_orig_time', 'Interest_Rate_orig_time',
           'hpi_orig_time', 'default_time', 'payoff_time', 'status_time', 'REtype_CO_orig_time', 'REtype_PU_orig_time',
           'REtype_SF_orig_time', 'investor_orig_time']]
    t_data = data_frame[['duration']]
    e_data = data_frame[['payoff_time']]
    dataset1 = data_frame.drop(labels=to_drop, axis=1)
    #pdb.set_trace()

    ll_n = f_get_Normalization(np.asarray(dataset1.iloc[:,:13]), 'standard')
    ll_p = pandas.DataFrame(ll_n, columns=dataset1.iloc[:,:13].columns)
    ll = pandas.concat([ll_p, dataset1.iloc[:,13:].reindex(ll_p.index)], axis=1)
    dataset = ll

    print("head of dataset data:{}, data shape:{}".format(dataset.head(), dataset.shape))
    encoded_indices = [[13], [14], [15], [16]]
    print("data description:{}".format(dataset.describe()))
    covariates = np.array(dataset.columns.values)
    print("columns:{}".format(covariates))
    x = np.array(dataset).reshape(dataset.shape)
    t = np.array(t_data).reshape(len(t_data))
    e = np.array(e_data).reshape(len(e_data))

    print("x:{}, t:{}, e:{}, len:{}".format(x[0], t[0], e[0], len(t)))
    idx = np.arange(0, x.shape[0])
    print("x_shape:{}".format(x.shape))

    np.random.shuffle(idx)
    x = x[idx]
    t = t[idx]
    e = e[idx]
    end_time = max(t)
    print("end_time:{}".format(end_time))
    print("observed percent:{}".format(sum(e) / len(e)))
    print("shuffled x:{}, t:{}, e:{}, len:{}".format(x[0], t[0], e[0], len(t)))

    num_examples = int(0.80 * len(e))
    print("num_examples:{}".format(num_examples))
    vali_example = int(0.20 * num_examples)
    train_idx = idx[0: num_examples - vali_example]
    valid_idx = idx[num_examples - vali_example: num_examples]
    split = int(len(t) - num_examples)
    test_idx = idx[num_examples: num_examples + split]

    print("test:{}, valid:{}, train:{}, all: {}".format(len(test_idx), len(valid_idx), num_examples,
                                                        len(test_idx) + len(valid_idx) + num_examples))
    # print("test_idx:{}, valid_idx:{},train_idx:{} ".format(test_idx, valid_idx, train_idx))

    imputation_values = get_train_median_mode(x=np.array(x[train_idx]), categorial=encoded_indices)
    print("imputation_values:{}".format(imputation_values))
    preprocessed = {
        'train': formatted_data(x=x, t=t, e=e, idx=train_idx),
        'test': formatted_data(x=x, t=t, e=e, idx=test_idx),
        'valid': formatted_data(x=x, t=t, e=e, idx=valid_idx),
        'end_t': end_time,
        'covariates': covariates,
        'one_hot_indices': encoded_indices,
        'imputation_values': imputation_values
    }
    return preprocessed

    
if __name__ == '__main__':
    generate_data()

head of data:   id  time  duration  orig_time  first_time  mat_time  balance_time  \
0   1    48        24         -7          25       113      29087.21   
1   2    26         2         18          25       138     105654.77   
2   3    29         5         -6          25       114      44378.60   
3   4    60        36         -2          25       119      52686.35   
4   5    27         3         18          25       138      52100.71   

    LTV_time  interest_rate_time  hpi_time  ...  REtype_SF_orig_time  \
0  26.658065               9.200    146.45  ...                    1   
1  65.469851               7.680    225.10  ...                    1   
2  31.459735              11.375    217.37  ...                    1   
3  34.898842              10.500    189.82  ...                    1   
4  66.346343               9.155    222.39  ...                    1   

   investor_orig_time  balance_orig_time  FICO_orig_time  LTV_orig_time  \
0                   0            45000.0      

observed fold:0.526257877363209
observed fold:0.5333249968738277


# 10. risk_network

In [17]:
#raiber: it return the network output after applying the relu fuction to it
def pt_given_x(x, hidden_dim, is_training, batch_norm, batch_size, input_dim, noise_alpha, keep_prob=0.9, reuse=False):
    size = len(hidden_dim)
    with tf.variable_scope('generate_t_given_x', reuse=reuse):
        # Variables
        # first we add the noise to the input, then using the function hidden_mlp_layers_noise we add the noise to each hidden layer 
        noise = uniform(dim=input_dim, batch_size=batch_size) * tf.gather(noise_alpha, 0) # tf.gather give us the value of noise_alpha inside the index 0
        x_plus_noise = tf.concat([x, noise], axis=1) # the layer input shape would be 17 + 17 = 34 (17 is the number of features)
        hidden_x = hidden_mlp_layers_noise(batch_norm=batch_norm, hidden_dim=hidden_dim,
                                           is_training=is_training, keep_prob=keep_prob,
                                           layer_input=x_plus_noise, size=size, batch_size=batch_size,
                                           noise_alpha=noise_alpha) # hidden_x shape (?,100)

        w_t, b_t = create_nn_weights('t', 'encoder', [hidden_x.get_shape().as_list()[1], 1])
        # name:W_encoder_t, shape[100, 1]
        #name:b_encoder_t, shape[1]
        t_mu = mlp_neuron(hidden_x, w_t, b_t, activation=False) # mlp = tf.add(tf.matmul(layer_input, weights), biases)
        # no activation is applied 
        logit = tf.nn.sigmoid(t_mu) 
        return tf.exp(t_mu), logit

def discriminator(pair_one, pair_two, hidden_dim, is_training, batch_norm, scope, keep_prob=1, reuse=False):
    size = len(hidden_dim)
    with tf.variable_scope(scope, reuse=reuse):
        # Variables
        print("scope:{}, pair_one:{}, pair_two:{}".format(scope, pair_one.shape, pair_two.shape))
        # create one structure for the input feature 
        hidden_pair_one = hidden_mlp_layers(batch_norm=batch_norm, hidden_dim=hidden_dim,
                                            is_training=is_training, keep_prob=keep_prob,
                                            layer_input=pair_one, size=size) # shape=(?, 50)

        # this structure is for the time 
        hidden_pair_two = hidden_mlp_layers(batch_norm=batch_norm, hidden_dim=hidden_dim,
                                            is_training=is_training, keep_prob=keep_prob,
                                            layer_input=pair_two, size=size) #shape=(?, 50)
        hidden_pairs = tf.concat([hidden_pair_one, hidden_pair_two], axis=1) #shape=(?, 100)
        print("hidden_pairs:{}".format(hidden_pairs.get_shape()))
        #name:W_discriminator_logits, shape[100, 1]
        #name:b_discriminator_logits, shape[1]
        w_logit, b_logit = create_nn_weights('logits', 'discriminator', [hidden_dim[size - 1] * 2, 1])
        f = mlp_neuron(layer_input=hidden_pairs, weights=w_logit, biases=b_logit, activation=False) 
        logit = tf.nn.sigmoid(f)

    return tf.squeeze(logit), tf.squeeze(f)

def discriminator_one(pair_one, pair_two, hidden_dim, is_training, batch_norm, keep_prob=1, reuse=False):
    score, f = discriminator(pair_one=pair_one, pair_two=pair_two, scope='Discriminator_one', batch_norm=batch_norm,
                             is_training=is_training,
                             keep_prob=keep_prob, reuse=reuse, hidden_dim=hidden_dim)
    return score, f

# 11. DATE-AE

In [18]:
class DATE_AE(object):
    def __init__(self,
                 batch_size,
                 learning_rate,
                 beta1,
                 beta2,
                 require_improvement,
                 seed,
                 num_iterations,
                 hidden_dim,
                 latent_dim,
                 input_dim,
                 num_examples,
                 keep_prob,
                 train_data,
                 valid_data,
                 test_data,
                 end_t,
                 gen_updates,
                 covariates,
                 imputation_values,
                 sample_size,
                 disc_updates,
                 categorical_indices,
                 l2_reg,
                 max_epochs,
                 path_large_data=""
                 ):
        self.max_epochs = max_epochs
        self.hidden_dim = hidden_dim
        self.batch_size = batch_size
        self.disc_updates = disc_updates
        self.gen_updates = gen_updates
        self.latent_dim = latent_dim
        self.path_large_data = path_large_data
        self.seed = seed
        self.require_improvement = require_improvement
        self.num_iterations = num_iterations
        self.learning_rate, self.beta1, self.beta2 = learning_rate, beta1, beta2
        self.l2_reg = l2_reg
        self.log_file = 'model.log'
        logging.basicConfig(filename=self.log_file, filemode='w', level=logging.DEBUG)
        np.random.seed(seed)
        tf.set_random_seed(seed)
        self.batch_norm = True
        self.covariates = covariates
        self.sample_size = sample_size
        self.z_sample_size = 10  # num of z_samples

        self.config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
        self.config.gpu_options.allow_growth = True
        # self.config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
        # Load Data
        self.train_x, self.train_t, self.train_e = train_data['x'], train_data['t'], train_data['e']
        self.valid_x, self.valid_t, self.valid_e = valid_data['x'], valid_data['t'], valid_data['e']

        self.test_x, self.test_t, self.test_e = test_data['x'], test_data['t'], test_data['e']
        self.end_t = end_t
        self.keep_prob = keep_prob
        self.input_dim = input_dim
        self.imputation_values = imputation_values
        self.imputation_values = np.zeros(shape=self.input_dim)
        self.num_examples = num_examples
        self.categorical_indices = categorical_indices
        self.continuous_indices = np.setdiff1d(np.arange(input_dim), flatten_nested(categorical_indices))
        print_features = "input_dim:{}, continuous:{}, size:{}, categorical:{}, " \
                         "size{}".format(self.input_dim,
                                         self.continuous_indices,
                                         len(
                                             self.continuous_indices),
                                         self.categorical_indices,
                                         len(
                                             self.categorical_indices))
        print(print_features)
        logging.debug(print_features)
        print_model = "model is DATE_AE"
        print(print_model)
        logging.debug("Imputation values:{}".format(imputation_values))
        logging.debug(print_model)
        self.model = 'DATE_AE'

        self._build_graph()
        self.train_cost, self.train_ci, self.train_t_rae, self.train_gen, self.train_disc, self.train_ranking, \
        self.train_layer_one_recon = [], [], [], [], [], [], []
        self.valid_cost, self.valid_ci, self.valid_t_rae, self.valid_gen, self.valid_disc, self.valid_ranking, \
        self.valid_layer_one_recon = [], [], [], [], [], [], []

    def _build_graph(self):
        self.G = tf.Graph()
        with self.G.as_default():
            self.x = tf.placeholder(tf.float32, shape=[None, self.input_dim], name='x')
            self.e = tf.placeholder(tf.float32, shape=[None], name='e')
            self.t = tf.placeholder(tf.float32, shape=[None], name='t')
            self.t_lab = tf.placeholder(tf.float32, shape=[None], name='t_lab')
            # are used to feed data into our queue
            self.batch_size_tensor = tf.placeholder(tf.int32, shape=[], name='batch_size')
            self.risk_set = tf.placeholder(tf.float32, shape=[None, None])
            self.impute_mask = tf.placeholder(tf.float32, shape=[None, self.input_dim], name='impute_mask')
            self.is_training = tf.placeholder(tf.bool)
            self.noise_dim = len(self.hidden_dim) + 1
            self.noise_alpha = tf.placeholder(tf.float32, shape=[self.noise_dim])

            self._objective()
            self.session = tf.Session(config=self.config)

            self.capacity = 1400
            self.coord = tf.train.Coordinator()
            enqueue_thread = threading.Thread(target=self.enqueue)
            self.queue = tf.RandomShuffleQueue(capacity=self.capacity, dtypes=[tf.float32, tf.float32, tf.float32],
                                               shapes=[[self.input_dim], [], []], min_after_dequeue=self.batch_size)
            # self.queue = tf.FIFOQueue(capacity=self.capacity, dtypes=[tf.float32, tf.float32, tf.float32],
            #                           shapes=[[self.input_dim], [], []])
            self.enqueue_op = self.queue.enqueue_many([self.x, self.t, self.e])
            # enqueue_thread.isDaemon()
            enqueue_thread.start()
            dequeue_op = self.queue.dequeue()
            self.x_batch, self.t_batch, self.e_batch = tf.train.batch(dequeue_op, batch_size=self.batch_size,
                                                                      capacity=self.capacity)
            self.threads = tf.train.start_queue_runners(coord=self.coord, sess=self.session)

            self.saver = tf.train.Saver()
            self.merged = tf.summary.merge_all()
            self.current_dir = os.getcwd()
            self.save_path = "C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\summaries\\mort_p\\{0}_model".format(self.model)
            self.train_writer = tf.summary.FileWriter(self.save_path, self.session.graph)

    def _objective(self):
        self.num_batches = self.num_examples / self.batch_size
        logging.debug("num batches:{}, batch_size:{} epochs:{}".format(self.num_batches, self.batch_size,
                                                                       int(self.num_iterations / self.num_batches)))
        self._build_model()
        self.reg_loss = l2_loss(self.l2_reg) + l1_loss(self.l2_reg)
        self.cost = self.t_regularization_loss + self.disc_one_loss + self.disc_two_loss + self.gen_one_loss + \
                    self.gen_two_loss + self.layer_one_recon
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.beta1,
                                           beta2=self.beta2)

        dvars1 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "Discriminator_one")
        dvars2 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "Discriminator_two")
        genvars1 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generate_t_given_x")
        genvars2 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generate_t_given_z")

        self.disc_solver = optimizer.minimize(self.disc_one_loss + self.disc_two_loss, var_list=dvars1 + dvars2)
        self.gen_solver = optimizer.minimize(
            self.gen_one_loss + self.gen_two_loss + self.t_regularization_loss + self.layer_one_recon,
            var_list=genvars1 + genvars2)

    def _build_model(self):
        self._denoising_date()
        self._risk_date()

    @staticmethod
    def log(x):
        return tf.log(x + 1e-8)

    def _risk_date(self):
        def expand_t_dim(t):
            return tf.expand_dims(t, axis=1)

        indices_lab = tf.where(tf.equal(tf.constant(1.0, dtype=tf.float32), self.e))
        z_lab = tf.squeeze(tf.gather(self.z_real, indices_lab), axis=[1])
        t_lab_exp = expand_t_dim(self.t_lab)

        t_gen = pt_given_z(z=self.z_real, hidden_dim=self.hidden_dim, is_training=self.is_training,
                           batch_norm=self.batch_norm, keep_prob=self.keep_prob, batch_size=self.batch_size_tensor,
                           latent_dim=self.latent_dim, noise_alpha=self.noise_alpha)

        # Discriminator B
        d_two_real, f_two_real = discriminator_two(pair_one=z_lab, pair_two=t_lab_exp, hidden_dim=self.hidden_dim,
                                                   is_training=self.is_training, batch_norm=self.batch_norm,
                                                   keep_prob=self.keep_prob)  # (z_nc, t_nc)
        d_two_fake, f_two_fake = discriminator_two(pair_one=self.z_real, pair_two=t_gen, hidden_dim=self.hidden_dim,
                                                   is_training=self.is_training, batch_norm=self.batch_norm,
                                                   reuse=True, keep_prob=self.keep_prob)  # (z, t_gen)

        # Discriminator loss
        self.disc_two_loss = -tf.reduce_mean(self.log(d_two_real)) - tf.reduce_mean(self.log(1 - d_two_fake))

        # Generator loss
        self.gen_two_loss = tf.reduce_mean(f_two_real) - tf.reduce_mean(f_two_fake)
        self.disc_logit = d_two_fake # added b raiber 
        self.disc_f = f_two_fake # added by raiber 
        self.predicted_time = tf.squeeze(t_gen)
        self.ranking_partial_lik, self.total_rae, self.total_t_recon_loss = \
            batch_metrics(e=self.e,
                          risk_set=self.risk_set,
                          predicted=self.predicted_time,
                          batch_size=self.batch_size_tensor,
                          empirical=self.t)

        # self.t_regularization_loss = tf.add(self.ranking_partial_lik, self.total_t_recon_loss)
        self.t_regularization_loss = self.total_t_recon_loss
        self.t_mse = tf.losses.mean_squared_error(labels=self.t_lab,
                                                  predictions=tf.gather(self.predicted_time, indices_lab))

    def _denoising_date(self):
        self.z_real = generate_z_given_x(latent_dim=self.latent_dim,
                                         is_training=self.is_training,
                                         batch_norm=self.batch_norm,
                                         input_dim=self.input_dim, batch_size=self.batch_size_tensor,
                                         hidden_dim=self.hidden_dim, x=self.impute_mask, keep_prob=self.keep_prob,
                                         reuse=True, sample_size=self.z_sample_size)

        z_ones = np.ones(shape=self.latent_dim, dtype=np.float32)
        print("z_ones:{}".format(z_ones.shape))

        z_fake = tf.distributions.Uniform(low=-z_ones, high=z_ones).sample(sample_shape=[self.batch_size_tensor])
        x_fake = generate_x_given_z(z=z_fake, latent_dim=self.latent_dim,
                                    is_training=self.is_training, batch_norm=self.batch_norm,
                                    hidden_dim=self.hidden_dim, keep_prob=self.keep_prob,
                                    batch_size=self.batch_size_tensor, input_dim=self.input_dim)

        self.x_recon = generate_x_given_z(z=self.z_real, latent_dim=self.latent_dim,
                                          is_training=self.is_training, batch_norm=self.batch_norm,
                                          hidden_dim=self.hidden_dim, reuse=True, keep_prob=self.keep_prob,
                                          batch_size=self.batch_size_tensor, input_dim=self.input_dim)

        z_rec = generate_z_given_x(x=x_fake, latent_dim=self.latent_dim,
                                   is_training=self.is_training,
                                   batch_norm=self.batch_norm,
                                   input_dim=self.input_dim, batch_size=self.batch_size_tensor,
                                   hidden_dim=self.hidden_dim, reuse=True, keep_prob=self.keep_prob,
                                   sample_size=self.z_sample_size)
        # Reconstruction Loss

        self.x_recon_loss = x_reconstruction(x_recon=self.x_recon, x=self.x,
                                             categorical_indices=self.categorical_indices,
                                             continuous_indices=self.continuous_indices,
                                             batch_size=self.batch_size_tensor)

        self.z_recon_loss = tf.losses.mean_squared_error(z_fake, z_rec)
        self.layer_one_recon = tf.add(self.x_recon_loss, self.z_recon_loss)

        d_one_real, f_one_real = discriminator_one(pair_one=self.impute_mask, pair_two=self.z_real,
                                                   hidden_dim=self.hidden_dim,
                                                   is_training=self.is_training, batch_norm=self.batch_norm,
                                                   keep_prob=self.keep_prob)  # real
        d_one_fake, f_one_fake = discriminator_one(pair_one=x_fake, pair_two=z_fake, hidden_dim=self.hidden_dim,
                                                   is_training=self.is_training, batch_norm=self.batch_norm,
                                                   reuse=True, keep_prob=self.keep_prob)  # fake

        self.disc_one_loss = -tf.reduce_mean(self.log(d_one_real)) - tf.reduce_mean(self.log(1 - d_one_fake))

        # Generator loss
        self.gen_one_loss = tf.reduce_mean(f_one_real) - tf.reduce_mean(f_one_fake)

    def predict_concordance_index(self, x, t, e, outcomes=None):
        input_size = x.shape[0]
        i = 0
        num_batches = input_size / self.batch_size
        predicted_time = np.zeros(shape=input_size, dtype=np.int)
        total_ranking = 0.0
        total_rae = 0.0
        total_cost = 0.0
        total_gen_loss = 0.0
        total_disc_loss = 0.0
        total_layer_one_recon = 0.0
        total_t_reg_loss = 0.0
        total_reg = 0.0
        total_mse = 0.0
        while i < input_size:
            # The ending index for the next batch is denoted j.
            j = min(i + self.batch_size, input_size)
            feed_dict = self.batch_feed_dict(e=e, i=i, j=j, t=t, x=x, outcomes=outcomes)
            cost, ranking, gen_loss, rae, reg, disc_loss, layer_one_recon, t_reg_loss, t_mse = self.session.run(
                [self.cost, self.ranking_partial_lik, self.gen_one_loss, self.total_rae,
                 self.reg_loss,
                 self.disc_one_loss, self.layer_one_recon, self.t_regularization_loss, self.t_mse],
                feed_dict=feed_dict)

            temp_pred_time = []
            for p in range(self.sample_size):
                gen_time = self.session.run(self.predicted_time, feed_dict=feed_dict)
                temp_pred_time.append(gen_time)

            temp_pred_time = np.array(temp_pred_time)
            # print("temp_pred_time:{}".format(temp_pred_time.shape))
            predicted_time[i:j] = np.median(temp_pred_time, axis=0)

            total_ranking += ranking
            total_cost += cost
            total_rae += rae
            total_gen_loss += gen_loss
            total_reg += reg
            total_layer_one_recon += layer_one_recon
            total_disc_loss += disc_loss
            total_t_reg_loss += t_reg_loss
            total_mse += t_mse
            i = j

        predicted_event_times = predicted_time.reshape(input_size)
        #RAIBER NEW CHANGE
        ci_index = concordance_index(event_times=t, predicted_scores=predicted_event_times.tolist(),
                                    event_observed=e)
        
        #ci_index = 0
        def batch_average(total):
            return total / num_batches

        return ci_index, batch_average(total_cost), batch_average(total_rae), batch_average(
            total_ranking), batch_average(
            total_gen_loss), batch_average(total_reg), batch_average(total_disc_loss), batch_average(
            total_layer_one_recon), batch_average(total_t_reg_loss), batch_average(total_mse)

    def batch_feed_dict(self, e, i, j, t, x, outcomes):
        batch_x = x[i:j, :]
        batch_t = t[i:j]
        batch_risk = risk_set(batch_t)
        batch_impute_mask = get_missing_mask(batch_x, self.imputation_values)
        batch_e = e[i:j]
        idx_observed = batch_e == 1
        feed_dict = {self.x: batch_x,
                     self.impute_mask: batch_impute_mask,
                     self.t: batch_t,
                     self.t_lab: batch_t[idx_observed],
                     self.e: batch_e,
                     self.risk_set: batch_risk,
                     self.batch_size_tensor: len(batch_t),
                     self.is_training: False,
                     self.noise_alpha: np.ones(shape=self.noise_dim)}
        # TODO replace with abstract methods

        updated_feed_dic = self.outcomes_function(idx=i, j=j, feed_dict=feed_dict, outcomes=outcomes)
        return updated_feed_dic

    def outcomes_function(self, idx, j, feed_dict, outcomes):
        return feed_dict

    def train_neural_network(self):
        train_print = "Training {0} Model:".format(self.model)
        params_print = "Parameters:, l2_reg:{}, learning_rate:{}," \
                       " momentum: beta1={} beta2={}, batch_size:{}, batch_norm:{}," \
                       " hidden_dim:{}, latent_dim:{}, num_of_batches:{}, keep_prob:{}, disc_update:{}" \
            .format(self.l2_reg, self.learning_rate, self.beta1, self.beta2, self.batch_size,
                    self.batch_norm, self.hidden_dim, self.latent_dim, self.num_batches, self.keep_prob,
                    self.disc_updates)
        print(train_print)
        print(params_print)
        logging.debug(train_print)
        logging.debug(params_print)
        self.session.run(tf.global_variables_initializer())

        best_ci = 0
        best_t_reg = np.inf
        best_validation_epoch = 0
        last_improvement = 0

        start_time = time.time()
        epochs = 0
        show_all_variables()
        j = 0

        for i in range(self.num_iterations):
            # Batch Training
            run_options = tf.RunOptions(timeout_in_ms=4000)
            x_batch, t_batch, e_batch = self.session.run([self.x_batch, self.t_batch, self.e_batch],
                                                         options=run_options)
            risk_batch = risk_set(data_t=t_batch)
            batch_impute_mask = get_missing_mask(x_batch, self.imputation_values)
            batch_size = len(t_batch)
            idx_observed = e_batch == 1
            # TODO simplify batch processing
            feed_dict_train = {self.x: x_batch,
                               self.impute_mask: batch_impute_mask,
                               self.t: t_batch,
                               self.t_lab: t_batch[idx_observed],
                               self.e: e_batch,
                               self.risk_set: risk_batch, self.batch_size_tensor: batch_size, self.is_training: True,
                               self.noise_alpha: np.ones(shape=self.noise_dim)}
            for k in range(self.disc_updates):
                _ = self.session.run([self.disc_solver], feed_dict=feed_dict_train)

            for m in range(self.gen_updates):
                _ = self.session.run([self.gen_solver], feed_dict=feed_dict_train)

            summary, train_time, train_cost, train_ranking, train_rae, train_reg, train_gen, train_layer_one_recon, \
            train_t_reg, train_t_mse, train_disc = self.session.run(
                [self.merged, self.predicted_time, self.cost, self.ranking_partial_lik, self.total_rae,
                 self.reg_loss, self.gen_one_loss, self.layer_one_recon, self.t_regularization_loss, self.t_mse,
                 self.disc_one_loss],
                feed_dict=feed_dict_train)
            try:
                #RAIBER NEW CHANGE
                #train_ci = 0.0
                train_ci = concordance_index(event_times=t_batch,
                                             predicted_scores=train_time.reshape(t_batch.shape),
                                             event_observed=e_batch)
            except IndexError:
                train_ci = 0.0
                print("C-Index IndexError")

            tf.verify_tensor_all_finite(train_cost, "Training Cost has Nan or Infinite")
            if j >= self.num_examples:
                epochs += 1
                is_epoch = True
                # idx = 0
                j = 0
            else:
                # idx = j
                j += self.batch_size
                is_epoch = False

            if i % 100 == 0:
                train_print = "it:{}, trainCI:{}, train_ranking:{}, train_RAE:{},  train_Gen:{}, train_Disc:{}, " \
                              "train_reg:{}, train_t_reg:{}, train_t_mse:{}, train_layer_one_recon:{}".format(
                    i, train_ci, train_ranking, train_rae, train_gen, train_disc, train_reg, train_t_reg, train_t_mse,
                    train_layer_one_recon)
                print(train_print)
                logging.debug(train_print)

            if is_epoch or (i == (self.num_iterations - 1)):
                improved_str = ''
                # Calculate  Vaid CI the CI
                self.train_ci.append(train_ci)
                self.train_cost.append(train_cost)
                self.train_t_rae.append(train_rae)
                self.train_gen.append(train_gen)
                self.train_disc.append(train_disc)
                self.train_ranking.append(train_ranking)
                self.train_layer_one_recon.append(train_layer_one_recon)

                self.train_writer.add_summary(summary, i)
                valid_ci, valid_cost, valid_rae, valid_ranking, valid_gen, valid_reg, valid_disc, \
                valid_layer_one_recon, valid_t_reg, valid_t_mse = \
                    self.predict_concordance_index(
                        x=self.valid_x,
                        e=self.valid_e,
                        t=self.valid_t)
                self.valid_cost.append(valid_cost)
                self.valid_ci.append(valid_ci)
                self.valid_t_rae.append(valid_rae)
                self.valid_gen.append(valid_gen)
                self.valid_disc.append(valid_disc)
                self.valid_ranking.append(valid_ranking)
                self.valid_layer_one_recon.append(valid_layer_one_recon)
                tf.verify_tensor_all_finite(valid_cost, "Validation Cost has Nan or Infinite")

                if valid_t_reg < best_t_reg:
                    self.saver.save(sess=self.session, save_path=self.save_path)
                    best_validation_epoch = epochs
                    best_t_reg = valid_t_reg
                    last_improvement = i
                    improved_str = '*'
                    # Save  Best Perfoming all variables of the TensorFlow graph to file.
                # update best validation accuracy
                optimization_print = "Iteration: {} epochs:{}, Training: RAE:{}, Loss: {}," \
                                     " Ranking:{}, Reg:{}, Gen:{}, Disc:{}, Recon_One:{}, T_Reg:{},T_MSE:{},  CI:{}" \
                                     " Validation RAE:{} Loss:{}, Ranking:{}, Reg:{}, Gen:{}, Disc:{}, " \
                                     "Recon_One:{}, T_Reg:{}, T_MSE:{}, CI:{}, {}" \
                    .format(i + 1, epochs, train_rae, train_cost, train_ranking, train_reg, train_gen,
                            train_disc, train_layer_one_recon, train_t_reg, train_t_mse,
                            train_ci, valid_rae, valid_cost, valid_ranking, valid_reg, valid_gen, valid_disc,
                            valid_layer_one_recon, valid_t_reg, valid_t_mse, valid_ci, improved_str)

                print(optimization_print)
                logging.debug(optimization_print)
                if i - last_improvement > self.require_improvement or math.isnan(
                        train_cost) or epochs >= self.max_epochs:
                    # if i - last_improvement > self.require_improvement:
                    print("No improvement found in a while, stopping optimization.")
                    # Break out from the for-loop.
                    break
        # Ending time.

        end_time = time.time()
        time_dif = end_time - start_time
        time_dif_print = "Time usage: " + str(timedelta(seconds=int(round(time_dif))))
        print(time_dif_print)
        logging.debug(time_dif_print)
        # shutdown everything to avoid zombies
        self.session.run(self.queue.close(cancel_pending_enqueues=True))
        self.coord.request_stop()
        self.coord.join(self.threads)
        return best_validation_epoch, epochs

    def train_test(self, train=True):

        def get_dict(x, t, e):
            observed_idx = e == 1
            feed_dict = {self.x: x,
                         self.impute_mask: get_missing_mask(x, self.imputation_values),
                         self.t: t,
                         self.t_lab: t[observed_idx],
                         self.e: e,
                         self.batch_size_tensor: len(t),
                         self.is_training: False, self.noise_alpha: np.ones(shape=self.noise_dim)}
            return {'feed_dict': feed_dict, 'outcomes': {}}

        session_dict = {'Test': get_dict(x=self.test_x, t=self.test_t, e=self.test_e),
                        'Train': get_dict(x=self.train_x, t=self.train_t, e=self.train_e),
                        'Valid': get_dict(x=self.valid_x, t=self.valid_t, e=self.valid_e)}

        if train:
            best_epoch, epochs = self.train_neural_network()
            self.time_related_metrics(best_epoch, epochs, session_dict=session_dict)
        else:
            self.generate_statistics(data_x=self.test_x, data_e=self.test_e, data_t=self.test_t, name='Test',
                                     session_dict=session_dict['Test'])

        self.session.close()

    def time_related_metrics(self, best_epoch, epochs, session_dict):
        #plot_cost(training=self.train_cost, validation=self.valid_cost, model=self.model, name="Cost",
        #          epochs=epochs,
        #          best_epoch=best_epoch)
        #plot_cost(training=self.train_ci, validation=self.valid_ci, model=self.model, name="CI",
        #          epochs=epochs,
        #          best_epoch=best_epoch)
        #plot_cost(training=self.train_t_rae, validation=self.valid_t_rae, model=self.model, name="RAE",
        #          epochs=epochs,
        #          best_epoch=best_epoch)
        #plot_cost(training=self.train_ranking, validation=self.valid_ranking, model=self.model, name="Rank",
        #          epochs=epochs,
        #          best_epoch=best_epoch)
        #plot_cost(training=self.train_gen, validation=self.valid_gen, model=self.model, name="Gen_Loss",
        #          epochs=epochs, best_epoch=best_epoch)

        #plot_cost(training=self.train_disc, validation=self.valid_disc, model=self.model, name="Disc_Loss",
        #          epochs=epochs, best_epoch=best_epoch)

        #plot_cost(training=self.train_layer_one_recon, validation=self.valid_layer_one_recon, model=self.model,
        #          name="Recon",
        #         epochs=epochs, best_epoch=best_epoch)
         # TEST
        self.generate_statistics(data_x=self.test_x, data_e=self.test_e, data_t=self.test_t, name='Test',
                                 session_dict=session_dict['Test'], t_train_R=self.train_t, y_train_R=self.train_e)

        # VALID
        self.generate_statistics(data_x=self.valid_x, data_e=self.valid_e, data_t=self.valid_t, name='Valid',
                                 session_dict=session_dict['Valid'], t_train_R=self.train_t, y_train_R=self.train_e)
        # TRAIN
        self.generate_statistics(data_x=self.train_x, data_e=self.train_e, data_t=self.train_t, name='Train',
                                 session_dict=session_dict['Train'], t_train_R=self.train_t, y_train_R=self.train_e)
      

    def generate_statistics(self, data_x, data_e, data_t, name, session_dict, t_train_R, y_train_R, save=True):
        self.saver.restore(sess=self.session, save_path=self.save_path)
        ci, cost, rae, ranking, gen, reg, disc, layer_one_recon, t_reg, t_mse = \
            self.predict_concordance_index(x=data_x,
                                           e=data_e,
                                           t=data_t,
                                           outcomes=
                                           session_dict[
                                               'outcomes'])

        observed_idx = self.extract_observed_death(name=name, observed_e=data_e, observed_t=data_t, save=save)

        median_predicted_time, median_disc_prob, median_disc_score, median_prob_t_gen = self.median_predict_time(session_dict)

        if name == 'Test':
            self.save_time_samples(x=data_x[observed_idx], e=data_e[observed_idx],
                                   t=data_t[observed_idx], name='obs_samples_predicted', cens=False)

            self.save_time_samples(x=data_x[np.logical_not(observed_idx)], e=data_e[np.logical_not(observed_idx)],
                                   t=data_t[np.logical_not(observed_idx)], name='cen_samples_predicted', cens=True)
            
            np.save('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\{}_predicted_time'.format(name), median_predicted_time)
            np.save('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\{}_disc_prob'.format(name), median_disc_prob)
            np.save('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\{}_disc_score'.format(name), median_disc_score)
            np.save('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\{}_prob_t_gen'.format(name), median_prob_t_gen)
            np.save('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\{}_empirical_time'.format(name), data_t)
            np.save('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\{}_data_e'.format(name), data_e)
            np.save('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\{}_t_train'.format(name), t_train_R)
            np.save('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\{}_y_train'.format(name), y_train_R)
        
        observed_empirical = data_t[observed_idx]
        observed_predicted = median_predicted_time[observed_idx]
        #RAIBER NEW CHANGE
        observed_ci = concordance_index(event_times=observed_empirical, predicted_scores=observed_predicted,
                                        event_observed=data_e[observed_idx])
        observed_ci = 0

        corr = spearmanr(observed_empirical, observed_predicted)
        results = ":{} RAE:{}, Loss:{}, Gen:{}, Disc:{}, Reg:{}, Ranking{}, Recon:{}, T_Reg:{},T_MSE:{}," \
                  " CI:{}, Observed: CI:{}, " \
                  "Correlation:{}".format(name, rae, cost, gen, disc, reg, ranking, layer_one_recon, t_reg, t_mse, ci,
                                          observed_ci, corr)
        logging.debug(results)
        print(results)

    def median_predict_time(self, session_dict):
        #pdb.set_trace()
        predicted_time = []
        disc_prob = []
        disc_score = []
        prob_t_gen = []
        for p in range(self.sample_size):
            gen_time, disc_prob1, disc_score1, prob_t_gen1 = self.session.run([self.predicted_time, self.disc_logit, self.disc_f, self.probability_t_gen], feed_dict=session_dict['feed_dict'])
            predicted_time.append(gen_time)
            disc_prob.append(disc_prob1)
            disc_score.append(disc_score1)
            prob_t_gen.append(prob_t_gen1)
        predicted_time = np.array(predicted_time)
        disc_prob = np.array(disc_prob)
        disc_score = np.array(disc_score)
        prob_t_gen = np.array(prob_t_gen)
        # print("predicted_time_shape:{}".format(predicted_time.shape))
        return np.median(predicted_time, axis=0), np.median(disc_prob, axis=0), np.median(disc_score, axis=0), np.median(prob_t_gen, axis=0)

    def save_time_samples(self, x, t, e, name, cens=False):
        predicted_time = self.generate_time_samples(e, x)
        plot_predicted_distribution(predicted=predicted_time, empirical=t, data='Test_' + name, cens=cens)
        return

    def generate_time_samples(self, e, x):
        # observed = e == 1
        feed_dict = {self.x: x,
                     self.impute_mask: get_missing_mask(x, self.imputation_values),
                     # self.t: t,
                     # self.t_lab: t[observed],
                     self.e: e,
                     # self.risk_set: risk_set(t),
                     self.batch_size_tensor: len(x),
                     self.is_training: False, self.noise_alpha: np.ones(shape=self.noise_dim)}
        predicted_time = []
        for p in range(self.sample_size):
            gen_time = self.session.run(self.predicted_time, feed_dict=feed_dict)
            predicted_time.append(gen_time)
        predicted_time = np.array(predicted_time)
        return predicted_time

    def enqueue(self):
        """ Iterates over our data puts small junks into our queue."""
        # TensorFlow Input Pipelines for Large Data Sets
        # ischlag.github.io
        # http://ischlag.github.io/2016/11/07/tensorflow-input-pipeline-for-large-datasets/
        # http://web.stanford.edu/class/cs20si/lectures/slides_09.pdf
        under = 0
        max = len(self.train_x)
        try:
            while not self.coord.should_stop():
                # print("starting to write into queue")
                upper = under + self.capacity
                # print("try to enqueue ", under, " to ", upper)
                if upper <= max:
                    curr_x = self.train_x[under:upper]
                    curr_t = self.train_t[under:upper]
                    curr_e = self.train_e[under:upper]
                    under = upper
                else:
                    rest = upper - max
                    curr_x = np.concatenate((self.train_x[under:max], self.train_x[0:rest]))
                    curr_t = np.concatenate((self.train_t[under:max], self.train_t[0:rest]))
                    curr_e = np.concatenate((self.train_e[under:max], self.train_e[0:rest]))
                    under = rest

                self.session.run(self.enqueue_op,
                                 feed_dict={self.x: curr_x, self.t: curr_t, self.e: curr_e})
        except tf.errors.CancelledError:
            print("finished enqueueing")

    @staticmethod
    def extract_observed_death(name, observed_e, observed_t, save=False):
        idx_observed = observed_e == 1
        observed_death = observed_t[idx_observed]
        if save:
            death_observed_print = "{} observed_death:{}, percentage:{}".format(name, observed_death.shape, float(
                len(observed_death) / len(observed_t)))
            logging.debug(death_observed_print)
            print(death_observed_print)
        return idx_observed


# 12. DATE

In [19]:
class DATE(DATE_AE):
    def __init__(self,
                 batch_size,
                 learning_rate,
                 beta1,
                 beta2,
                 require_improvement,
                 seed,
                 num_iterations,
                 hidden_dim,
                 latent_dim,
                 input_dim,
                 num_examples,
                 keep_prob,
                 train_data,
                 valid_data,
                 test_data,
                 end_t,
                 covariates,
                 imputation_values,
                 sample_size,
                 disc_updates,
                 categorical_indices,
                 l2_reg,
                 gen_updates,
                 max_epochs,
                 path_large_data=""
                 ):
        DATE_AE.__init__(self, batch_size=batch_size,
                         learning_rate=learning_rate,
                         beta1=beta1,
                         beta2=beta2,
                         require_improvement=require_improvement,
                         num_iterations=num_iterations, seed=seed,
                         l2_reg=l2_reg,
                         hidden_dim=hidden_dim,
                         train_data=train_data, test_data=test_data, valid_data=valid_data,
                         input_dim=input_dim,
                         num_examples=num_examples, keep_prob=keep_prob,
                         latent_dim=latent_dim, end_t=end_t,
                         path_large_data=path_large_data,
                         covariates=covariates,
                         categorical_indices=categorical_indices,
                         disc_updates=disc_updates,
                         sample_size=sample_size, imputation_values=imputation_values,
                         max_epochs=max_epochs, gen_updates=gen_updates)

        print_model = "model is DATE"
        print(print_model)
        logging.debug(print_model)
        self.model = 'DATE'
        self.imputation_values = imputation_values

    def _objective(self):
        self.num_batches = self.num_examples / self.batch_size
        logging.debug("num batches:{}, batch_size:{} epochs:{}".format(self.num_batches, self.batch_size,
                                                                       int(self.num_iterations / self.num_batches)))
        self._build_model()
        self.reg_loss = l2_loss(self.l2_reg) + l1_loss(self.l2_reg)
        self.layer_one_recon = tf.constant(0.0)
        self.cost = self.t_regularization_loss + self.disc_one_loss + self.gen_one_loss
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate, beta1=self.beta1,
                                           beta2=self.beta2)

        dvars1 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "Discriminator_one")
        genvars1 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "generate_t_given_x")
        self.disc_solver = optimizer.minimize(self.disc_one_loss, var_list=dvars1)
        self.gen_solver = optimizer.minimize(self.gen_one_loss + self.t_regularization_loss, var_list=genvars1)

    def _build_model(self):
        self._risk_date()

    @staticmethod
    def log(x):
        return tf.log(x + 1e-8)

    def _risk_date(self):
        def expand_t_dim(t):
            return tf.expand_dims(t, axis=1)

        indices_lab = tf.where(tf.equal(tf.constant(1.0, dtype=tf.float32), self.e))
        x_lab = tf.squeeze(tf.gather(self.x, indices_lab), axis=[1])
        t_lab_exp = expand_t_dim(self.t_lab)

        t_gen, prob_t_gen = pt_given_x(x=self.x, hidden_dim=self.hidden_dim, is_training=self.is_training,
                           batch_norm=self.batch_norm, keep_prob=self.keep_prob, batch_size=self.batch_size_tensor,
                           input_dim=self.input_dim, noise_alpha=self.noise_alpha)

        # Discriminator B
        d_one_real, f_one_real = discriminator_one(pair_one=x_lab, pair_two=t_lab_exp, hidden_dim=self.hidden_dim,
                                                   is_training=self.is_training, batch_norm=self.batch_norm,
                                                   keep_prob=self.keep_prob)  # (x_nc, t_nc)
        d_one_fake, f_one_fake = discriminator_one(pair_one=self.x, pair_two=t_gen, hidden_dim=self.hidden_dim,
                                                   is_training=self.is_training, batch_norm=self.batch_norm,
                                                   reuse=True, keep_prob=self.keep_prob)  # (x, t_gen)

        # Discriminator loss
        self.disc_one_loss = -tf.reduce_mean(self.log(d_one_real)) - tf.reduce_mean(self.log(1 - d_one_fake))

        # Generator loss
        self.gen_one_loss = tf.reduce_mean(f_one_real) - tf.reduce_mean(f_one_fake)
        self.disc_logit = d_one_fake # added b raiber 
        self.disc_f = f_one_fake # added by raiber
        self.probability_t_gen = tf.squeeze(prob_t_gen) # added by raiber
        self.predicted_time = tf.squeeze(t_gen)
        self.ranking_partial_lik, self.total_rae, self.total_t_recon_loss = \
            batch_metrics(e=self.e,
                          risk_set=self.risk_set,
                          predicted=self.predicted_time,
                          batch_size=self.batch_size_tensor,
                          empirical=self.t)

        self.t_regularization_loss = self.total_t_recon_loss
        self.t_mse = tf.losses.mean_squared_error(labels=self.t_lab,
                                                  predictions=tf.gather(self.predicted_time, indices_lab))

# 13. train 

In [20]:
import os
import pprint
import sys

if __name__ == '__main__':
    
    r_epochs = 600
    
    # Two date models to choose
    simple = True
    if simple:
        model = DATE
    else:
        model = DATE_AE

    
    data_set = generate_data()
    train_data, valid_data, test_data, end_t, covariates, one_hot_indices, imputation_values \
        = data_set['train'], \
          data_set['valid'], \
          data_set['test'], \
          data_set['end_t'], \
          data_set['covariates'], \
          data_set[
              'one_hot_indices'], \
          data_set[
              'imputation_values']

    print("imputation_values:{}, one_hot_indices:{}".format(imputation_values, one_hot_indices))
    print("end_t:{}".format(end_t))
    train = {'x': train_data['x'], 'e': train_data['e'], 't': train_data['t']}
    valid = {'x': valid_data['x'], 'e': valid_data['e'], 't': valid_data['t']}
    test = {'x': test_data['x'], 'e': test_data['e'], 't': test_data['t']}

    perfomance_record = []

    date = model(batch_size=350,
                 learning_rate=3e-4,
                 beta1=0.9,
                 beta2=0.999,
                 require_improvement=10000,
                 num_iterations=40000, seed=31415,
                 l2_reg=0.001,
                 hidden_dim=[50, 50],
                 train_data=train, test_data=test, valid_data=valid,
                 input_dim=train['x'].shape[1],
                 num_examples=train['x'].shape[0], keep_prob=0.8,
                 latent_dim=50, end_t=end_t,
                 path_large_data='C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE',
                 covariates=covariates,
                 categorical_indices=one_hot_indices,
                 disc_updates=1,
                 sample_size=200, imputation_values=imputation_values,
                 max_epochs=r_epochs,  gen_updates=2)

    with date.session:
        date.train_test()

head of data:   id  time  duration  orig_time  first_time  mat_time  balance_time  \
0   1    48        24         -7          25       113      29087.21   
1   2    26         2         18          25       138     105654.77   
2   3    29         5         -6          25       114      44378.60   
3   4    60        36         -2          25       119      52686.35   
4   5    27         3         18          25       138      52100.71   

    LTV_time  interest_rate_time  hpi_time  ...  REtype_SF_orig_time  \
0  26.658065               9.200    146.45  ...                    1   
1  65.469851               7.680    225.10  ...                    1   
2  31.459735              11.375    217.37  ...                    1   
3  34.898842              10.500    189.82  ...                    1   
4  66.346343               9.155    222.39  ...                    1   

   investor_orig_time  balance_orig_time  FICO_orig_time  LTV_orig_time  \
0                   0            45000.0      

Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
layer input shape:34
name:W_decoder_h0_z, shape[34, 50]
name:b_decoder_h0_z, shape[50]
batch inputs (?, 50), shape for var50
batch mean (50,), var (50,)
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
layer input shape:100
name:W_decoder_h1_z, shape[100, 50]
name:b_decoder_h1_z, shape[50]
batch inputs (?, 50), shape for var50
batch mean (50,), var (50,)
name:W_encoder_t, shape[100, 1]
name:b_encoder_t, shape[1]
scope:Discriminator_one, pair_one:(?, 17), pai

it:0, trainCI:0.5122450566252883, train_ranking:106.49032592773438, train_RAE:0.9140235185623169,  train_Gen:0.9928682446479797, train_Disc:1.2529733180999756, train_reg:3.214430093765259, train_t_reg:25.984174728393555, train_t_mse:141.7210235595703, train_layer_one_recon:0.0
Iteration: 93 epochs:1, Training: RAE:0.48607322573661804, Loss: 18.12088966369629, Ranking:106.60243225097656, Reg:3.236544609069824, Gen:4.725134372711182, Disc:0.28422820568084717, Recon_One:0.0, T_Reg:13.111526489257812,T_MSE:132.01254272460938,  CI:0.6663797032460627 Validation RAE:0.48728336375013387 Loss:17.496787289820627, Ranking:105.12426733389279, Reg:3.2579947609118527, Gen:4.149912470084988, Disc:0.28932935817846583, Recon_One:0.0, T_Reg:13.057545541121957, T_MSE:130.3351924257516, CI:0.7509217719264398, *
it:100, trainCI:0.6833419161823487, train_ranking:110.28694152832031, train_RAE:0.45404326915740967,  train_Gen:4.569278240203857, train_Disc:0.26786714792251587, train_reg:3.2377512454986572, trai

Iteration: 1023 epochs:11, Training: RAE:0.3387593924999237, Loss: 20.125991821289062, Ranking:99.84967041015625, Reg:3.257138252258301, Gen:12.144979476928711, Disc:0.007662579417228699, Recon_One:0.0, T_Reg:7.973349571228027,T_MSE:191.11106872558594,  CI:0.8184659773984131 Validation RAE:0.3125705043867378 Loss:18.589142475841314, Ranking:103.56528222657776, Reg:3.2787248881679782, Gen:11.627905372084417, Disc:0.007803957726854383, Recon_One:0.0, T_Reg:6.953433014026683, T_MSE:1964.0520249662868, CI:0.8502276911394786, *
it:1100, trainCI:0.798614898025957, train_ranking:97.33397674560547, train_RAE:0.34362050890922546,  train_Gen:12.214051246643066, train_Disc:0.007257615216076374, train_reg:3.256826877593994, train_t_reg:7.433902740478516, train_t_mse:135.36563110351562, train_layer_one_recon:0.0
Iteration: 1116 epochs:12, Training: RAE:0.3264888823032379, Loss: 19.0360050201416, Ranking:113.62300872802734, Reg:3.2570481300354004, Gen:11.773918151855469, Disc:0.009057383984327316, R

it:2000, trainCI:0.8593299134337928, train_ranking:98.90995788574219, train_RAE:0.2555827796459198,  train_Gen:14.147375106811523, train_Disc:0.0029252655804157257, train_reg:3.250941276550293, train_t_reg:4.680120468139648, train_t_mse:146.0420684814453, train_layer_one_recon:0.0
Iteration: 2046 epochs:22, Training: RAE:0.29366475343704224, Loss: 20.972688674926758, Ranking:107.55656433105469, Reg:3.2507119178771973, Gen:15.348555564880371, Disc:0.0021804552525281906, Recon_One:0.0, T_Reg:5.621953010559082,T_MSE:108.82624816894531,  CI:0.8247645017352504 Validation RAE:0.2518792757693419 Loss:20.857932112344255, Ranking:103.28768546154637, Reg:3.272255963350186, Gen:13.564358464744638, Disc:0.0031792069587316626, Recon_One:0.0, T_Reg:7.29039435358633, T_MSE:16186.474591677705, CI:0.884234662653018, 
it:2100, trainCI:0.8402632886239444, train_ranking:107.20105743408203, train_RAE:0.2987845838069916,  train_Gen:14.857328414916992, train_Disc:0.00225302972830832, train_reg:3.250382661819

it:3000, trainCI:0.879067755642063, train_ranking:104.0204849243164, train_RAE:0.24482356011867523,  train_Gen:16.465129852294922, train_Disc:0.0010475656017661095, train_reg:3.2467708587646484, train_t_reg:3.7869930267333984, train_t_mse:117.51766204833984, train_layer_one_recon:0.0
Iteration: 3069 epochs:33, Training: RAE:0.28645262122154236, Loss: 22.356149673461914, Ranking:103.10993194580078, Reg:3.246397018432617, Gen:16.865039825439453, Disc:0.0008919195388443768, Recon_One:0.0, T_Reg:5.490218162536621,T_MSE:234.1758575439453,  CI:0.8682208525672435 Validation RAE:0.23263921040929347 Loss:23.178973012497025, Ranking:103.09874346304852, Reg:3.267912466972936, Gen:15.145630351719266, Disc:0.0012825913855097223, Recon_One:0.0, T_Reg:8.032060159926743, T_MSE:33368.36025194629, CI:0.8964619797237443, 
it:3100, trainCI:0.863011583011583, train_ranking:102.75753784179688, train_RAE:0.2577272653579712,  train_Gen:16.5709228515625, train_Disc:0.0010139639489352703, train_reg:3.2464423179

it:4000, trainCI:0.8657303218459369, train_ranking:97.79691314697266, train_RAE:0.26154085993766785,  train_Gen:18.006420135498047, train_Disc:0.0005042750271968544, train_reg:3.245508909225464, train_t_reg:4.719810485839844, train_t_mse:156.47869873046875, train_layer_one_recon:0.0
Iteration: 4092 epochs:44, Training: RAE:0.25558024644851685, Loss: 22.784393310546875, Ranking:99.34451293945312, Reg:3.245208263397217, Gen:17.859426498413086, Disc:0.00049629807472229, Recon_One:0.0, T_Reg:4.924470901489258,T_MSE:173.4832000732422,  CI:0.8676525248234482 Validation RAE:0.2219024103247137 Loss:24.38728914002083, Ranking:103.06686128514013, Reg:3.2667158334810047, Gen:16.412682131974417, Disc:0.0006731019650842275, Recon_One:0.0, T_Reg:7.973934030836935, T_MSE:35941.30094417769, CI:0.9008044492466475, 
it:4100, trainCI:0.8721203324976909, train_ranking:100.11455535888672, train_RAE:0.24763815104961395,  train_Gen:18.43740463256836, train_Disc:0.00043777949758805335, train_reg:3.24520850181

Iteration: 5022 epochs:54, Training: RAE:0.24670320749282837, Loss: 24.600482940673828, Ranking:94.45259094238281, Reg:3.243790626525879, Gen:19.73611068725586, Disc:0.00024411105550825596, Recon_One:0.0, T_Reg:4.864129066467285,T_MSE:202.66648864746094,  CI:0.863020949409405 Validation RAE:0.22754074205111396 Loss:35.98681189081617, Ranking:102.9768358130179, Reg:3.2652888012421313, Gen:17.781667243067407, Disc:0.00034598529816493235, Recon_One:0.0, T_Reg:18.204797974851587, T_MSE:515933.3738725573, CI:0.9023646903072812, 
it:5100, trainCI:0.8878386731224889, train_ranking:103.25394439697266, train_RAE:0.25860267877578735,  train_Gen:20.925304412841797, train_Disc:0.00021150708198547363, train_reg:3.2438724040985107, train_t_reg:4.027962684631348, train_t_mse:145.8992156982422, train_layer_one_recon:0.0
Iteration: 5115 epochs:55, Training: RAE:0.231073260307312, Loss: 24.112674713134766, Ranking:98.28901672363281, Reg:3.2437944412231445, Gen:19.991710662841797, Disc:0.0002420171804260

it:6000, trainCI:0.8708486138537687, train_ranking:103.6722412109375, train_RAE:0.24024246633052826,  train_Gen:21.492717742919922, train_Disc:0.00011837045894935727, train_reg:3.246936559677124, train_t_reg:4.1176371574401855, train_t_mse:118.13385772705078, train_layer_one_recon:0.0
Iteration: 6045 epochs:65, Training: RAE:0.2230335772037506, Loss: 24.66299057006836, Ranking:99.35002899169922, Reg:3.2474000453948975, Gen:20.8028564453125, Disc:0.00014661331078968942, Recon_One:0.0, T_Reg:3.859987735748291,T_MSE:151.16542053222656,  CI:0.8850260278338468 Validation RAE:0.21933357973046283 Loss:28.317536397831162, Ranking:103.00539557421313, Reg:3.268922141481671, Gen:19.717128160254155, Disc:0.00013602089958136853, Recon_One:0.0, T_Reg:8.600272106143464, T_MSE:56852.31908354831, CI:0.9048124726136131, 
it:6100, trainCI:0.8986756416570897, train_ranking:96.78571319580078, train_RAE:0.2530302107334137,  train_Gen:21.4840145111084, train_Disc:0.00012070855882484466, train_reg:3.247317790

it:7000, trainCI:0.9013715812723256, train_ranking:97.11621856689453, train_RAE:0.23790346086025238,  train_Gen:21.773700714111328, train_Disc:9.077254799194634e-05, train_reg:3.2481467723846436, train_t_reg:5.762490272521973, train_t_mse:765.8975219726562, train_layer_one_recon:0.0
Iteration: 7068 epochs:76, Training: RAE:0.22972385585308075, Loss: 26.477291107177734, Ranking:100.5341796875, Reg:3.248419761657715, Gen:22.14539337158203, Disc:8.539494592696428e-05, Recon_One:0.0, T_Reg:4.331812858581543,T_MSE:141.2552490234375,  CI:0.860163370367452 Validation RAE:0.2149137898551027 Loss:32.463952608670205, Ranking:102.97410876002328, Reg:3.269948615899038, Gen:20.210670575180902, Disc:0.00010265698361325412, Recon_One:0.0, T_Reg:12.253179147092345, T_MSE:183253.70439617906, CI:0.9034888617270628, 
it:7100, trainCI:0.8871330118900361, train_ranking:105.39442443847656, train_RAE:0.2467389851808548,  train_Gen:22.455089569091797, train_Disc:6.86925632180646e-05, train_reg:3.2483386993408

it:8000, trainCI:0.9038852527503133, train_ranking:94.90543365478516, train_RAE:0.20688454806804657,  train_Gen:23.73615264892578, train_Disc:4.355429337010719e-05, train_reg:3.2526166439056396, train_t_reg:3.918276071548462, train_t_mse:173.5854949951172, train_layer_one_recon:0.0
Iteration: 8091 epochs:87, Training: RAE:0.24123388528823853, Loss: 27.526012420654297, Ranking:102.59442138671875, Reg:3.2529795169830322, Gen:23.493593215942383, Disc:3.996302984887734e-05, Recon_One:0.0, T_Reg:4.032380104064941,T_MSE:189.41534423828125,  CI:0.8683768632617738 Validation RAE:0.2023245768016676 Loss:32.59686288407881, Ranking:102.92198288003937, Reg:3.274538590935777, Gen:20.56912886913052, Disc:0.00010002045307300108, Recon_One:0.0, T_Reg:12.027634144187108, T_MSE:159920.8724411276, CI:0.9065257261682119, 
it:8100, trainCI:0.8770594043968609, train_ranking:102.02788543701172, train_RAE:0.22110721468925476,  train_Gen:23.425994873046875, train_Disc:4.302442539483309e-05, train_reg:3.2527661

Iteration: 9021 epochs:97, Training: RAE:0.2495632767677307, Loss: 29.284194946289062, Ranking:105.20092010498047, Reg:3.256786346435547, Gen:24.70645523071289, Disc:2.2231528419069946e-05, Recon_One:0.0, T_Reg:4.577718257904053,T_MSE:171.70465087890625,  CI:0.857857377421881 Validation RAE:0.2070220073409508 Loss:30.58508647237999, Ranking:103.0331529467288, Reg:3.2783706500945544, Gen:21.845830182992206, Disc:5.2165328287976325e-05, Recon_One:0.0, T_Reg:8.739203938368158, T_MSE:67478.27368957245, CI:0.907544491164554, 
it:9100, trainCI:0.8979140409989969, train_ranking:103.36186218261719, train_RAE:0.23099224269390106,  train_Gen:24.469871520996094, train_Disc:3.0302533559734002e-05, train_reg:3.2572240829467773, train_t_reg:3.6782002449035645, train_t_mse:131.38291931152344, train_layer_one_recon:0.0
Iteration: 9114 epochs:98, Training: RAE:0.2370024472475052, Loss: 29.38186264038086, Ranking:100.9814453125, Reg:3.2573742866516113, Gen:25.1121826171875, Disc:2.1090145310154185e-05, 

it:10000, trainCI:0.8962710941752858, train_ranking:96.68494415283203, train_RAE:0.22201856970787048,  train_Gen:25.60824203491211, train_Disc:1.918268208100926e-05, train_reg:3.260953426361084, train_t_reg:3.1763916015625, train_t_mse:131.49984741210938, train_layer_one_recon:0.0
Iteration: 10044 epochs:108, Training: RAE:0.2447056621313095, Loss: 30.55759620666504, Ranking:99.35292053222656, Reg:3.2613844871520996, Gen:26.22403335571289, Disc:1.3716808098251931e-05, Recon_One:0.0, T_Reg:4.3335490226745605,T_MSE:205.98397827148438,  CI:0.8975165209976551 Validation RAE:0.20585262804861976 Loss:38.26135598645742, Ranking:102.96955187350224, Reg:3.2829992649211457, Gen:23.91167967799784, Disc:1.6119252941066325e-05, Recon_One:0.0, T_Reg:14.349660280720657, T_MSE:341447.8112249743, CI:0.9089733856518645, 
it:10100, trainCI:0.8708977880584068, train_ranking:110.50247192382812, train_RAE:0.25372105836868286,  train_Gen:26.207962036132812, train_Disc:1.4066499716136605e-05, train_reg:3.2615

it:11000, trainCI:0.9217069299947099, train_ranking:105.58412170410156, train_RAE:0.20953984558582306,  train_Gen:26.859268188476562, train_Disc:1.0588895747787319e-05, train_reg:3.2633426189422607, train_t_reg:3.011533737182617, train_t_mse:178.178466796875, train_layer_one_recon:0.0
Iteration: 11067 epochs:119, Training: RAE:0.22172005474567413, Loss: 31.291397094726562, Ranking:103.40496063232422, Reg:3.2638344764709473, Gen:27.633007049560547, Disc:6.161356395750772e-06, Recon_One:0.0, T_Reg:3.658384323120117,T_MSE:200.17941284179688,  CI:0.9064127896533142 Validation RAE:0.20166251692411763 Loss:38.62092612131664, Ranking:103.06562948662206, Reg:3.2854654915082064, Gen:24.69974632067607, Disc:1.1549062263014125e-05, Recon_One:0.0, T_Reg:13.921167936357271, T_MSE:335537.36150526203, CI:0.9100667285791229, 
it:11100, trainCI:0.8884513441878077, train_ranking:104.81214904785156, train_RAE:0.2243395298719406,  train_Gen:27.302719116210938, train_Disc:7.79490073909983e-06, train_reg:3.

it:12000, trainCI:0.9032289532728515, train_ranking:109.22297668457031, train_RAE:0.22280724346637726,  train_Gen:28.54633331298828, train_Disc:5.091725597594632e-06, train_reg:3.2695975303649902, train_t_reg:3.5947439670562744, train_t_mse:141.218017578125, train_layer_one_recon:0.0
Iteration: 12090 epochs:130, Training: RAE:0.23990032076835632, Loss: 32.868072509765625, Ranking:105.98748779296875, Reg:3.269906520843506, Gen:28.69440269470215, Disc:4.079576683579944e-06, Recon_One:0.0, T_Reg:4.173666000366211,T_MSE:169.67149353027344,  CI:0.8752532199574841 Validation RAE:0.198784661271415 Loss:42.14086117588819, Ranking:103.01750185967565, Reg:3.2915777782656273, Gen:25.58333845425952, Disc:8.014561175811307e-06, Recon_One:0.0, T_Reg:16.55751384167697, T_MSE:503104.99117074796, CI:0.9091821842068015, 
it:12100, trainCI:0.8993510724244872, train_ranking:101.81364440917969, train_RAE:0.23483215272426605,  train_Gen:28.567028045654297, train_Disc:4.688890840043314e-06, train_reg:3.26980

Iteration: 13020 epochs:140, Training: RAE:0.22981026768684387, Loss: 33.991432189941406, Ranking:100.280517578125, Reg:3.273334503173828, Gen:30.113502502441406, Disc:2.156350547011243e-06, Recon_One:0.0, T_Reg:3.877929210662842,T_MSE:188.017333984375,  CI:0.8596468107538219 Validation RAE:0.19591139233289726 Loss:50.985060922351494, Ranking:102.98362823520674, Reg:3.2950284794984763, Gen:25.664655521569795, Disc:1.0338710250881735e-05, Recon_One:0.0, T_Reg:25.32039572779083, T_MSE:1318866.4045671828, CI:0.9099010899137181, 
it:13100, trainCI:0.8893808668370315, train_ranking:104.93358612060547, train_RAE:0.24529553949832916,  train_Gen:29.705354690551758, train_Disc:2.71396652351541e-06, train_reg:3.2734084129333496, train_t_reg:3.870999813079834, train_t_mse:141.92872619628906, train_layer_one_recon:0.0
Iteration: 13113 epochs:141, Training: RAE:0.21264898777008057, Loss: 33.0339241027832, Ranking:98.30445861816406, Reg:3.273545742034912, Gen:29.85329818725586, Disc:2.91128117169137

it:14000, trainCI:0.89300121301024, train_ranking:92.745361328125, train_RAE:0.21059131622314453,  train_Gen:31.148151397705078, train_Disc:1.430120619261288e-06, train_reg:3.280116319656372, train_t_reg:3.433284282684326, train_t_mse:151.9996795654297, train_layer_one_recon:0.0
Iteration: 14043 epochs:151, Training: RAE:0.22267396748065948, Loss: 34.678741455078125, Ranking:95.64431762695312, Reg:3.2802212238311768, Gen:31.169925689697266, Disc:1.4368770280270837e-06, Recon_One:0.0, T_Reg:3.5088140964508057,T_MSE:125.94746398925781,  CI:0.8724212961687828 Validation RAE:0.19492700703444893 Loss:33.998086957226725, Ranking:102.95985242016721, Reg:3.301960841795795, Gen:26.879456205130726, Disc:5.415711028347804e-06, Recon_One:0.0, T_Reg:7.11862533647328, T_MSE:41326.66475187022, CI:0.911518836552835, 
it:14100, trainCI:0.8706805421785807, train_ranking:113.52363586425781, train_RAE:0.2287215292453766,  train_Gen:32.03965759277344, train_Disc:1.4098507108428748e-06, train_reg:3.28016424

it:15000, trainCI:0.9118140302897392, train_ranking:102.54013061523438, train_RAE:0.20326420664787292,  train_Gen:32.83633041381836, train_Disc:1.0454168659634888e-06, train_reg:3.2854652404785156, train_t_reg:3.4604439735412598, train_t_mse:198.45179748535156, train_layer_one_recon:0.0
Iteration: 15066 epochs:162, Training: RAE:0.21028313040733337, Loss: 35.84884262084961, Ranking:108.40348052978516, Reg:3.2856686115264893, Gen:32.68573760986328, Disc:5.830265763506759e-07, Recon_One:0.0, T_Reg:3.1631038188934326,T_MSE:148.73435974121094,  CI:0.8952959028831563 Validation RAE:0.19854421975300493 Loss:77.96654954647681, Ranking:102.8818604408599, Reg:3.3074443319730196, Gen:29.131784185075873, Disc:1.3098234025999857e-06, Recon_One:0.0, T_Reg:48.834763577062816, T_MSE:6273564.6309498055, CI:0.9119257235250678, 
it:15100, trainCI:0.8997956659676292, train_ranking:98.21298217773438, train_RAE:0.21185189485549927,  train_Gen:32.29628372192383, train_Disc:9.426066753803752e-07, train_reg:3

it:16000, trainCI:0.9033044965295242, train_ranking:105.5367431640625, train_RAE:0.2262089103460312,  train_Gen:33.855567932128906, train_Disc:5.75511080569413e-07, train_reg:3.2934012413024902, train_t_reg:3.512627124786377, train_t_mse:183.46051025390625, train_layer_one_recon:0.0
Iteration: 16089 epochs:173, Training: RAE:0.20789699256420135, Loss: 37.1537971496582, Ranking:110.03601837158203, Reg:3.293832540512085, Gen:33.842796325683594, Disc:5.834825174133584e-07, Recon_One:0.0, T_Reg:3.3110005855560303,T_MSE:143.19329833984375,  CI:0.9192686289210825 Validation RAE:0.19262040175317718 Loss:50.3768258442413, Ranking:102.94111766292853, Reg:3.3156623672780148, Gen:28.963172705572696, Disc:2.353404423056876e-06, Recon_One:0.0, T_Reg:21.413650175206463, T_MSE:1120302.6167132696, CI:0.9133740239957194, 
it:16100, trainCI:0.9024132033985721, train_ranking:97.3615951538086, train_RAE:0.24078033864498138,  train_Gen:33.784629821777344, train_Disc:4.770083137373149e-07, train_reg:3.29387

Iteration: 17019 epochs:183, Training: RAE:0.22290486097335815, Loss: 38.453311920166016, Ranking:101.46199035644531, Reg:3.301474094390869, Gen:34.77877426147461, Disc:3.4614447486092104e-07, Recon_One:0.0, T_Reg:3.67453670501709,T_MSE:168.88107299804688,  CI:0.8932464146023468 Validation RAE:0.19157527461116935 Loss:69.14533775151186, Ranking:103.04321302418948, Reg:3.323354565442853, Gen:30.484116642388965, Disc:9.111252001907711e-07, Recon_One:0.0, T_Reg:38.66122045173516, T_MSE:4042064.483064721, CI:0.9133949775448208, 
it:17100, trainCI:0.892348124936824, train_ranking:105.02616882324219, train_RAE:0.21008747816085815,  train_Gen:34.810245513916016, train_Disc:3.918264042113151e-07, train_reg:3.3020262718200684, train_t_reg:2.9793734550476074, train_t_mse:121.33263397216797, train_layer_one_recon:0.0
Iteration: 17112 epochs:184, Training: RAE:0.2195374220609665, Loss: 39.107460021972656, Ranking:102.87263488769531, Reg:3.301980972290039, Gen:35.49074935913086, Disc:3.336833742650



observed_samples:(4736, 200), empirical_observed:(4736,)




:Test RAE:0.2082371774781784, Loss:25.19799843303821, Gen:21.509501012764154, Disc:8.022657475115799e-05, Reg:3.3017350421305283, Ranking101.50992513788071, Recon:0.0, T_Reg:3.6884172208144377,T_MSE:552.2015465418434, CI:0.9065759542086378, Observed: CI:0, Correlation:SpearmanrResult(correlation=0.802172671164176, pvalue=0.0)
INFO:tensorflow:Restoring parameters from C:\Users\raibe\Desktop\Thesis Code\DATE\summaries\mort_p\DATE_AE_model
Valid observed_death:(4265,), percentage:0.5333249968738277
:Valid RAE:0.21359430442262445, Loss:26.191246215889002, Gen:21.315678241954412, Disc:7.192693218426718e-05, Reg:3.273517396489695, Ranking103.09099471910426, Recon:0.0, T_Reg:4.875496057803979,T_MSE:5047.380039300474, CI:0.90464884824015, Observed: CI:0, Correlation:SpearmanrResult(correlation=0.7976431717508782, pvalue=0.0)
INFO:tensorflow:Restoring parameters from C:\Users\raibe\Desktop\Thesis Code\DATE\summaries\mort_p\DATE_AE_model
Train observed_death:(17056,), percentage:0.53319994998124

In [21]:
tt = np.load('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\Test_empirical_time.npy')
ee = np.load('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\Test_data_e.npy')
pp1 = np.load('C:\\Users\\raibe\\Desktop\\Thesis Code\\DATE\\matrix\\mort_p\\Test_predicted_time.npy')

In [22]:
#print(concordance_index(tt, pp1, ee24))
#print(concordance_index(tt, pp1, ee48))
print(concordance_index(tt, pp1, ee))

0.9082440781409044
