In [1]:
import numpy as np
import pandas as pd
import os
import shutil
import logging
from sklearn.model_selection import ShuffleSplit

from data_process import get_dataset_splits
from utils.evaluation_utils import load_data_from_file, write_results_to_file

In [2]:
import tensorflow as tf
# NOTE(review): later cells use the bare names `layers`, `models`, `losses` and `metrics`,
# which are not imported anywhere else in the visible notebook, so they presumably come from
# this wildcard import. Prefer importing those submodules explicitly.
from tensorflow.keras import *
print(tf.__version__)

2024-03-05 18:43:29.144489: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-05 18:43:29.183479: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.13.1


# Data loading and basic settings

In [3]:
dataset = load_data_from_file("../fullfeature_fillmean_1000.txt")
# Cast every array except 'sequence_length' to float32, then print each key with the
# shape and dtype of its array.
for key in dataset.keys():
    if key!='sequence_length':
        dataset[key] = dataset[key].astype(np.float32)
    print(key)
    print(dataset[key].shape)
    print(dataset[key].dtype)

previous_covariates
(3000, 160, 25)
float32
previous_treatments
(3000, 160, 3)
float32
covariates
(3000, 161, 25)
float32
treatments
(3000, 161, 3)
float32
sequence_length
(3000,)
int64
outcomes
(3000, 161, 1)
float32


In [4]:
# Hold out 10% of the trajectories as the test set; the remaining indices are shared
# between training and validation.
shuffle_split = ShuffleSplit(n_splits=1, test_size=0.1, random_state=10)
train_val_index, test_index = next(shuffle_split.split(dataset['covariates'][:, :, 0]))

# Split the remaining trajectories into training and validation sets.
# ShuffleSplit.split returns positions *within the array it is given*, i.e. positions in
# the train/val subset, not row numbers of the full dataset. They must be mapped back
# through `train_val_index`; otherwise the train/validation rows are drawn from
# 0..len(subset)-1 of the full dataset and can overlap with `test_index`.
shuffle_split = ShuffleSplit(n_splits=1, test_size=0.11, random_state=10)
train_positions, val_positions = next(shuffle_split.split(dataset['covariates'][train_val_index, :, 0]))
train_index = train_val_index[train_positions]
val_index = train_val_index[val_positions]

# NOTE(review): assumes get_dataset_splits indexes the full `dataset` arrays with these
# index arrays - confirm against data_process.get_dataset_splits.
dataset_map = get_dataset_splits(dataset, train_index, val_index, test_index, use_predicted_confounders=False)

In [5]:
MODEL_ROOT = 'results/rmsn_result_test_use_confounders_False'
# Settings consumed by the "rnn fit function" cell
networks_to_train='propensity_networks'
# networks_to_train='encoder'
b_use_predicted_confounders=False

In [6]:
# Setup GPUs
# Detect the available GPU devices
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Make every detected GPU visible to TensorFlow
        tf.config.set_visible_devices(gpus, 'GPU')
        for gpu in gpus:
            # Enable memory growth on this GPU
            tf.config.experimental.set_memory_growth(gpu, True)
        print("Using GPU with memory growth")
    except RuntimeError as e:
        # Changing device settings after the program is running may cause errors
        print(e)
else:
    # No GPU detected: fall back to the CPU
    print("No GPU found, using CPU")
    

Using GPU with memory growth


# Data Process Functions

In [7]:
def get_processed_data(raw_sim_data,
                       b_predict_actions,
                       b_use_actions_only,
                       b_use_predicted_confounders,
                       b_use_oracle_confounders,
                       b_remove_x1,
                       keep_first_point=False):
    """
    Create formatted data to train both propensity networks and seq2seq architecture

    :param raw_sim_data: dict with 'treatments', 'covariates', 'outcomes' and
        'sequence_length' entries; 'predicted_confounders' / 'confounders' are also read
        when the corresponding flag below is set
    :param b_predict_actions: flag to package data for propensity network to forecast actions
        (outputs are then the treatments shifted by one step)
    :param b_use_actions_only: flag to package data with only action inputs and not covariates
        (only consulted when b_predict_actions is set)
    :param b_use_predicted_confounders: flag to concatenate the confounders between the
        covariates and the treatments in the inputs
    :param b_use_oracle_confounders: flag to read raw_sim_data['confounders'] as the
        confounders; they are only concatenated when b_use_predicted_confounders is also set
    :param b_remove_x1: accepted for interface compatibility; not used by this function
    :param keep_first_point: when not predicting actions, keep the first time step of the
        inputs and outputs instead of dropping it
    :return: dict with 'outputs', 'scaled_inputs', 'scaled_outputs', 'actions',
        'sequence_lengths' and 'active_entries' ('outputs' and 'scaled_outputs' are the
        same array)
    """
    horizon = 1
    offset = 1

    treatments = raw_sim_data['treatments']
    covariates = raw_sim_data['covariates']
    dataset_outputs = raw_sim_data['outcomes']
    sequence_lengths = raw_sim_data['sequence_length']

    if b_use_predicted_confounders:
        predicted_confounders = raw_sim_data['predicted_confounders']

    if b_use_oracle_confounders:
        predicted_confounders = raw_sim_data['confounders']

    num_treatments = treatments.shape[-1]

    # Parcelling INPUTS
    if b_predict_actions:
        if b_use_actions_only:
            inputs = treatments
            inputs = inputs[:, :-offset, :]

            actions = inputs.copy()

        else:
            # Uses current covariate, to remove confounding effects between action and current value
            if b_use_predicted_confounders:
                print ("Using predicted confounders")
                inputs = np.concatenate([covariates[:, 1:, ], predicted_confounders[:, 1:, ], treatments[:, :-1, ]],
                                        axis=2)
            else:
                inputs = np.concatenate([covariates[:, 1:, ], treatments[:, :-1, ]], axis=2)

            # The treatments occupy the trailing feature columns of the inputs.
            actions = inputs[:, :, -num_treatments:].copy()

    else:
        if b_use_predicted_confounders:
            inputs = np.concatenate([covariates, predicted_confounders, treatments], axis=2)
        else:
            inputs = np.concatenate([covariates, treatments], axis=2)

        if not keep_first_point:
            inputs = inputs[:, 1:, :]

        actions = inputs[:, :, -num_treatments:].copy()

    # Parcelling OUTPUTS
    if b_predict_actions:
        outputs = treatments
        outputs = outputs[:, 1:, :]

    else:
        if keep_first_point:
            outputs = dataset_outputs
        else:
            outputs = dataset_outputs[:, 1:, :]

    # Set array alignment
    sequence_lengths = np.asarray(sequence_lengths) - 1  # everything shortens by 1

    # Remove any trajectories that are too short.
    # The mask is computed once, before `sequence_lengths` itself is filtered, so that all
    # four arrays are filtered with the same full-length mask. Recomputing the mask from
    # the already-filtered lengths gives a shorter mask and fails on `actions` as soon as
    # one trajectory is dropped.
    long_enough = sequence_lengths > 0
    inputs = inputs[long_enough, :, :]
    outputs = outputs[long_enough, :, :]
    actions = actions[long_enough, :, :]
    sequence_lengths = sequence_lengths[long_enough]

    # Add active entries: 1 where a time step lies inside the trajectory, 0 for padding
    active_entries = np.zeros(outputs.shape, dtype=np.float32)

    for i in range(sequence_lengths.shape[0]):
        sequence_length = int(sequence_lengths[i])

        if not b_predict_actions:
            for k in range(horizon):
                # include the censoring point too, but ignore future shifts that don't exist
                active_entries[i, :sequence_length-k, k] = 1
        else:
            active_entries[i, :sequence_length, :] = 1

    return {'outputs': outputs,  # already scaled
            'scaled_inputs': inputs,
            'scaled_outputs': outputs,
            'actions': actions,
            'sequence_lengths': sequence_lengths,
            'active_entries': active_entries
            }

In [8]:
def convert_to_tf_dataset(dataset_map, minibatch_size, shuffle_buffer_size=1000):
    """Wrap a processed-data dict in a shuffled, batched and prefetched tf.data.Dataset.

    :param dataset_map: dict providing 'scaled_inputs', 'scaled_outputs', 'active_entries'
        and 'sequence_lengths'; 'propensity_weights' and 'initial_states' are forwarded
        when present
    :param minibatch_size: number of examples per emitted batch
    :param shuffle_buffer_size: size of the shuffle buffer (default 1000, the value that
        used to be hard-coded). A buffer smaller than the number of examples only shuffles
        approximately; pass the number of examples for a uniform shuffle.
    :return: tf.data.Dataset whose elements are dicts keyed by 'inputs', 'outputs',
        'active_entries', 'sequence_lengths' plus any forwarded optional keys
    """
    key_map = {'inputs': dataset_map['scaled_inputs'],
               'outputs': dataset_map['scaled_outputs'],
               'active_entries': dataset_map['active_entries'],
               'sequence_lengths': dataset_map['sequence_lengths']}

    # Optional entries are only forwarded when the caller supplied them.
    for optional_key in ('propensity_weights', 'initial_states'):
        if optional_key in dataset_map:
            key_map[optional_key] = dataset_map[optional_key]

    # from_tensor_slices: one element per example; shuffle: randomise the order;
    # batch: group into minibatches; prefetch: prepare upcoming batches ahead of time
    tf_dataset = tf.data.Dataset.from_tensor_slices(key_map)\
                .shuffle(buffer_size=shuffle_buffer_size).batch(minibatch_size) \
                .prefetch(tf.data.experimental.AUTOTUNE)

    return tf_dataset

# Mirrored Training

In [9]:
# Distribution strategy shared by the later cells (loss, metrics, model and training step).
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: %d' % strategy.num_replicas_in_sync)

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')
Number of devices: 2


2024-03-05 18:43:37.734261: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 21626 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:44:00.0, compute capability: 8.9
2024-03-05 18:43:37.735013: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1639] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 21894 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:81:00.0, compute capability: 8.9


## Model Definition

In [10]:
def create_model(params):
    """Build the functional Keras model: LSTM -> per-step Dense -> output activation.

    Reads 'input_size', 'output_size', 'net_name', 'softmax_size', 'dropout_rate',
    'hidden_layer_size', 'hidden_activation' and 'output_activation' from ``params``.

    The model takes ``[sequence, initial_h, initial_c]`` as inputs and returns
    ``[per-step outputs, final_h, final_c]``.
    """
    # Data dimensions
    input_size = params['input_size']
    output_size = params['output_size']

    # Network hyperparameters
    net_name = params['net_name']
    softmax_size = params['softmax_size']
    dropout_rate = params['dropout_rate']
    hidden_layer_size = params['hidden_layer_size']
    memory_activation_type = params['hidden_activation']
    output_activation_type = params['output_activation']

    # Variable-length input sequence plus the explicit initial LSTM state
    sequence_in = layers.Input(shape=(None, input_size), dtype=tf.float32)
    initial_h = layers.Input(shape=(hidden_layer_size,), dtype=tf.float32, name='initial_h')
    initial_c = layers.Input(shape=(hidden_layer_size,), dtype=tf.float32, name='initial_c')

    # Recurrent layer: returns the full hidden sequence and the final (h, c) state
    recurrent = layers.LSTM(hidden_layer_size,
                            activation=memory_activation_type,
                            return_sequences=True,
                            return_state=True,
                            dropout=dropout_rate)
    hidden_seq, state_h, state_c = recurrent(sequence_in, initial_state=[initial_h, initial_c])

    # Seq2seq feedback would consume the raw LSTM output; otherwise project it linearly
    use_seq2seq_feedback = False
    logits = hidden_seq if use_seq2seq_feedback else layers.Dense(output_size)(hidden_seq)

    if softmax_size == 0:
        # Single activation over all outputs
        outputs = layers.Activation(output_activation_type)(logits)
    else:
        # The trailing `softmax_size` outputs go through a softmax, the rest through
        # the regular output activation
        reshaped = layers.Reshape((-1, output_size))(logits)
        core_part, softmax_part = tf.split(reshaped,
                                           [output_size - softmax_size, softmax_size],
                                           axis=-1)
        core_activated = layers.Activation(output_activation_type)(core_part)
        softmax_activated = layers.Softmax(axis=-1)(softmax_part)
        outputs = layers.Concatenate(axis=-1)([core_activated, softmax_activated])

    return models.Model(inputs=[sequence_in, initial_h, initial_c],
                        outputs=[outputs, state_h, state_c],
                        name=net_name)

## Loss Function

In [11]:
with strategy.scope():
    # test loss function ###################################
    # Unreduced MSE: averages over the last axis only and keeps the remaining axes.
    mse_loss_object = tf.keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)

    def compute_mse_loss(labels, predictions, global_batch_size=None):
        """Per-replica MSE scaled by the global batch size.

        :param labels: target tensor
        :param predictions: model output tensor
        :param global_batch_size: number of examples across all replicas. When None,
            tf.nn.compute_average_loss derives it as (this replica's batch size) *
            (number of replicas in sync), so the scaling follows the batch actually fed
            instead of a hard-coded constant.
        :return: scalar loss; summing it over the replicas gives the global-batch average
        """
        # Unreduced loss for each example
        per_example_loss = mse_loss_object(labels, predictions)
        # Sum and divide by the global batch size
        return tf.nn.compute_average_loss(per_example_loss, global_batch_size=global_batch_size)

    # custom loss function ##################################
    class CustomLoss(losses.Loss):
        """Masked loss ("mse" or "xentropy") with separate training and validation forms."""

        def __init__(self, performance_metric, num_gpus, global_batch_size, name="custom_loss"):
            super().__init__(name=name) #reduction=losses.Reduction.NONE
            self.performance_metric = performance_metric
            self.num_gpus = num_gpus
            self.global_batch_size = global_batch_size
            # self.weights = params['weights']
            # self.active_entries = params['active_entries']

        def train_call(self, y_true, y_pred, active_entries, weights):
            """Weighted loss over the active entries, divided by the number of GPUs.

            :raises ValueError: if ``performance_metric`` is neither "mse" nor "xentropy"
            """
            if self.performance_metric == "mse":
                loss = tf.reduce_sum(tf.square(y_true - y_pred) * active_entries * weights) \
                       / tf.reduce_sum(active_entries)
                # per_example_loss = (tf.square(y_true - y_pred) * active_entries * weights) \
                #                     / tf.reduce_sum(active_entries)
            elif self.performance_metric == "xentropy":
                # 1e-8 keeps the logarithms finite when a prediction is exactly 0 or 1
                loss = tf.reduce_sum((y_true * -tf.math.log(y_pred + 1e-8) +
                                       (1 - y_true) * -tf.math.log(1 - y_pred + 1e-8))
                                       * active_entries * weights) / tf.reduce_sum(active_entries)
                # per_example_loss = ((y_true * -tf.math.log(y_pred + 1e-8) + \
                #                    (1 - y_true) * -tf.math.log(1 - y_pred + 1e-8)) * active_entries * weights) / tf.reduce_sum(active_entries)

            else:
                raise ValueError("Unknown performance metric {}".format(self.performance_metric))

            # Divide by the number of GPUs so that summing over replicas gives the global average
            return loss * (1./self.num_gpus)
            # return tf.nn.compute_average_loss(per_example_loss, global_batch_size=self.global_batch_size)

        def valid_call(self, y_true, y_pred):
            """Element-wise loss with no masking, weighting or reduction.

            :raises ValueError: if ``performance_metric`` is neither "mse" nor "xentropy"
            """
            if self.performance_metric == "mse":
               #loss = tf.reduce_sum(tf.square(y_true - y_pred) * active_entries ) \
               #        / tf.reduce_sum(active_entries)
                loss = tf.square(y_true - y_pred)

            elif self.performance_metric == "xentropy":
                loss = (y_true * -tf.math.log(y_pred + 1e-8) +
                       (1 - y_true) * -tf.math.log(1 - y_pred + 1e-8))

            else:
                raise ValueError("Unknown performance metric {}".format(self.performance_metric))

            return loss

        def get_config(self):
            """Return the constructor arguments so the loss can be rebuilt from its config."""
            config = super().get_config()
            # `num_gpus` is a required constructor argument; without it in the config,
            # CustomLoss(**config) cannot be called.
            config.update({"performance_metric": self.performance_metric,
                           "num_gpus": self.num_gpus,
                           "global_batch_size": self.global_batch_size})
            return config
    

## core routine

In [14]:
# def core_routine(params):
    
# Set up the input pipeline
# NOTE(review): `params` is only assigned in the "rnn fit function" cell further down in
# this notebook, so that cell has to be executed before this one.
training_processed = params['training_dataset']
global_batch_size = params['minibatch_size'] * strategy.num_replicas_in_sync
tf_data_train = convert_to_tf_dataset(training_processed, global_batch_size)

# Distribute the batches across the replicas of the strategy
dist_tf_data_train = strategy.experimental_distribute_dataset(tf_data_train)

2024-03-05 18:43:54.164477: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:786] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_4"
op: "TensorSliceDataset"
input: "Placeholder/_0"
input: "Placeholder/_1"
input: "Placeholder/_2"
input: "Placeholder/_3"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_FLOAT
      type: DT_FLOAT
      type: DT_FLOAT
      type: DT_INT64
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 2403
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: 160
        }
        dim {
          size: 3
        }
      }
      shape {
        dim {
          size: 160
        }
        dim {
          size: 3
        }

In [15]:
def inspect_dataset(batch):
    """Print one ``name: shape`` line for every entry of a batch dict."""
    # Lines are produced lazily, so each one is printed as soon as it is formatted.
    shape_lines = (f"{key}: {value.shape}" for key, value in batch.items())
    for line in shape_lines:
        print(line)

# Run the shape inspection on every replica for each distributed batch.
for dist_batch in dist_tf_data_train:
    strategy.run(inspect_dataset, args=(dist_batch,))





inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)




inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)




inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)




inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)




inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 3)
active_entries: (64, 160, 3)
sequence_lengths: (64,)
inputs: (64, 160, 3)
outputs: (64, 160, 

In [16]:
# Define the loss function
# loss_func = CustomLoss(params['performance_metric'], strategy.num_replicas_in_sync, global_batch_size)

# Define the tracking metrics
with strategy.scope():
    train_metric = metrics.MeanSquaredError(name='train_mse')
    valid_loss = metrics.Mean(name='valid_loss')
    valid_metric = metrics.MeanSquaredError(name='valid_mse')

# Build the model and the optimizer
# A model, an optimizer, and a checkpoint must be created under `strategy.scope`.
with strategy.scope():
    model = create_model(params)
    model.summary()
    optimizer = tf.keras.optimizers.Adam(learning_rate=params['learning_rate'])

Model: "treatment_rnn_action_inputs_only"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, None, 3)]            0         []                            
                                                                                                  
 initial_h (InputLayer)      [(None, 16)]                 0         []                            
                                                                                                  
 initial_c (InputLayer)      [(None, 16)]                 0         []                            
                                                                                                  
 lstm (LSTM)                 [(None, None, 16),           1280      ['input_1[0][0]',             
                              (None, 16),                          

In [17]:
# 训练函数 ###################################################################################
def train_step(data): #, chunk_sizes
    """Run one optimisation step on a single replica and return its loss.

    Relies on the notebook globals ``model``, ``optimizer``, ``train_metric``,
    ``compute_mse_loss``, ``hidden_layer_size`` and ``max_norm``.
    """
    features = data['inputs']
    targets = data['outputs']
    # Only needed by the commented-out CustomLoss.train_call path below.
    active_entries = data['active_entries']
    if 'propensity_weights' in data:
        weights = data['propensity_weights']
    else:
        weights = tf.constant(1.0)

    with tf.GradientTape() as tape:
        # The LSTM starts from an all-zero hidden and cell state for every example.
        n_examples = tf.shape(features)[0]
        zero_state = tf.zeros([n_examples, hidden_layer_size], dtype=tf.float32)
        model_outputs = model([features, zero_state, zero_state], training=True)
        predictions = model_outputs[0]
        # loss = loss_func.train_call(outputs, predictions, active_entries, weights)
        loss = compute_mse_loss(targets, predictions)

    trainable = model.trainable_variables
    grads = tape.gradient(loss, trainable)
    # Clip by global norm before applying the update
    clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=max_norm)
    optimizer.apply_gradients(zip(clipped_grads, trainable))

    train_metric.update_state(targets, predictions)

    return loss

@tf.function
def distributed_train_step(data): #, chunk_sizes
    """Run ``train_step`` on every replica and return the sum of the per-replica losses."""
    replica_losses = strategy.run(train_step, args=(data,)) #, chunk_sizes
    summed_loss = strategy.reduce(tf.distribute.ReduceOp.SUM, replica_losses, axis=None)
    return summed_loss
# ###########################################################################################

In [None]:
# Globals read by train_step
hidden_layer_size = params['hidden_layer_size']
max_norm = params['max_norm']
for epoch in range(params['num_epochs']):
    # TRAIN LOOP
    total_loss = 0.0
    num_batches = 0
    # Iterate the *distributed* dataset so each replica receives its own shard of every
    # global batch. Feeding the plain dataset to strategy.run hands the same full batch
    # to every replica.
    for x in dist_tf_data_train:
        total_loss += distributed_train_step(x)
        num_batches += 1

    # Mean loss over the epoch; NaN (instead of a NameError) when no batch was produced.
    train_loss = total_loss / num_batches if num_batches else float('nan')

    # NOTE(review): the fields labelled "Accuracy" hold MeanSquaredError metrics (x100), and
    # the validation metrics are never updated in this loop, so they print their reset value.
    template = ("Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, "
              "Test Accuracy: {}")
    print(template.format(epoch + 1, train_loss,
                         train_metric.result() * 100, valid_loss.result(),
                         valid_metric.result() * 100))

    valid_loss.reset_states()
    train_metric.reset_states()
    valid_metric.reset_states()

INFO:tensorflow:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
2024-03-05 18:44:20.942445: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2024-03-05 18:44:20.959377: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-03-05 18:44:20.966005: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600


## rnn fit function

In [13]:
# Fixed hyperparameters per network, as tuples of
# (dropout_rate, memory_multiplier, num_epochs, minibatch_size, learning_rate, max_norm).
# Networks listed here skip the random hyperparameter draw further down in this cell.
specifications = {
     'rnn_propensity_weighted': (0.1, 4, 100, 64, 0.005, 1.0),
     'treatment_rnn_action_inputs_only': (0.1, 4, 100, 64, 0.005, 1.0),
     'treatment_rnn': (0.1, 4, 100, 64, 0.005, 1.0),
} # decrease learning rate from 0.01 to 0.005 
####################################################################################################################


# def rnn_fit(dataset_map, networks_to_train, MODEL_ROOT, b_use_predicted_confounders,
#             b_use_oracle_confounders=False, b_remove_x1=False):

# Values that used to be keyword defaults of the commented-out rnn_fit signature.
b_use_oracle_confounders=False; b_remove_x1=False
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)


# Get the correct networks to train
if networks_to_train == "propensity_networks":
    logging.info("Training propensity networks")
    net_names = ['treatment_rnn_action_inputs_only']
    # net_names = ['treatment_rnn']

elif networks_to_train == "encoder":
    logging.info("Training R-MSN encoder")
    net_names = ["rnn_propensity_weighted"]

elif networks_to_train == "user_defined":
    # Placeholder branch: always raises until a network is specified here.
    logging.info("Training user defined network")
    raise NotImplementedError("Specify network to use!")

else:
    raise ValueError("Unrecognised network type")

logging.info("Running hyperparameter optimisation")

# Experiment name
expt_name = "treatment_effects"

# Possible networks to use along with their activation functions,
# as (hidden activation, output activation) pairs
# change hidden layer of rnn_propensity_weighted to tanh
activation_map = {'rnn_propensity_weighted': ("tanh", 'linear'),
                  'rnn_propensity_weighted_logistic': ("elu", 'linear'),
                  'rnn_model': ("elu", 'linear'),
                  'treatment_rnn': ("tanh", 'sigmoid'),
                  'treatment_rnn_action_inputs_only': ("tanh", 'sigmoid')
                  }

# Setup tensorflow
# NOTE(review): this repeats the GPU setup cell near the top of the notebook, logging the
# outcome instead of printing it.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Make every detected GPU visible to TensorFlow
        tf.config.set_visible_devices(gpus, 'GPU')
        for gpu in gpus:
            # Enable memory growth on this GPU
            tf.config.experimental.set_memory_growth(gpu, True)
        logging.info("Using GPU with memory growth")
    except RuntimeError as e:
        # Changing device settings after the program is running may cause errors
        logging.info(e)
else:
    # No GPU detected: fall back to the CPU
    logging.info("No GPU found, using CPU")

## Create a distribution strategy
#strategy = tf.distribute.MirroredStrategy()
#print('Number of devices: %d' % strategy.num_replicas_in_sync)

# Pull the three splits out of dataset_map.
training_data = dataset_map['training_data']
validation_data = dataset_map['validation_data']
test_data = dataset_map['test_data']

# Start Running hyperparam opt
# For every selected network: derive the data-packaging flags from its name, process the
# three splits, pick the hyperparameters and assemble the `params` dict. `params` and the
# other loop variables stay bound to the last network after the loop ends.
#opt_params = {}
mse_dict = {}
for net_name in net_names:
    print(net_name)
    # Re-run hyperparameter optimisation if parameters are not specified, otherwise train with defined params
    max_hyperparam_runs = 3 if net_name not in specifications else 1

    # Pull datasets
    b_predict_actions = "treatment_rnn" in net_name
    use_truncated_bptt = net_name != "rnn_model_bptt" # whether to train with truncated backpropagation through time
    b_propensity_weight = "rnn_propensity_weighted" in net_name
    b_use_actions_only = "rnn_action_inputs_only" in net_name


    # Extract only relevant trajs and shift data
    training_processed = get_processed_data(training_data, b_predict_actions,
                                                 b_use_actions_only, b_use_predicted_confounders,
                                                 b_use_oracle_confounders, b_remove_x1)
    validation_processed = get_processed_data(validation_data, b_predict_actions,
                                                   b_use_actions_only, b_use_predicted_confounders,
                                                   b_use_oracle_confounders, b_remove_x1)
    test_processed = get_processed_data(test_data, b_predict_actions,
                                             b_use_actions_only, b_use_predicted_confounders,
                                             b_use_oracle_confounders, b_remove_x1)


    num_features = training_processed['scaled_inputs'].shape[-1]
    # num_features = 28
    num_outputs = training_processed['scaled_outputs'].shape[-1]

    # Load propensity weights if they exist
    if b_propensity_weight:

        if net_name == 'rnn_propensity_weighted_den_only':
            # use un-stabilised IPTWs generated by propensity networks
            propensity_weights = np.load(os.path.join(MODEL_ROOT, "propensity_scores_den_only.npy"))
        elif net_name == "rnn_propensity_weighted_logistic":
            # Use logistic regression weights
            propensity_weights = np.load(os.path.join(MODEL_ROOT, "propensity_scores.npy"))
            tmp = np.load(os.path.join(MODEL_ROOT, "propensity_scores_logistic.npy"))
            # Truncate the logistic weights to the number of rows in propensity_scores.npy
            propensity_weights = tmp[:propensity_weights.shape[0], :, :]
        else:
            # use stabilised IPTWs generated by propensity networks
            propensity_weights = np.load(os.path.join(MODEL_ROOT, "propensity_scores.npy"))

        logging.info("Net name = {}. Mean-adjusting!".format(net_name))

        # Rescale the weights so that their mean is 1
        propensity_weights /= propensity_weights.mean()

        training_processed['propensity_weights'] = np.array(propensity_weights, dtype='float32')

    # Start hyperparameter optimisation (training model directly)
    hyperparam_count = 0
    # while True:

    if net_name not in specifications:

        # No fixed specification: draw each hyperparameter at random
        # NOTE(review): no random seed is set in the visible notebook, so these draws are
        # not reproducible across runs.
        dropout_rate = np.random.choice([0.1, 0.2, 0.3, 0.4, 0.5])
        memory_multiplier = np.random.choice([0.5, 1, 2, 3, 4])
        num_epochs = 100
        minibatch_size = np.random.choice([64, 128, 256])
        learning_rate = np.random.choice([0.01, 0.005, 0.001])  #([0.01, 0.001, 0.0001])
        max_norm = np.random.choice([0.5, 1.0, 2.0, 4.0])
        hidden_activation, output_activation = activation_map[net_name]

    else:
        # Unpack the fixed tuple: (dropout_rate, memory_multiplier, num_epochs,
        # minibatch_size, learning_rate, max_norm)
        spec = specifications[net_name]
        logging.info("Using specifications for {}: {}".format(net_name, spec))
        dropout_rate = spec[0]
        memory_multiplier = spec[1]
        num_epochs = spec[2]
        minibatch_size = spec[3]
        learning_rate = spec[4]
        max_norm = spec[5]
        hidden_activation, output_activation = activation_map[net_name]

    model_folder = os.path.join(MODEL_ROOT, net_name)

    # transform data to tf format


    # construct model parameters
    # hidden_layer_size = int(memory_multiplier * num_features)
    # NOTE(review): 25 matches the covariate dimension printed by the data-loading cell;
    # presumably deliberate so the hidden size does not depend on the input packaging - confirm.
    hidden_layer_size = int(memory_multiplier * 25)
    params = {'net_name': net_name,
            'experiment_name': expt_name,
            'training_dataset': training_processed,
            'validation_dataset': validation_processed,
            'test_dataset':  test_processed,
            'dropout_rate': dropout_rate,
            'input_size': num_features,
            'output_size': num_outputs,
            'hidden_layer_size': hidden_layer_size,
            'num_epochs': num_epochs,
            'minibatch_size': minibatch_size,
            'learning_rate': learning_rate,
            'max_norm': max_norm,
            'model_folder': model_folder,
            'hidden_activation': hidden_activation,
            'output_activation': output_activation,
            'backprop_length': 60,  # backprop over 60 timesteps for truncated backpropagation through time
            'softmax_size': 0, #not used in this paper, but allows for categorical actions
            'performance_metric': 'xentropy' if output_activation == 'sigmoid' else 'mse'}

    # core_routine(model_parameters)

INFO:Training propensity networks
INFO:Running hyperparameter optimisation
INFO:Using GPU with memory growth
INFO:Using specifications for treatment_rnn_action_inputs_only: (0.1, 4, 100, 64, 0.005, 1.0)


treatment_rnn_action_inputs_only


In [17]:
# NOTE(review): `rnn_fit` only appears as a commented-out `def` in the visible notebook, so
# this call presumably relies on a definition from an earlier kernel session or from code
# outside this view - confirm before relying on Restart & Run All.
rnn_fit(dataset_map, networks_to_train, MODEL_ROOT, b_use_predicted_confounders)

INFO:Training propensity networks
INFO:Running hyperparameter optimisation
INFO:Using GPU with memory growth
INFO:Using specifications for treatment_rnn: (0.1, 4, 100, 64, 0.005, 1.0)


treatment_rnn


2024-03-05 14:36:09.344243: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:786] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_4"
op: "TensorSliceDataset"
input: "Placeholder/_0"
input: "Placeholder/_1"
input: "Placeholder/_2"
input: "Placeholder/_3"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_FLOAT
      type: DT_FLOAT
      type: DT_FLOAT
      type: DT_INT64
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 2403
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: 160
        }
        dim {
          size: 3
        }
      }
      shape {
        dim {
          size: 160
        }
        dim {
          size: 28
        

INFO:tensorflow:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
2024-03-05 14:36:11.819374: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2024-03-05 14:36:11.844503: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600
2024-03-05 14:36:11.845333: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-03-05 14:36:12.290942: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55fcb87e4280 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-03-05 14:36:12.290991: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2024-03-05 14:36:12.291001: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (1

INFO:tensorflow:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:Collective all_reduce tensors: 5 all_reduces, num_devices = 2, group_size = 2, implementation = CommunicationImplementation.NCCL, num_packs = 1


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


Epoch 1, Loss: 26.423446655273438, Accuracy: 33.42791748046875, Test Loss: 0.0, Test Accuracy: 0.0
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


Epoch 2, Loss: 20.1297607421875, Accuracy: 25.465864181518555, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 3, Loss: 19.281408309936523, Accuracy: 24.392627716064453, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 4, Loss: 19.014102935791016, Accuracy: 24.05445671081543, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 5, Loss: 18.92331314086914, Accuracy: 23.93960189819336, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 6, Loss: 18.84199333190918, Accuracy: 23.83672523498535, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 7, Loss: 18.776779174804688, Accuracy: 23.754222869873047, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 8, Loss: 18.7154541015625, Accuracy: 23.676645278930664, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 9, Loss: 18.688953399658203, Accuracy: 23.643117904663086, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 10, Loss: 18.630285263061523, Accuracy: 23.568899154663086, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 11, Loss: 18.58951759338379, Accuracy: 23.5173282623291, Test Loss: 0.0, Test Accuracy: 0.0
Epoch 12, 

KeyboardInterrupt: 

In [31]:
# Scratch check: displays `memory_multiplier` scaled by 28 (saved output: 112).
# NOTE(review): 28 is a magic number — it matches the `size: 28` dimension in
# the dataset spec logged by the training cell, presumably 25 covariates + 3
# treatments; confirm and replace with a named constant.
# NOTE(review): this cell's execution count (In [31]) jumps from In [17] and
# `memory_multiplier` is not defined in the visible cells, so the result relies
# on kernel state; verify it survives Restart Kernel -> Run All or delete it.
memory_multiplier * 28

112