# **Universal Function Approximation over Sets**
## **Experiment 2:** *Standard Deviation of a set of scalar values*
---

## Imports and Library Functions
---

In [None]:
# !pip install tensorboard-plugin-profile
# !pip install tensorflow==2.9.1
# !pip install scikit-learn
# !pip install matplotlib
# !pip install tqdm

In [None]:
# SYSTEM RELATED IMPORTS
import os
from pathlib import Path
import time

# MATH RELATED IMPORTS
import numpy as np
import itertools
from sklearn.model_selection import train_test_split

# TF/KERAS RELATED
import tensorflow as tf
import keras.backend as K
from keras.layers import Input, Dense, LSTM, GRU, Embedding, Lambda, serialize, deserialize, Attention
from keras.models import Model, load_model, clone_model
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# MISC IMPORTS
from tqdm import tqdm,trange

## Basic Settings
---

In [None]:
# One wall-clock timestamp (truncated to whole seconds) is taken per run;
# the same integer is kept under both names.
seed = datetime = int(time.time())

## Global Experimental Parameters 
---

In [None]:
# TRAINING SET PARAMETERS
n_train = 100_000  # number of training examples
max_train = 10  # cardinality bound handed to the training-set generator

# TESTING SET PARAMETERS
n_test = 5_000  # number of testing examples per evaluated cardinality
min_test = 5  # smallest cardinality evaluated
max_test = 100  # largest cardinality evaluated
step_test = 5  # stride between consecutive evaluated cardinalities

# SET FUNCTION TO APPROXIMATE
# Maps a variable-size collection of scalars to its target label
# (here: the standard deviation as computed by np.std)
#================================
labelling_function = np.std
#================================

# ELEMENT DISTRIBUTION
# A single set element is one draw from a uniform distribution whose
# bounds are given by input_range
#================================
input_range = (0, 9)


def element_generator():
  """Draw one set element uniformly from ``input_range``."""
  lower, upper = input_range
  return np.random.uniform(lower, upper)
#================================

# REPEATABILITY
# NumPy and TensorFlow are both seeded from the same run-level seed
np.random.seed(seed)
tf.random.set_seed(seed)

# STORAGE & MISC
# Datasets, model weights and result logs all live in the same directory;
# test_name prefixes every file written by this experiment
data_dir = weights_dir = logs_dir = '/tmp'
test_name = 'E2'

## Model Aggregation Strategies
---

In [None]:
class SummationAggregation(Lambda):
    """Lambda layer whose default function sums its input over axis 1.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``, i.e. axis 1 is removed.
    """

    def __init__(self, function=lambda x: K.sum(x, axis=1), output_shape=(lambda shape: (shape[0], shape[2])), mask=None, arguments=None, trainable=False, **kwargs):
        # Forward everything unchanged to Lambda; only the defaults differ
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name used as this layer's key in settings and logs."""
        return "Summation"

class ArithmeticMeanAggregation(Lambda):
    """Lambda layer whose default function averages its input over axis 1.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``, i.e. axis 1 is removed.
    """

    def __init__(self, function=lambda x: K.mean(x, axis=1), output_shape=(lambda shape: (shape[0], shape[2])), mask=None, arguments=None, trainable=False, **kwargs):
        # Forward everything unchanged to Lambda; only the defaults differ
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name used as this layer's key in settings and logs."""
        return "Arithmetic Mean"

class ProductAggregation(Lambda):
    """Lambda layer whose default function multiplies its input over axis 1.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``, i.e. axis 1 is removed.
    """

    def __init__(self, function=lambda x: K.prod(x, axis=1), output_shape=(lambda shape: (shape[0], shape[2])), mask=None, arguments=None, trainable=False, **kwargs):
        # Forward everything unchanged to Lambda; only the defaults differ
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name used as this layer's key in settings and logs."""
        return "Product"

class MaximumAggregation(Lambda):
    """Lambda layer whose default function takes the maximum of its input over axis 1.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``, i.e. axis 1 is removed.
    """

    def __init__(self, function=lambda x: K.max(x, axis=1), output_shape=(lambda shape: (shape[0], shape[2])), mask=None, arguments=None, trainable=False, **kwargs):
        # Forward everything unchanged to Lambda; only the defaults differ
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name used as this layer's key in settings and logs."""
        return "Maximum"

class MinimumAggregation(Lambda):
    """Lambda layer whose default function takes the minimum of its input over axis 1.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``, i.e. axis 1 is removed.
    """

    def __init__(self, function=lambda x: K.min(x, axis=1), output_shape=(lambda shape: (shape[0], shape[2])), mask=None, arguments=None, trainable=False, **kwargs):
        # Forward everything unchanged to Lambda; only the defaults differ
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name used as this layer's key in settings and logs."""
        return "Minimum"


## Model Hyper-parameters and Metrics
---

In [None]:
# Aggregation layer classes compared by this experiment, in evaluation order
aggregator_list = [
    SummationAggregation,
    ArithmeticMeanAggregation,
    ProductAggregation,
    MaximumAggregation,
    MinimumAggregation,
]

# HYPERPARAMETERS
hyper_parameters = {
    # Per-aggregation settings, keyed by each layer's display name
    'aggregation': {
        SummationAggregation.get_layer_name(): {
            'optimizer_args': {'learning_rate': 1e-3, 'epsilon': 1e-2},
        },
        ArithmeticMeanAggregation.get_layer_name(): {
            'optimizer_args': {'learning_rate': 1e-3, 'epsilon': 1e-2},
        },
        ProductAggregation.get_layer_name(): {
            'optimizer_args': {'learning_rate': 1e-3, 'epsilon': 1e-2},
        },
        MaximumAggregation.get_layer_name(): {
            'optimizer_args': {'learning_rate': 1e-3, 'epsilon': 1e-2},
        },
        MinimumAggregation.get_layer_name(): {
            'optimizer_args': {'learning_rate': 1e-3, 'epsilon': 1e-2},
        },
    },

    # Encoder: three Dense layers; entry i of 'encoder_args' configures
    # entry i of 'encoder'
    'encoder': [Dense, Dense, Dense],
    'encoder_args': [
        {'units': 100, 'activation': 'tanh'},
        {'units': 30, 'activation': 'tanh'},
        {'units': 10},
    ],

    # Decoder: a single Dense layer with one output unit
    'decoder': [Dense],
    'decoder_args': [{'units': 1}],

    # Training settings
    'p_validation': 0.15,  # proportion of the training set held out for validation
    'n_epochs': 20,  # number of training epochs
    'n_batch': 128,  # batch size for training / evaluation
    'optimizer': Adam,  # optimizer class, instantiated with 'optimizer_args'
    'loss': 'mae',  # loss function
}

## Helper methods for saving files
---

In [None]:
# Build the on-disk locations of the training set
def training_set_filenames():
  """Return the ``(dataset_path, labels_path)`` pair for the training set.

  Both paths are derived from the module-level ``data_dir`` and ``test_name``.
  """
  dataset_template = '{}/{}_training_dataset.npy'
  labels_template = '{}/{}_training_labels.npy'
  location = (data_dir, test_name)
  return dataset_template.format(*location), labels_template.format(*location)


# Build the on-disk locations of the testing set for one set length
def testing_set_filenames(length):
  """Return the ``(dataset_path, labels_path)`` pair for a testing set.

  Paths are derived from the module-level ``data_dir`` and ``test_name``;
  ``length`` is embedded in the file names so each set length gets its own files.
  """
  dataset_template = '{}/{}_testing_dataset_{}.npy'
  labels_template = '{}/{}_testing_labels_{}.npy'
  parts = (data_dir, test_name, length)
  return dataset_template.format(*parts), labels_template.format(*parts)


# Build the on-disk location of the weights of one model variant
def weights_filename(layer):
  """Return the ``.hdf5`` path for the model identified by ``layer``.

  The path is derived from the module-level ``weights_dir`` and ``test_name``;
  ``layer`` is inserted verbatim between parentheses in the file name.
  """
  template = '{}/{}_weights_({}).hdf5'
  return template.format(weights_dir, test_name, layer)


# Build the on-disk location of this run's result log
def log_filename():
  """Return the ``.json`` log path, tagged with this run's ``seed``.

  The path is derived from the module-level ``logs_dir``, ``test_name`` and ``seed``.
  """
  template = '{}/{}_instance_({}).json'
  return template.format(logs_dir, test_name, seed)

## Log of Results
---

In [None]:
# One result record per aggregation layer, keyed by the layer's display name.
# Every record starts empty and is filled in by the training / evaluation cells.
instance_log = dict(
    (
        aggregator.get_layer_name(),
        {
            'loss_per_training_epoch': {},
            'performance_per_set_size': {},
            'training_time': None,
            'evaluation_time_per_set_size': {},
        },
    )
    for aggregator in aggregator_list
)

## Helper methods for timekeeping
---

In [None]:
# Simple timer to monitor training / evaluation times
class Timer:
  """Minimal stopwatch: call ``start()``, then ``elapsed()`` for the seconds since.

  Intervals are measured with ``time.perf_counter()``, a monotonic
  high-resolution clock, so adjustments of the system wall clock while a
  measurement is running cannot distort (or make negative) the result.
  """

  def __init__(self):
    # None marks a timer that has not been started yet
    self._time = None

  def start(self):
    """Record the current instant as the reference point (restarts the timer)."""
    self._time = time.perf_counter()

  def elapsed(self):
    """Return the seconds (float) elapsed since the most recent ``start()``.

    Raises:
      RuntimeError: if ``start()`` has never been called on this timer.
    """
    if self._time is None:
      raise RuntimeError('Timer.elapsed() called before Timer.start()')
    return time.perf_counter() - self._time

## Training/Testing Dataset Generation Methods
---

In [None]:
def create_train_data(num_examples, length):
  """Generate a training set of random variable-size sets and their labels.

  Each example is a list of ``n`` elements drawn with ``element_generator``,
  where ``n`` is drawn uniformly from ``1..length`` (both ends inclusive).
  Its label is ``labelling_function`` applied to that list.

  Args:
    num_examples: number of examples to generate.
    length: maximum cardinality of a generated set (must be >= 1).

  Returns:
    A pair ``(tf.ragged.constant(examples), tf.constant(labels))``.
  """

  # Start with an empty list of examples and labels
  X = []
  X_labels = []

  # For the desired number of training examples
  for _ in tqdm(range(num_examples), desc='Creating training examples of maximum length {}: '.format(length)):

    # Generate a random set cardinality.
    # np.random.randint excludes its upper bound, so `length + 1` is needed
    # for sets of size `length` to occur (and for length == 1 to be valid).
    n = np.random.randint(1, length + 1)

    # Generate a random set and add to list of examples
    target_set = [element_generator() for _ in range(n)]
    target_label = labelling_function(target_set)
    X.append(target_set)
    X_labels.append(target_label)

  return tf.ragged.constant(X), tf.constant(X_labels)

def gen_test_data(n_examples, length):
  """Generate a testing set in which every set has exactly ``length`` elements.

  Each example is a list of ``length`` draws from ``element_generator``; its
  label is ``labelling_function`` applied to that list.

  Args:
    n_examples: number of examples to generate.
    length: cardinality shared by every generated set.

  Returns:
    A pair ``(tf.ragged.constant(examples), tf.constant(labels))``.
  """
  examples, labels = [], []

  progress = tqdm(range(n_examples), desc='Creating testing examples of length {}: '.format(length))
  for _example in progress:
    # Draw the members first, then label the finished set
    members = [element_generator() for _member in range(length)]
    examples.append(members)
    labels.append(labelling_function(members))

  return tf.ragged.constant(examples), tf.constant(labels)

## Training Dataset Creation
---

In [None]:
# Create the training set and persist it to disk

# Locations used to store the training data
filename_dataset, filename_labels = training_set_filenames()
data_file = Path(filename_dataset)
label_file = Path(filename_labels)

# NOTE: the set is regenerated and re-saved unconditionally; the Path handles
# above are not consulted for an existing copy
temp_dataset, temp_labels = create_train_data(n_train, max_train)
np.save(filename_dataset, temp_dataset.numpy())
np.save(filename_labels, temp_labels.numpy())

# Expose the freshly generated tensors under their working names
X_train = temp_dataset
label_X_train = temp_labels



## Testing Dataset Creation
---

In [None]:
# Build one testing set per evaluated set length, persist each to disk, and
# keep the in-memory tensors in a dict keyed by set length
testing_collection = {}
for l in range(min_test, max_test + 1, step_test):

  # Locations used to store this length's data
  filename_dataset, filename_labels = testing_set_filenames(l)
  data_file = Path(filename_dataset)
  label_file = Path(filename_labels)

  # NOTE: the set is regenerated and re-saved unconditionally; the Path
  # handles above are not consulted for an existing copy
  temp_dataset, temp_labels = gen_test_data(n_test, l)
  np.save(filename_dataset, temp_dataset.numpy())
  np.save(filename_labels, temp_labels.numpy())

  # Add to the testing collection
  testing_collection[l] = (temp_dataset, temp_labels)

## Model Creation
---

In [None]:
# Method to create an encoder-decoder style model with a variable aggregation layer
def build_model(aggregator):
  """Assemble an encoder -> aggregation -> decoder Keras model.

  The encoder and decoder layer classes and their keyword arguments are read
  from ``hyper_parameters`` ('encoder' / 'encoder_args' and
  'decoder' / 'decoder_args').

  Args:
    aggregator: callable (an aggregation layer instance) applied to the
      encoder output before decoding.

  Returns:
    An uncompiled ``Model`` mapping the ragged input to the decoder output.
  """

  def run_stage(layers_key, args_key, tensor):
    # Instantiate every layer of the stage first, then apply them in order
    constructors = hyper_parameters.get(layers_key)
    constructor_kwargs = hyper_parameters.get(args_key)
    stage_layers = [make(**kwargs) for make, kwargs in zip(constructors, constructor_kwargs)]
    for stage_layer in stage_layers:
      tensor = stage_layer(tensor)
    return tensor

  # Ragged input: each example is a variable-length sequence of scalars;
  # a trailing axis is added so every element becomes a length-1 vector
  set_input = Input(shape=[None], ragged=True)
  per_element = tf.expand_dims(set_input, axis=2)

  # Encoder: maps the elements of the input into a latent space
  encoded = run_stage('encoder', 'encoder_args', per_element)

  # The latent embeddings are combined by the chosen aggregation strategy
  aggregated = aggregator(encoded)

  # Decoder: maps the aggregate to the resultant output
  decoded = run_stage('decoder', 'decoder_args', aggregated)

  return Model(inputs=set_input, outputs=decoded)

# Helper method to copy the weights of one model and set them in another
def duplicate_weights(out_model, in_model):
  """Copy weights layer-by-layer from ``out_model`` into ``in_model``.

  Layers are paired positionally via ``zip``, so pairing stops at the shorter
  of the two layer lists.

  Args:
    out_model: model whose layer weights are read.
    in_model: model whose layer weights are overwritten.

  Returns:
    ``in_model``, after its layers have been updated in place.
  """
  layer_pairs = zip(out_model.layers, in_model.layers)
  for source_layer, target_layer in layer_pairs:
    source_weights = source_layer.get_weights()
    target_layer.set_weights(source_weights)
  return in_model

# Wrapper to produce a custom object to register with each model
def get_custom_object(in_layer):
  """Return a one-entry dict mapping ``in_layer.__name__`` to ``in_layer``."""
  registry = {}
  registry[in_layer.__name__] = in_layer
  return registry

## Model Training
---

In [None]:
# Train one model per aggregation layer on a shared train/validation split.
# A model is only trained when its weights file does not exist yet; in that
# case its training time and per-epoch validation loss go into instance_log.

# Create a timer
training_timer = Timer()

# Reload the training set from disk
# (allow_pickle=True: presumably the variable-length sets are stored as an
# object array -- TODO confirm)
filename_dataset, filename_labels = training_set_filenames()
X_train = tf.ragged.constant(np.load(filename_dataset, allow_pickle=True))
label_X_train = tf.constant(np.load(filename_labels, allow_pickle=True))

# Split the example indices into a training part and a validation part,
# using the validation proportion from the hyper-parameters.
# The split is done once, so every aggregation layer sees the same partition.
n_train_total, _ = X_train.shape
I_train, I_val = train_test_split(range(n_train_total), test_size=hyper_parameters.get('p_validation'))

X_train_partial = tf.gather(X_train, indices=I_train)
X_val = tf.gather(X_train, indices=I_val)

label_X_train_partial = tf.gather(label_X_train, indices=I_train)
label_X_val = tf.gather(label_X_train, indices=I_val)

# For each type of aggregation in consideration
for agg_layer in aggregator_list:

  # Get the name of the layer (key into hyper_parameters and instance_log)
  layer_name = agg_layer.get_layer_name()

  # Create the relevant model with desired aggregation
  model = build_model(aggregator=agg_layer())
  
  # Register the custom object (aggregation layer) so Keras can resolve the
  # class by name while cloning / saving inside this scope
  custom_objects = get_custom_object(agg_layer)
  with tf.keras.utils.custom_object_scope(custom_objects):

      # Clone the model, then compile the clone with the optimizer settings
      # of this aggregation layer and the shared loss function
      model = tf.keras.models.clone_model(model)
      v_opt_args = hyper_parameters.get('aggregation').get(layer_name).get('optimizer_args')
      v_opt = hyper_parameters.get('optimizer')(**v_opt_args)
      v_los = hyper_parameters.get('loss')
      model.compile(loss=v_los, optimizer=v_opt)

      # Train and save weights if they don't already exist
      # (when the file exists nothing is trained and nothing is logged)
      filename = weights_filename(layer_name)
      weight_file = Path(filename)
      if not weight_file.is_file():
        print("Training commencing with aggregation layer: {}...".format(layer_name))

        # Checkpoint to `filename`, keeping only the best model seen so far
        # (presumably ranked by validation loss, the Keras default -- TODO confirm)
        checkpointer = ModelCheckpoint(
            filepath=filename, 
            verbose=0, 
            save_best_only=True
        )

        # start timer (the measured span covers the pre-training evaluation
        # as well as the fit)
        training_timer.start()
    
        # =======================================
        # pre-training fitness: validation loss of the untrained model,
        # logged below as "epoch 0"
        epoch_0 = model.evaluate(x=X_val, y=label_X_val, batch_size=hyper_parameters.get('n_batch'))
        
        # Fit model
        history = model.fit(
            x=X_train_partial, 
            y=label_X_train_partial, 
            epochs=hyper_parameters.get('n_epochs'), 
            batch_size=hyper_parameters.get('n_batch'),
            shuffle=True, 
            validation_data=(X_val, label_X_val),
            callbacks=[
                checkpointer, 
                #tboard_callback
            ],
            verbose=1
        )
        # =======================================

        # Gather elapsed time
        elapsed = training_timer.elapsed()
        instance_log[layer_name]['training_time'] = elapsed
        
        # save training metrics: index 0 is the pre-training validation loss,
        # index k (k >= 1) the validation loss after the k-th epoch
        for epoch, value in enumerate([epoch_0] + history.history['val_loss']):     
            instance_log[layer_name]['loss_per_training_epoch'][epoch] = value

## Performance Measures
---

In [None]:
# Accuracy
def acc(pred, labels):
  """Return the fraction of predictions that equal their label after rounding.

  Args:
    pred: array-like of predictions; it is rounded to the nearest integer and
      singleton axes are squeezed away before comparison.
    labels: sequence of target values, compared element-wise with the
      rounded predictions.

  Returns:
    The number of exact matches divided by ``len(labels)``, as a float.
  """
  # Use the `pred` argument (not any module-level `preds`) so the metric
  # always scores the array it was handed
  rounded = np.squeeze(np.round(pred))
  return 1.0 * np.sum(rounded == labels) / len(labels)

# Mean Absolute Error
def mae(pred, labels):
  """Return the mean absolute difference between predictions and labels.

  Singleton axes of ``pred`` are squeezed away before subtracting ``labels``;
  the summed absolute differences are divided by ``len(labels)``.
  """
  residuals = np.squeeze(pred) - labels
  total_absolute_error = np.sum(np.abs(residuals))
  return total_absolute_error / len(labels)

# Root Mean Squared Error
def rmse(pred, labels):
  """Return the root of the mean squared difference between predictions and labels.

  Singleton axes of ``pred`` are squeezed away before subtracting ``labels``;
  the sum of squared differences (a dot product of the difference vector with
  itself) is divided by ``len(labels)`` before the square root is taken.
  """
  residuals = np.squeeze(pred) - labels
  sum_of_squares = np.dot(residuals, residuals)
  mean_square = sum_of_squares / len(labels)
  return np.sqrt(mean_square)

# Metric functions applied to every (predictions, labels) pair during
# evaluation; each function's __name__ is used as its key in the result log
performance_metrics = [mae, rmse, acc]

## Model Evaluation
---

In [None]:
# Evaluate every trained aggregation model on every testing set length.
# For each (layer, length) pair the prediction time and each performance
# metric are written into instance_log.
lengths = range(min_test, max_test+1, step_test)

# Create a timer
testing_timer = Timer()
  
# For each aggregation layer
for agg_layer in aggregator_list:
  
  layer_name = agg_layer.get_layer_name()
  print("Prediction commencing with aggregation layer: {}".format(layer_name))
  
  # Add performance metrics: one {set size: value} dict per metric name
  instance_log[layer_name]['performance_per_set_size'] = {met.__name__: {} for met in performance_metrics}
  
  # Determine, for sets of a particular length
  for l in lengths:
    print('Evaluating at length: ', l)
    # Reset Keras backend state before building this iteration's model
    K.clear_session()
    
    # Gather test data from the files written for this set length
    filename_dataset, filename_labels = testing_set_filenames(l)
    X_test = np.load(filename_dataset, allow_pickle=True)
    label_X_test = np.load(filename_labels, allow_pickle=True)

    # Build a fresh model with this aggregation layer
    model = build_model(aggregator=agg_layer())

    # Load weights as determined through training
    filename = weights_filename(layer_name)
    
    # The aggregation class must be registered so load_model can resolve it
    custom_objects = get_custom_object(agg_layer)
    with tf.keras.utils.custom_object_scope(custom_objects):
      
      # Load the checkpointed model and copy its weights, layer by layer,
      # into the freshly built model used for prediction
      temp_model = load_model(filename)
      duplicate_weights(temp_model, model)

      # Start timer (only the predict call below is timed)
      testing_timer.start()

      # Perform prediction 
      preds = model.predict(
          X_test, 
          batch_size=hyper_parameters.get('n_batch'), 
          verbose=1
      )

      # Gather elapsed time
      elapsed = testing_timer.elapsed()
      instance_log[layer_name]['evaluation_time_per_set_size'][l] = elapsed

    # Add to monitored metric log: metric name -> set size -> value
    for met in performance_metrics:
      performance = met(preds, label_X_test)
      instance_log[layer_name]['performance_per_set_size'][met.__name__][l] = performance

## Saving Results
---

In [None]:
import json

# Write the collected results of this run to the JSON file chosen by
# log_filename()
output_filename = log_filename()
with open(output_filename, mode='w') as handle:
    json.dump(instance_log, fp=handle)


In [None]:
# Remove temporary data / weight files written by this experiment.
#
# Only regular files named '<test_name>_*.npy' in data_dir and
# '<test_name>_*.hdf5' in weights_dir are deleted. Unrelated files,
# sub-directories and the JSON result log are left untouched, even when all
# of these directories are the same (e.g. a shared '/tmp').
_cleanup_prefix = '{}_'.format(test_name)

# A set de-duplicates paths, so a file is never removed twice when data_dir
# and weights_dir point at the same directory
_cleanup_targets = set()
for d, suffix in ((data_dir, '.npy'), (weights_dir, '.hdf5')):
    for f in os.listdir(d):
        if f.startswith(_cleanup_prefix) and f.endswith(suffix):
            _cleanup_targets.add(os.path.join(d, f))

for path in sorted(_cleanup_targets):
    # Skip anything that is not (or is no longer) a regular file
    if os.path.isfile(path):
        os.remove(path)

In [None]:
# Helper methods to delete observations
# os.remove(result_filename())
# os.remove(training_results_filename())