<a href="https://colab.research.google.com/github/Adrian-Rae-19004029/Universal-function-approximation-over-sets/blob/main/Experiments/E2%20-%20Standard%20Deviation%20of%20Textual%20Elements/E2_STD_of_Scalar_Elements.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Universal Function Approximation over Sets**
## **Experiment 2:** *Population standard deviation of a set of scalar values*
---

## Imports and Library Functions
---

In [None]:
# SYSTEM RELATED IMPORTS
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '0'
from pathlib import Path
import time
!pip install Pympler
from pympler import asizeof
get_alloc = asizeof.asizeof

# MATH RELATED IMPORTS
import numpy as np
import itertools
from sklearn.model_selection import train_test_split

# VIS RELATED IMPORTS
import matplotlib
import matplotlib.pyplot as plt

# TF/KERAS RELATED
!pip install tensorflow==2.9.1
import tensorflow as tf
import keras.backend as K
from keras.layers import Input, Dense, LSTM, GRU, Embedding, Lambda, serialize, deserialize, Attention
from keras.models import Model, load_model, clone_model
from tensorflow.keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# MISC IMPORTS
from tqdm import tqdm,trange

## Basic Settings
---

In [None]:
# Wall-clock timestamp (whole seconds since the epoch) taken when this cell runs.
# NOTE(review): the name `datetime` would shadow the stdlib module if it were ever imported.
datetime = int(time.time())
# The timestamp doubles as the RNG seed, so the seed changes from run to run.
seed = datetime
# When True, the "Saving Results" cell writes metrics and timings to disk.
write_results = True

## Global Experimental Parameters 
---

In [None]:
# TRAINING SET PARAMETERS
n_train = 100000 # number of training examples
max_train = 10 # maximum cardinality of a training set member

# TESTING SET PARAMETERS
n_test = 5000 # number of testing examples
min_test = 5 # minimum cardinality of a testing set member
max_test = 100 # maximum cardinality of a testing set member
step_test = 5 # interval through which cardinalities of set members are tested

# SET FUNCTION TO APPROXIMATE
# Maps an input set of variable size to a target label
#================================
labelling_function = np.std
#================================

# ELEMENT DISTRIBUTION
# How an individual element of a set is generated
#================================
input_range = (0,9) # (low, high) bounds handed to np.random.uniform
element_generator = lambda: np.random.uniform(*input_range)
#================================


# REPEATABILITY
# Seed both NumPy and TensorFlow with the same value
np.random.seed(seed)
tf.random.set_seed(seed)

# STORAGE & MISC
data_path = '/tmp' # directory where generated datasets are saved as .npy files
weight_path = '/tmp' # directory where model checkpoints are written
result_path = '/content/drive/MyDrive/FAOS_results' # directory of the accumulated result / timing logs; NOTE(review): no Drive mount is visible in this file
timing_path = result_path # NOTE(review): not referenced by the filename helpers visible in this file
test_name = 'E2' # prefix used in every generated filename
always_regenerate_data = True # regenerate datasets even when saved copies exist
always_regenerate_weights = True # retrain even when a weight file exists
base_verbose = 1 # verbosity passed to model.fit / model.predict
config_hash = "Final" # label embedded in the result / timesheet filenames
plot_scale = 1.6 # multiplier applied to the base 10x8 figure size
plot_text = 14 # font size applied to plots

## Model Aggregation Strategies
---

In [None]:
class SummationAggregation(Lambda):
    """Lambda layer whose default function is ``K.sum(x, axis=1)``.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``.
    """

    def __init__(
        self,
        function=lambda x: K.sum(x, axis=1),
        output_shape=(lambda shape: (shape[0], shape[2])),
        mask=None,
        arguments=None,
        trainable=False,
        **kwargs
    ):
        # Everything is forwarded unchanged to the Lambda constructor.
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name that keys hyper-parameters, timings and metrics."""
        return "Summation"

class ArithmeticMeanAggregation(Lambda):
    """Lambda layer whose default function is ``K.mean(x, axis=1)``.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``.
    """

    def __init__(
        self,
        function=lambda x: K.mean(x, axis=1),
        output_shape=(lambda shape: (shape[0], shape[2])),
        mask=None,
        arguments=None,
        trainable=False,
        **kwargs
    ):
        # Everything is forwarded unchanged to the Lambda constructor.
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name that keys hyper-parameters, timings and metrics."""
        return "Arithmetic Mean"

class ProductAggregation(Lambda):
    """Lambda layer whose default function is ``K.prod(x, axis=1)``.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``.
    """

    def __init__(
        self,
        function=lambda x: K.prod(x, axis=1),
        output_shape=(lambda shape: (shape[0], shape[2])),
        mask=None,
        arguments=None,
        trainable=False,
        **kwargs
    ):
        # Everything is forwarded unchanged to the Lambda constructor.
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name that keys hyper-parameters, timings and metrics."""
        return "Hadamard Product"

class MaximumAggregation(Lambda):
    """Lambda layer whose default function is ``K.max(x, axis=1)``.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``.
    """

    def __init__(
        self,
        function=lambda x: K.max(x, axis=1),
        output_shape=(lambda shape: (shape[0], shape[2])),
        mask=None,
        arguments=None,
        trainable=False,
        **kwargs
    ):
        # Everything is forwarded unchanged to the Lambda constructor.
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name that keys hyper-parameters, timings and metrics."""
        return "Maximum"

class MinimumAggregation(Lambda):
    """Lambda layer whose default function is ``K.min(x, axis=1)``.

    The default ``output_shape`` callable maps an input shape to
    ``(shape[0], shape[2])``.
    """

    def __init__(
        self,
        function=lambda x: K.min(x, axis=1),
        output_shape=(lambda shape: (shape[0], shape[2])),
        mask=None,
        arguments=None,
        trainable=False,
        **kwargs
    ):
        # Everything is forwarded unchanged to the Lambda constructor.
        super().__init__(
            function,
            output_shape,
            mask=mask,
            arguments=arguments,
            trainable=trainable,
            **kwargs
        )

    @classmethod
    def get_layer_name(cls):
        """Return the display name that keys hyper-parameters, timings and metrics."""
        return "Minimum"

# class GeometricMeanAggregation(Lambda):  
#     def __init__(self, function=lambda x: K.exp(K.mean(K.log(K.cast(x, tf.float64)), axis=1)), output_shape=(lambda shape: (shape[0], shape[2])), mask=None, arguments=None, trainable=False, **kwargs):
#       super(GeometricMeanAggregation, self).__init__(function, output_shape, mask=mask, arguments=arguments, trainable=trainable, **kwargs)

#     classmethod
#     def get_layer_name(cls):
#       return "Geometric Mean"

# class SelfAttentionAggregation(Lambda):  
#     def __init__(self, function=None, output_shape=None, mask=None, arguments=None, trainable=None, **kwargs):
#       def self_attention(X):
#         scores = tf.matmul(X,X,transpose_b=True)
#         distribution = tf.nn.softmax(scores)
#         return tf.matmul(distribution, X)
      
#       f = lambda x: K.sum(self_attention(x), axis=1)
#       sh = (lambda shape: (shape[0], shape[2]))
#       super(SelfAttentionAggregation, self).__init__(f, sh, mask=None, arguments=arguments, trainable=False, **kwargs)

#     classmethod
#     def get_layer_name(cls):
#       return "Scaled Dot Product Self Attention"


## Model Hyper-parameters and Metrics
---

In [None]:
# Establish the list of aggregation layers to test with
aggregator_list = [SummationAggregation, ArithmeticMeanAggregation, ProductAggregation, MaximumAggregation, MinimumAggregation]

# HYPERPARAMETERS
# Keys under 'aggregation' must match the strings returned by each layer's
# get_layer_name(); 'optimizer_args' is unpacked into the optimizer constructor,
# while 'color' and 'style' are passed to matplotlib when plotting.
hyper_parameters = {
    'aggregation': { # Aggregation layer properties
      'Summation': {
          'optimizer_args': {'learning_rate':1e-3, 'epsilon': 1e-2}, 
          'color': 'g',
          'style': 'go-'
      },
      'Arithmetic Mean': {
          'optimizer_args': {'learning_rate':1e-3, 'epsilon': 1e-2},
          'color': 'r',
          'style': 'ro-'
      },
      'Hadamard Product': {
          'optimizer_args': {'learning_rate':1e-3, 'epsilon': 1e-2},
          'color': 'b',
          'style': 'bo-'
      },
      'Maximum': {
          'optimizer_args': {'learning_rate':1e-3, 'epsilon': 1e-2},
          'color': 'y',
          'style': 'yo-'
      },
      'Minimum': {
          'optimizer_args': {'learning_rate':1e-3, 'epsilon': 1e-2},
          'color': 'm',
          'style': 'mo-'
      },  
    },
    'encoder': [Dense, Dense, Dense], # Encoder structure to be used by model: simple three layer NN
    'encoder_args': [{'units': 100}, {'units': 30}, {'units': 10}], # Encoder arguments (one dict per encoder layer)
    'decoder': [Dense], # Decoder structure to be used by model: single layer
    'decoder_args': [{'units': 1}], # Decoder arguments (one dict per decoder layer)
    'p_validation': 0.15, # Proportion of training set used for validation
    'n_epochs': 50, # Number of training epochs
    'n_batch': 128, # Batch size for training / evaluation
    'optimizer': Adam, # Optimizer
    'loss': 'mae' # Loss function   
}

# Look up a (possibly nested) hyper-parameter by its key path
def get_hyperparameter(*args):
  """Walk `hyper_parameters` along the given keys and return what is found.

  Each key is resolved with `.get` on the value reached so far, so a missing
  key yields None. If any step raises (for example because the value reached
  has no `.get`), None is returned as well.
  """
  node = hyper_parameters
  for key in args:
    try:
      node = node.get(key)
    except Exception:
      # Same best-effort contract as a failed lookup: report "not found"
      return None
  return node

## Helper methods for saving files
---

In [None]:
# Create a stable identifier for a collection of items
def hash_elements(*args):
  """Return a deterministic integer digest of the given items.

  The built-in hash() of a string is randomised per interpreter process, so
  it cannot be used to recognise files written by an earlier session. This
  version derives the identifier from a SHA-256 digest instead.

  Args:
    *args: arbitrary objects; each is reduced to a string before hashing.

  Returns:
    A non-negative int built from the first 16 hex digits of the digest.
  """
  import hashlib  # local import: the file's import cell does not provide hashlib

  def _describe(item):
    # str() of a function embeds its memory address; prefer the dotted name
    # whenever the callable has an unambiguous one (no lambdas / nested defs).
    qualname = getattr(item, '__qualname__', None)
    if callable(item) and qualname and '<' not in qualname:
      return '{}.{}'.format(getattr(item, '__module__', ''), qualname)
    return str(item)

  # A separator keeps e.g. (1, 23) and (12, 3) from producing the same text
  plainstring = '\x1f'.join(_describe(arg) for arg in args)
  digest = hashlib.sha256(plainstring.encode('utf-8')).hexdigest()
  return int(digest[:16], 16)

# Build the dataset / label paths of the training set for the current configuration
def training_set_filenames():
  """Return `(dataset_path, labels_path)` for the saved training set.

  Both names live under `data_path`, start with `test_name`, and embed an
  identifier derived from seed, max_train, n_train and labelling_function.
  """
  identifier = hash_elements(seed, max_train, n_train, labelling_function)
  dataset_template = '{}/{}_training_dataset_({}).npy'
  labels_template = '{}/{}_training_labels_({}).npy'
  return (
      dataset_template.format(data_path, test_name, identifier),
      labels_template.format(data_path, test_name, identifier),
  )

# Build the dataset / label paths of the testing set with the given cardinality
def testing_set_filenames(length):
  """Return `(dataset_path, labels_path)` for the saved testing set of `length`.

  Both names live under `data_path`, start with `test_name`, and embed an
  identifier derived from length, seed, n_test and labelling_function.
  """
  identifier = hash_elements(length, seed, n_test, labelling_function)
  dataset_template = '{}/{}_testing_dataset_({}).npy'
  labels_template = '{}/{}_testing_labels_({}).npy'
  return (
      dataset_template.format(data_path, test_name, identifier),
      labels_template.format(data_path, test_name, identifier),
  )

# Build the checkpoint path for the model that uses the given aggregation layer
def weight_filename(layer):
  """Return the .hdf5 path under `weight_path` for `layer`.

  The embedded identifier is derived from layer, seed, max_train, n_train
  and labelling_function.
  """
  identifier = hash_elements(layer, seed, max_train, n_train, labelling_function)
  template = '{}/{}_weights_({}).hdf5'
  return template.format(weight_path, test_name, identifier)

# Build the path of the accumulated result log
def result_filename():
  """Return the .npy path under `result_path` tagged with `test_name` and `config_hash`."""
  template = '{}/{}_results_({}).npy'
  return template.format(result_path, test_name, config_hash)

# Build the path of the accumulated timing log
def timesheet_filename():
  """Return the .npy path under `result_path` tagged with `test_name` and `config_hash`."""
  template = '{}/{}_times_({}).npy'
  return template.format(result_path, test_name, config_hash)

## Helper methods for timekeeping
---

In [None]:
# Simple timer to monitor training / evaluation times
class Timer:
  """Stopwatch reporting the seconds elapsed since the last start() call.

  Uses time.perf_counter(), a monotonic high-resolution clock, so the
  measured interval is not affected by system clock adjustments.
  """

  def __init__(self):
    # None marks "never started" so elapsed() can fail with a clear message
    self._time = None

  def start(self):
    """Record the current instant as the reference point."""
    self._time = time.perf_counter()

  def elapsed(self):
    """Return the seconds elapsed since start().

    Raises:
      RuntimeError: if start() has not been called yet.
    """
    if self._time is None:
      raise RuntimeError('Timer.elapsed() called before Timer.start()')
    return time.perf_counter() - self._time

# Timekeeping metrics
# One entry per aggregation layer, keyed by its display name: 'training' holds a
# single duration (None until recorded) and 'testing' maps set length -> duration.
timesheet = {layer.get_layer_name(): {'training': None, 'testing':{}} for layer in aggregator_list} 

## Training/Testing Dataset Generation Methods
---

In [None]:
def create_train_data(num_examples, length):
  """Generate a training set of random variable-size sets and their labels.

  Args:
    num_examples: number of sets to generate.
    length: maximum cardinality of a generated set (inclusive).

  Returns:
    A pair `(tf.RaggedTensor, tf.Tensor)` holding the sets and their labels.
  """

  # Start with an empty list of examples and labels
  X = []
  X_labels = []
  
  # For the desired number of training examples
  for _ in tqdm(range(num_examples), desc='Creating training examples of maximum length {}: '.format(length)):
    
    # Generate a random set cardinality in [1, length].
    # np.random.randint excludes its upper bound, hence the + 1.
    n = np.random.randint(1, length + 1)

    # Generate a random set and add to list of examples
    target_set = [element_generator() for _ in range(n)]
    target_label = labelling_function(target_set)
    X.append(target_set)
    X_labels.append(target_label)

  return tf.ragged.constant(X), tf.constant(X_labels)

def gen_test_data(n_examples, length):
    """Generate `n_examples` random sets of exactly `length` elements with labels.

    Returns a pair `(tf.RaggedTensor, tf.Tensor)` holding the sets and the
    value of `labelling_function` for each of them.
    """
    examples, labels = [], []
    progress = tqdm(range(n_examples), desc='Creating testing examples of length {}: '.format(length))

    # For the desired number of testing examples
    for _ in progress:
      # Draw one fixed-size set, label it, and record both
      members = [element_generator() for _ in range(length)]
      label = labelling_function(members)
      examples.append(members)
      labels.append(label)

    return tf.ragged.constant(examples), tf.constant(labels)

## Training Dataset Creation
---

In [None]:
# Create training sets
X_train, label_X_train = None, None

# Get filenames for storing training data
filename_dataset, filename_labels = training_set_filenames()

# Determine if a saved set already exists, else create one
temp_dataset, temp_labels = None, None
data_file, label_file = Path(filename_dataset), Path(filename_labels)
if always_regenerate_data or not (data_file.is_file() and label_file.is_file()):
  temp_dataset, temp_labels = create_train_data(n_train, max_train)
  np.save(filename_dataset, temp_dataset.numpy())
  np.save(filename_labels, temp_labels.numpy())
else:
  # Reuse the saved copy; otherwise X_train / label_X_train would stay None
  temp_dataset = tf.ragged.constant(np.load(filename_dataset, allow_pickle=True))
  temp_labels = tf.constant(np.load(filename_labels, allow_pickle=True))

X_train, label_X_train = temp_dataset, temp_labels 

## Testing Dataset Creation
---

In [None]:
# Create a collection of testing sets for each desired set length
testing_collection = {}
for l in range(min_test, max_test+1, step_test):
  
  temp_dataset, temp_labels = None, None
  filename_dataset, filename_labels = testing_set_filenames(l)

  # Determine if a saved set already exists, else create one
  data_file, label_file = Path(filename_dataset), Path(filename_labels)
  if always_regenerate_data or not (data_file.is_file() and label_file.is_file()):
    temp_dataset, temp_labels = gen_test_data(n_test, l)
    np.save(filename_dataset, temp_dataset.numpy())
    np.save(filename_labels, temp_labels.numpy())
  else:
    # Reuse the saved copy; otherwise the collection entry would be (None, None)
    temp_dataset = tf.ragged.constant(np.load(filename_dataset, allow_pickle=True))
    temp_labels = tf.constant(np.load(filename_labels, allow_pickle=True))
  
  # Add to the testing collection
  testing_collection[l] = (temp_dataset, temp_labels)

## Model Creation
---

In [None]:
# Build an encoder -> aggregation -> decoder model around the given aggregation layer
def build_model(aggregator):
  """Assemble and return a Keras `Model` that uses `aggregator` between encoder and decoder.

  The encoder and decoder layer classes and their constructor arguments are
  read from the 'encoder' / 'encoder_args' and 'decoder' / 'decoder_args'
  hyper-parameters. `aggregator` is an already-instantiated layer (or any
  callable) applied to the encoder output.
  """

  def _instantiate(stage):
    # Pair every layer class of the stage with its kwargs and construct it
    layer_classes = get_hyperparameter(stage)
    layer_kwargs = get_hyperparameter(stage + '_args')
    return [cls(**kw) for cls, kw in zip(layer_classes, layer_kwargs)]

  # Ragged input is used because the sets have variable cardinality;
  # a trailing axis is added before the encoder layers are applied.
  inputs = Input(shape=[None], ragged=True)
  tensor = tf.expand_dims(inputs, axis=2)

  # Encoder: maps the elements of the input into a latent space
  for encoder_layer in _instantiate('encoder'):
    tensor = encoder_layer(tensor)
  encoded = tensor

  # The latent embeddings are combined by the chosen aggregation strategy
  tensor = aggregator(encoded)

  # Decoder: turns the aggregate into the model output
  for decoder_layer in _instantiate('decoder'):
    tensor = decoder_layer(tensor)
  decoded = tensor

  return Model(inputs=inputs, outputs=decoded)

# Helper method to copy the weights of one model into another, layer by layer
def duplicate_weights(out_model, in_model):
  """Copy weights from `out_model` (source) into `in_model` (destination).

  Layers are paired positionally; pairing stops at the shorter layer list.
  Returns `in_model` after its layers have been updated.
  """
  layer_pairs = zip(out_model.layers, in_model.layers)
  for source_layer, target_layer in layer_pairs:
    weights = source_layer.get_weights()
    target_layer.set_weights(weights)
  return in_model

# Wrapper producing the custom-object mapping to register with each model
def get_custom_object(in_layer):
  """Return a one-entry dict mapping `in_layer.__name__` to `in_layer`."""
  registry = {}
  registry[in_layer.__name__] = in_layer
  return registry

## Model Training
---

In [None]:
# Train one model per aggregation layer on a shared train / validation split,
# checkpointing weights to disk and recording the time spent in model.fit.

# Create a timer
training_timer = Timer()

# load dataset for this iteration
# (the training set is re-read from the .npy files rather than reused from memory)
filename_dataset, filename_labels = training_set_filenames()
X_train = tf.ragged.constant(np.load(filename_dataset, allow_pickle=True))
label_X_train = tf.constant(np.load(filename_labels, allow_pickle=True))

# Create validation set, true training set, from training set
# This is given a pre-established validation proportion
n_train_total, _ = X_train.shape
# Split example indices; the same split is then applied to data and labels
I_train, I_val = train_test_split(range(n_train_total), test_size=get_hyperparameter('p_validation'))

X_train_partial = tf.gather(X_train, indices=I_train)
X_val = tf.gather(X_train, indices=I_val)

label_X_train_partial = tf.gather(label_X_train, indices=I_train)
label_X_val = tf.gather(label_X_train, indices=I_val)

# For each type of aggregation in consideration
for agg_layer in aggregator_list:

  # Get the name of the layer
  layer_name = agg_layer.get_layer_name()

  # Create the relevant model with desired aggregation
  model = build_model(aggregator=agg_layer())
  
  # Register the custom object (aggregation layer)
  custom_objects = get_custom_object(agg_layer)
  with tf.keras.utils.custom_object_scope(custom_objects):

      # Compile the model, set optimizer and loss function
      model = tf.keras.models.clone_model(model)
      v_opt_args = get_hyperparameter('aggregation',layer_name,'optimizer_args')
      v_opt = get_hyperparameter('optimizer')(**v_opt_args)
      v_los = get_hyperparameter('loss')
      model.compile(loss=v_los, optimizer=v_opt)

      # Train and save weights if they don't already exist
      filename = weight_filename(layer_name)
      weight_file = Path(filename)
      if not weight_file.is_file() or always_regenerate_weights:
        print("Training commencing with aggregation layer: {}...".format(layer_name))

        # Checkpoint desirable weights based on validation loss
        # (no `monitor` is passed, so the callback's default monitored quantity applies)
        checkpointer = ModelCheckpoint(
            filepath=filename, 
            verbose=0, 
            save_best_only=True
        )

        # adapt learning rate near loss landscape plateau
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, verbose=1, patience=20, min_lr=0.000001)

        # start timer
        training_timer.start()

        # Fit model
        model.fit(
            x=X_train_partial, 
            y=label_X_train_partial, 
            epochs=get_hyperparameter('n_epochs'), 
            batch_size=get_hyperparameter('n_batch'),
            shuffle=True, 
            validation_data=(X_val, label_X_val),
            callbacks=[checkpointer, reduce_lr],
            verbose=base_verbose
        )

        # Gather elapsed time
        # NOTE: when training is skipped, timesheet[layer_name]['training'] keeps its initial None
        elapsed = training_timer.elapsed()
        timesheet[layer_name]['training'] = elapsed

## Performance Measures
---

In [None]:
# Mean Absolute Error
def mae(pred, labels):
  """Return the mean absolute difference between squeezed `pred` and `labels`."""
  absolute_errors = np.abs(np.squeeze(pred) - labels)
  n_labels = len(labels)
  return absolute_errors.sum() / n_labels

# Root Mean Squared Error
def rmse(pred, labels):
  """Return the root of the mean squared difference between squeezed `pred` and `labels`."""
  residuals = np.squeeze(pred) - labels
  squared_error_sum = np.dot(residuals, residuals)
  mean_squared_error = squared_error_sum / len(labels)
  return np.sqrt(mean_squared_error)

# Metrics computed for every (aggregation layer, set length) pair; each
# function's __name__ is used as the key in the metrics log.
performance_metrics = [mae, rmse]

## Model Evaluation
---

In [None]:
# Evaluate every trained model on every saved testing set, recording the
# prediction time and each performance metric per set length.
lengths = range(min_test, max_test+1, step_test)

# Create a timer
testing_timer = Timer()

# Lookup of prescribed metrics
# Layout: metrics[layer display name][metric function name][set length] -> value
metrics = {in_layer.get_layer_name(): {met.__name__: {} for met in performance_metrics} for in_layer in aggregator_list}
  
# For each aggregation layer
for agg_layer in aggregator_list:
  
  layer_name = agg_layer.get_layer_name()
  print("Prediction commencing with aggregation layer: {}".format(layer_name))
  
  # Determine, for sets of a particular length
  for l in lengths:
    print('Evaluating at length: ', l)
    # Reset Keras state before a fresh model is built for this length
    K.clear_session()
    
    # Gather test data
    filename_dataset, filename_labels = testing_set_filenames(l)
    X_test = np.load(filename_dataset, allow_pickle=True)
    label_X_test = np.load(filename_labels, allow_pickle=True)

    # Build model
    model = build_model(aggregator=agg_layer())

    # Load weights as determined through training
    filename = weight_filename(layer_name)
    
    custom_objects = get_custom_object(agg_layer)
    with tf.keras.utils.custom_object_scope(custom_objects):
      
      # Allocate weights for future computation
      # (the checkpoint is loaded as a full model, then its weights are copied layer by layer)
      temp_model = load_model(filename)
      duplicate_weights(temp_model, model)

      # Start timer
      testing_timer.start()

      # Perform prediction 
      preds = model.predict(
          X_test, 
          batch_size=get_hyperparameter('n_batch'), 
          verbose=base_verbose
      )

      # Gather elapsed time
      elapsed = testing_timer.elapsed()
      timesheet[layer_name]['testing'][l] = elapsed

    # Add to monitored metric log
    for met in performance_metrics:
      metrics[layer_name][met.__name__][l] = met(preds, label_X_test)

## Saving Results
---

In [None]:
# Persist this run's metrics and timings. Each log file holds an array with one
# dict per run: it is created on the first run and appended to afterwards.
if write_results:
  
  # Save the performance results
  output_filename = result_filename()
  result_file = Path(output_filename)
  if not result_file.is_file():
    np.save(output_filename, np.array([metrics]))
  else:
    # Loading needs allow_pickle because the array elements are dicts
    result_log = np.load(output_filename, allow_pickle=True)
    result_log = np.append(result_log, metrics)
    np.save(output_filename, result_log)

  # Save the timesheet results
  output_filename = timesheet_filename()
  time_file = Path(output_filename)
  if not time_file.is_file():
    np.save(output_filename, np.array([timesheet]))
  else:
    time_log = np.load(output_filename, allow_pickle=True)
    time_log = np.append(time_log, timesheet)
    np.save(output_filename, time_log)

## Result Visualisation
---

In [None]:
import math
def plot_results(metric, xlabel='Set Cardinality', ylabel='', title='', ylim=None, added_operations=lambda plt: None):
  """Plot one metric of the current run against set cardinality, one curve per aggregator.

  Args:
    metric: key into `metrics[layer_name]` (e.g. 'mae' or 'rmse').
    xlabel, ylabel, title: axis labels and title; the title gets " [n=<n_test>]" appended.
    ylim: optional `(low, high)` pair for the y axis.
    added_operations: callback receiving `plt` just before the figure is shown.
  """
  matplotlib.rc('font', **{'size': plot_text})
  plt.figure(figsize=(10*plot_scale, 8*plot_scale))

  y_max = -math.inf
  for agg_layer in aggregator_list:
    layer_name = agg_layer.get_layer_name()
    line_format = get_hyperparameter('aggregation', layer_name, 'style')
    series = metrics[layer_name][metric]
    cardinalities = np.array(list(series))
    scores = np.array(list(series.values()))
    # Running maximum is tracked as in the original, although nothing reads it
    y_max = max(y_max, np.max(scores))
    plt.plot(cardinalities, scores, line_format, label=layer_name)

  # Shade the range of cardinalities covered by the training parameters
  plt.axvspan(1, max_train, color='C1', alpha=0.3, label='Training Set Size Threshold')

  plt.grid()
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)
  plt.title("{} [n={}]".format(title, n_test))
  if ylim is not None:
    plt.ylim(*ylim)
  plt.xlim(min_test - step_test, max_test + step_test)
  plt.legend()
  plt.tight_layout()
  added_operations(plt)
  plt.show()

def plot_testing_times(xlabel='Set Cardinality', ylabel='', title='', ylim=None, added_operations=lambda plt: None):
  """Plot the current run's evaluation times per set cardinality with a linear fit.

  For each aggregator the recorded times are drawn as small points and a
  degree-1 `np.polyfit` line through them carries the legend label.
  """
  matplotlib.rc('font', **{'size': plot_text})
  plt.figure(figsize=(10*plot_scale, 8*plot_scale))

  y_max = -math.inf
  for agg_layer in aggregator_list:
    layer_name = agg_layer.get_layer_name()
    color = get_hyperparameter('aggregation', layer_name, 'color')
    timings = timesheet[layer_name]['testing']
    cardinalities = np.array(list(timings))
    durations = np.array(list(timings.values()))
    # Running maximum is tracked as in the original, although nothing reads it
    y_max = max(y_max, np.max(durations))

    # Least-squares straight line through the observations
    slope, intercept = np.polyfit(cardinalities, durations, 1)
    fitted = slope * cardinalities + intercept

    plt.plot(cardinalities, durations, "{}o".format(color), markersize=0.8)
    plt.plot(cardinalities, fitted, "{}-".format(color), label=layer_name)

  plt.grid()
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)
  plt.title("{} [n={}]".format(title,n_test))
  if ylim is not None:
    plt.ylim(*ylim)
  plt.xlim(min_test - step_test, max_test + step_test)
  plt.legend()
  plt.tight_layout()
  added_operations(plt)
  plt.show()

def plot_training_times(xlabel='', ylabel='', title='', ylim=None, added_operations=lambda plt: None):
  """Draw a bar chart of the current run's training time per aggregator.

  The y axis defaults to 95% of the smallest and 105% of the largest time;
  an explicit `ylim` pair overrides that range.
  """
  matplotlib.rc('font', **{'size': plot_text})
  plt.figure(figsize=(10*plot_scale, 8*plot_scale))

  obs = {}
  for layer in aggregator_list:
    obs[layer.get_layer_name()] = timesheet[layer.get_layer_name()]['training']
  durations = list(obs.values())
  shortest, longest = np.min(durations), np.max(durations)

  plt.grid()
  plt.bar(obs.keys(), obs.values())
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)
  plt.title("{} [n={}]".format(title,n_train))
  plt.ylim(0.95 * shortest, 1.05 * longest)
  if ylim is not None:
    plt.ylim(*ylim)
  plt.tight_layout()
  added_operations(plt)
  plt.show()

## Model MAE
---

In [None]:
# Plot the current run's mean absolute error for every aggregation layer
plot_results(metric='mae', ylabel='MAE', title='Model Mean Absolute Error per Testing Set Cardinality')

## Model RMSE
---

In [None]:
# Plot the current run's root mean squared error for every aggregation layer
plot_results(metric='rmse', ylabel='RMSE', title='Model Root Mean Squared Error per Testing Set Cardinality')

## Model Training Times
---

In [None]:
# plot_training_times(title="Training Period per Model", ylabel="Training Period (s)")

## Model Evaluation Times
---

In [None]:
# plot_testing_times(title="Evaluation Times per Testing Set Cardinality [n={}]".format(n_test), ylabel="Evaluation Time (s)")

## Analysis of Multiple Experiments
---

In [None]:
# Method to summarise experimental observations in a tabular format to some prescribed degree of accuracy
from prettytable import PrettyTable
def print_summary_statistics(x, Y, model=None, places=3):
  """Print two tables summarising observations `Y` taken at the positions `x`.

  Args:
    x: the positions (set lengths) shared by every experiment.
    Y: 2-D array unpacked as (experiments, classes) via `Y.shape`.
    model: optional label printed as a heading before the tables.
    places: number of decimals used when rounding the reported values.
  """
  n_experiments, n_classes = Y.shape
  total_observations = n_experiments * n_classes

  if model is not None:
    print("[Statistics for Model: {}]".format(model))

  # Repeat x once per experiment so it lines up with the flattened observations
  x_obs = np.array([x] * n_experiments).flatten()
  y_obs = Y.flatten()
  correlation = np.corrcoef(x_obs, y_obs)[0, 1]
  rho = np.round(correlation, places)

  basic_table = PrettyTable(['Basic Measure','Value'])
  for row in (
      ['Total Experiments', n_experiments],
      ['Total Observations', total_observations],
      ['Correlation', rho],
  ):
    basic_table.add_row(row)
  print(basic_table)

  # Per-length aggregates taken across experiments (axis 0)
  measures = {
    'Mean': np.mean(Y, axis=0),
    'Min': np.min(Y, axis=0),
    'Max': np.max(Y, axis=0),
    'Std': np.std(Y, axis=0),
  }

  aggregate_table = PrettyTable(['Aggregate Measure'] + ["length {}".format(l) for l in x])
  for measure_name, values in measures.items():
    aggregate_table.add_row([measure_name] + list(np.round(values, places)))

  print(aggregate_table)

In [None]:
# Method to plot a performance metric, accumulated over all saved runs, against set cardinality
def plot_aggregate_metrics(metric, xlabel='Set Cardinality', ylabel='', title='', ylim=None, verbose=False, added_operations=lambda plt: None):
  """Plot `metric` from every run stored in the result log, with a linear fit per aggregator.

  The log is read from `result_filename()`. For each aggregator all observations
  are drawn as small points; a degree-1 fit over the pooled observations is drawn
  as a line whose legend label includes the correlation coefficient. With
  `verbose=True` summary tables are printed per aggregator as well.
  """
  observations = np.load(result_filename(), allow_pickle=True)
  n_observations = len(observations)

  matplotlib.rc('font', **{'size': plot_text})
  plt.figure(figsize=(10*plot_scale, 8*plot_scale))

  for agg_layer in aggregator_list:
    layer_name = agg_layer.get_layer_name()
    color = get_hyperparameter('aggregation', layer_name, 'color')

    # One row per saved run: the set lengths and the metric values at those lengths
    runs = [observations[index][layer_name][metric] for index in range(n_observations)]
    x = np.array([list(run.keys()) for run in runs])
    y = np.array([list(run.values()) for run in runs])

    pooled_x, pooled_y = x.flatten(), y.flatten()
    rho = np.round(np.corrcoef(pooled_x, pooled_y)[0, 1], 5)
    label = "{} [ρ={}]".format(layer_name, rho)
    if verbose:
      print_summary_statistics(x[0, :], y, model=label, places=5)

    plt.plot(x, y, "{}o".format(color), markersize=0.8)

    # The fitted line is evaluated at the lengths of the first run
    first_run_lengths = x[0, :]
    slope, intercept = np.polyfit(pooled_x, pooled_y, 1)
    plt.plot(first_run_lengths, slope * first_run_lengths + intercept, "{}-".format(color), label=label)

  plt.axvspan(1, max_train, color='C1', alpha=0.3, label='Training Set Size Threshold')

  plt.grid()
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)
  plt.title("{} [N={}, n={}]".format(title,n_observations,n_test))
  if ylim is not None:
    plt.ylim(*ylim)
  plt.xlim(min_test - step_test, max_test + step_test)
  plt.legend()
  plt.tight_layout()
  added_operations(plt)
  plt.show()

In [None]:
# Method to create a boxplot of training times for each model
def plot_aggregate_training_times(xlabel='', ylabel='', title='', added_operations=lambda plt: None):  
  """Draw one box per aggregator from the training times of every saved run.

  The timing log is read from `timesheet_filename()`; the title gets
  " [N=<runs>, n=<n_train>]" appended.
  """
  observations = np.load(timesheet_filename(), allow_pickle=True)
  n_observations = len(observations)

  matplotlib.rc('font', **{'size': plot_text})
  fig, ax = plt.subplots(figsize=(10*plot_scale, 8*plot_scale))

  # Display names in aggregator order, and the matching training times per run
  key_values = [agg_layer.get_layer_name() for agg_layer in aggregator_list]
  data_values = [
      [observations[index][key]['training'] for index in range(n_observations)]
      for key in key_values
  ]

  ax.boxplot(data_values)
  ax.set_xticklabels(key_values)
  
  plt.grid()
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)
  plt.title("{} [N={}, n={}]".format(title,n_observations,n_train))
  plt.tight_layout()
  added_operations(plt)
  plt.show()

# Method to plot and analyse model evaluation times as a function of set cardinality
def plot_aggregate_testing_times(xlabel='Set Cardinality', ylabel='', title='', ylim=None, added_operations=lambda plt: None):
  """Plot the evaluation times of every saved run per set cardinality, with a linear fit.

  The timing log is read from `timesheet_filename()`. Observations of all runs
  are pooled per aggregator, drawn as small points, and fitted with a degree-1
  polynomial whose legend label includes the correlation coefficient.
  """
  observations = np.load(timesheet_filename(), allow_pickle=True)
  n_observations = len(observations)
  
  matplotlib.rc('font', **{'size': plot_text})
  plt.figure(figsize=(10*plot_scale, 8*plot_scale))

  y_max = -math.inf
  for agg_layer in aggregator_list:
    layer_name = agg_layer.get_layer_name()
    color = get_hyperparameter('aggregation', layer_name, 'color')

    # Pool (length, duration) observations across every saved run
    runs = [observations[index][layer_name]['testing'] for index in range(n_observations)]
    x = np.array([list(run.keys()) for run in runs]).flatten()
    y = np.array([list(run.values()) for run in runs]).flatten()
    # Running maximum is tracked as in the original, although nothing reads it
    y_max = max(y_max, np.max(y))

    slope, intercept = np.polyfit(x, y, 1)
    fitted = slope * x + intercept

    rho = np.round(np.corrcoef(x.flatten(), y.flatten())[0, 1], 5)
    label = "{} [ρ={}]".format(layer_name, rho)

    plt.plot(x, y, "{}o".format(color), markersize=0.8)
    plt.plot(x, fitted, "{}-".format(color), label=label)

  plt.grid()
  plt.xlabel(xlabel)
  plt.ylabel(ylabel)
  plt.title("{} [N={},n={}]".format(title,n_observations,n_test))
  if ylim is not None:
    plt.ylim(*ylim)
  plt.xlim(min_test - step_test, max_test + step_test)
  plt.legend()
  plt.tight_layout()
  added_operations(plt)
  plt.show()

## Long-Term Model Mean Absolute Error
---

In [None]:
# Plot the mean absolute error accumulated over all saved runs.
# The plotted metric is 'mae' (mean absolute error), so the title says so.
plot_aggregate_metrics('mae', ylabel='MAE', ylim=(0,10), title='Mean Absolute Error Per Testing Set Cardinality')

## Long-Term Model Root Mean Squared Error
---

In [None]:
# Plot the root mean squared error accumulated over all saved runs
plot_aggregate_metrics('rmse', ylabel='RMSE', ylim=(0,10), title='Root Mean Squared Error Per Testing Set Cardinality')

## Long-Term Model Training Periods
---

In [None]:
# Box plot of the training times accumulated over all saved runs
plot_aggregate_training_times(xlabel='Aggregation Mechanism', ylabel='Training Period (s)', title='Training Period per Aggregation Mechanism')

## Long-Term Model Evaluation Periods
---

In [None]:
# Evaluation times accumulated over all saved runs, per set cardinality
plot_aggregate_testing_times(xlabel='Set Cardinality', ylabel='Evaluation Period (s)', title='Evaluation Period per Testing Set Cardinality')