In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
from sklearn.model_selection import train_test_split
from functools import *
import sys
import os

PROJECT_ROOT = os.path.abspath(os.getcwd() + os.sep + os.pardir)
sys.path.insert(0, PROJECT_ROOT)

from lib.time_series_datasets import *
from lib.models import *
from lib.benchmarks import *
import notify2  # ToDo replace notify watch here : https://notify2.readthedocs.io/en/latest/

# --- Project directories (all derived from PROJECT_ROOT, except the Tensorboard one) ---
DATA_ROOT = os.path.join(PROJECT_ROOT, "datasets")
TUNER_ROOT = os.path.join(PROJECT_ROOT, "models")
BENCHMARKS_ROOT = os.path.join(PROJECT_ROOT, "plots", "benchmarks")
WEIGHTS_ROOT = os.path.join(PROJECT_ROOT, "plots", "weights")
TB_ROOT = os.path.join(os.path.abspath(os.sep), "tmp", "tensorboard")

# NOTE(review): these two names are not referenced by any cell visible in this notebook.
BENCHMARKS_DIR = "benchmarks"
WEIGHTS_DIR = "weights"

print("       Data dir:", DATA_ROOT)
print("      Tuner dir:", TUNER_ROOT)
print("Tensorboard dir:", TB_ROOT)

# --- Experiment settings ---
TUNER = "BayesianOptimization"  # "Hyperband" or "BayesianOptimization"
SKIP = True  # Skip a model that has already been benchmarked?

MAX_UNITS = 400  # upper bound of the 'units' hyperparameter
MAX_EPOCHS = 200  # epoch budget for both the search and the final benchmark fits
GUESSES = 10  # number of train/evaluate repetitions of the best configuration
PATIENCE = 10  # EarlyStopping patience (epochs)

MAX_TRIALS = 100  # BayesianOptimization

# Set the following values based on the specific dataset
OUTPUT_ACTIVATION = tf.keras.activations.softmax  # https://www.tensorflow.org/api_docs/python/tf/keras/activations  'softmax'
LOSS_FUNCTION = tf.keras.losses.SparseCategoricalCrossentropy()  # https://www.tensorflow.org/api_docs/python/tf/keras/losses  'sparse_categorical_crossentropy'

# exist_ok=True replaces the check-then-create pattern, which can race with another process.
os.makedirs(TUNER_ROOT, exist_ok=True)
os.makedirs(TB_ROOT, exist_ok=True)

benchmarks = BenchmarksDB(load_path=os.path.join(BENCHMARKS_ROOT, "benchmarks.json"), plot_path=WEIGHTS_ROOT)

       Data dir: /dati/luca/Uni-Luca/Tesi/tesi/datasets
      Tuner dir: /dati/luca/Uni-Luca/Tesi/tesi/models
Tensorboard dir: /tmp/tensorboard



|        | ArticularyWordRecognition | CharacterTrajectories | Libras | SpokenArabicDigits |
|--------|:-------------------------:|:---------------------:|:------:|:------------------:|
| Input  |             9             |           3           |   2    |         13         |
| Output |            25             |          20           |   15   |         10         |

In [10]:
from keras_tuner import Hyperband, BayesianOptimization


def tune_and_test(build_model_fn, names,
                  train_set, val_set, test_set,
                  tuner_path, tensorboard_path=None,
                  benchmarks_verbose=0):
    """Search the hyperparameters of a model, then benchmark the best configuration.

    The tuner selected by the module-level ``TUNER`` constant runs a search on the
    train/validation split. The best hyperparameters are then used to build, fit
    and evaluate a fresh model ``GUESSES`` times.

    Args:
        build_model_fn: callable handed to the keras-tuner tuner to build a model.
        names: 4-tuple ``(dataset_name, class_name, experiment_name, model_name)``.
        train_set: ``(x_train, y_train)`` pair.
        val_set: ``(x_val, y_val)`` pair.
        test_set: ``(x_test, y_test)`` pair.
        tuner_path: root directory; tuner state is stored under
            ``<tuner_path>/<TUNER>/<dataset_name>/<class_name>``.
        tensorboard_path: optional root directory; when given, TensorBoard logs
            are written to ``<tensorboard_path>/<model_name>``.
        benchmarks_verbose: ``verbose`` value forwarded to ``fit`` during the benchmark runs.

    Returns:
        ``(test_model, stat)``: the model trained in the last benchmark run
        (``None`` if ``GUESSES`` is 0) and a ``Statistic`` built from the best
        hyperparameters, the per-run metric, the per-run loss and the per-run time.

    Raises:
        ValueError: if ``TUNER`` is neither "Hyperband" nor "BayesianOptimization".
    """
    dataset_name, class_name, experiment_name, model_name = names
    x_train, y_train = train_set
    x_val, y_val = val_set
    x_test, y_test = test_set

    # Validate before touching the file system, as the original branches did.
    if TUNER not in ("Hyperband", "BayesianOptimization"):
        raise ValueError("Tuner var contains a bad value -> {}".format(TUNER))

    working_dir = os.path.join(tuner_path, TUNER, dataset_name, class_name)
    os.makedirs(working_dir, exist_ok=True)

    # Arguments shared by both tuners; overwrite=False resumes an existing project.
    shared_tuner_args = dict(
        objective='val_accuracy',
        seed=42,
        directory=working_dir,
        project_name=experiment_name + ' ' + model_name,
        overwrite=False,
    )
    if TUNER == "Hyperband":
        tuner = Hyperband(
            build_model_fn,
            max_epochs=MAX_EPOCHS,
            hyperband_iterations=1,  # integer count (was the float 1.)
            **shared_tuner_args,
        )
    else:
        tuner = BayesianOptimization(
            build_model_fn,
            max_trials=MAX_TRIALS,
            **shared_tuner_args,
        )

    tuner.search(x_train, y_train, epochs=MAX_EPOCHS, validation_data=(x_val, y_val),
                 callbacks=[
                     keras.callbacks.EarlyStopping(monitor='val_loss', patience=PATIENCE)
                 ])

    # Callbacks used for the benchmark fits of the best configuration.
    callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)]
    if tensorboard_path is not None:
        # os.path.join adds the separator that plain concatenation dropped
        # (e.g. "/tmp/tensorboard" + "ESN1" -> "/tmp/tensorboardESN1").
        tensorboard_dir = os.path.join(tensorboard_path, model_name)
        callbacks.append(keras.callbacks.TensorBoard(tensorboard_dir, profile_batch='500,500'))

    print("Start {} benchmarks for {} | {} | {} | {}:".format(GUESSES, dataset_name, class_name, experiment_name,
                                                              model_name))
    best_model_hp = tuner.get_best_hyperparameters()[0]
    test_model = None

    metrics_ts = []
    loss_ts = []
    required_time = []

    tf.random.set_seed(42)
    for _ in range(GUESSES):
        initial_time = time()

        test_model = tuner.hypermodel.build(best_model_hp)
        test_model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=MAX_EPOCHS,
                       callbacks=callbacks, verbose=benchmarks_verbose)
        # Unpacking two values assumes the model was compiled with exactly one metric.
        loss, metrics = test_model.evaluate(x_test, y_test)

        required_time.append(time() - initial_time)
        metrics_ts.append(metrics)
        loss_ts.append(loss)

    stat = Statistic(best_model_hp, metrics_ts, loss_ts, required_time)

    return test_model, stat


# True once notify2.init() has been called in this kernel session.
notify2_init = False


def send_notification(title, message):
    """Show a desktop notification with the given title and message via notify2.

    The notify2 connection is initialised lazily on the first call only; the
    module-level ``notify2_init`` flag records that it has been done. The flag
    was previously never set, so ``notify2.init`` ran on every call.
    """
    global notify2_init
    if not notify2_init:
        notify2.init("Tesi")
        notify2_init = True

    notice = notify2.Notification(title, message)
    notice.show()


In [20]:
# This cell is a little nightmare

class HP:
    """Tag describing how a hyperparameter is handled, plus an optional value.

    ``type`` is one of the class constants FREE, FIXED or RESTRICTED; ``value``
    carries the payload for FIXED (and optionally RESTRICTED) entries. How each
    kind is interpreted is decided by the helper that consumes the instance.
    """
    FREE, FIXED, RESTRICTED = range(3)

    def __init__(self, hp_type, value=None):
        self.type = hp_type
        self.value = value

    @classmethod
    def free(cls):
        """Build a FREE entry (no value)."""
        return cls(cls.FREE)

    @classmethod
    def fixed(cls, value):
        """Build a FIXED entry carrying ``value``."""
        return cls(cls.FIXED, value)

    @classmethod
    def restricted(cls, value=None):
        """Build a RESTRICTED entry, optionally carrying ``value``."""
        return cls(cls.RESTRICTED, value)


def get_int(tuner, name, hp, min_value, max_value, step=1, sampling=None, pn=None, pv=None):
    """Register an integer hyperparameter called ``name`` on ``tuner``.

    FREE and RESTRICTED entries become a searchable ``tuner.Int`` over
    ``[min_value, max_value]``; ``pn`` / ``pv`` are forwarded as
    ``parent_name`` / ``parent_values``. FIXED entries become
    ``tuner.Fixed(name, hp.value)`` and ignore the range arguments.

    Raises:
        ValueError: if ``hp.type`` is not one of the HP kinds.
    """
    if hp.type in (HP.FREE, HP.RESTRICTED):
        return tuner.Int(name, min_value=min_value, max_value=max_value, step=step, sampling=sampling,
                         parent_name=pn, parent_values=pv)
    if hp.type == HP.FIXED:
        return tuner.Fixed(name, hp.value)
    raise ValueError("HP type not found")


def get_float(tuner, name, hp, min_value, max_value, step=None, sampling=None, pn=None, pv=None):
    """Register a float hyperparameter called ``name`` on ``tuner``.

    FREE and RESTRICTED entries become a searchable ``tuner.Float`` over
    ``[min_value, max_value]``; ``pn`` / ``pv`` are forwarded as
    ``parent_name`` / ``parent_values``. FIXED entries become
    ``tuner.Fixed(name, hp.value)`` and ignore the range arguments.

    Raises:
        ValueError: if ``hp.type`` is not one of the HP kinds.
    """
    if hp.type in (HP.FREE, HP.RESTRICTED):
        return tuner.Float(name, min_value=min_value, max_value=max_value, step=step, sampling=sampling,
                           parent_name=pn, parent_values=pv)
    if hp.type == HP.FIXED:
        return tuner.Fixed(name, hp.value)
    raise ValueError("HP type not found")


def get_bool(tuner, name, hp):
    """Register a boolean hyperparameter called ``name`` on ``tuner``.

    FREE and RESTRICTED entries become a searchable ``tuner.Boolean``;
    FIXED entries become ``tuner.Fixed(name, hp.value)``.

    Raises:
        ValueError: if ``hp.type`` is not one of the HP kinds.
    """
    if hp.type in (HP.FREE, HP.RESTRICTED):
        return tuner.Boolean(name)
    if hp.type == HP.FIXED:
        return tuner.Fixed(name, hp.value)
    raise ValueError("HP type not found")


def get_float_vec(tuner, name, hp, length, min_value, max_value, step=None, sampling=None, pn=None, pv=None):
    """Register a vector of ``length`` float hyperparameters named ``"<name> <i>"``.

    Behaviour per HP kind:
      * FREE: one independent ``tuner.Float`` per position.
      * FIXED: one ``tuner.Fixed`` per position. ``hp.value`` may be a sequence
        with at least ``length`` items, or a single scalar that is used for
        every position.
      * RESTRICTED: a single ``tuner.Float`` named ``"<name> 0"`` shared by
        all positions.

    ``step`` and ``sampling`` are forwarded to ``tuner.Float``; ``pn`` / ``pv``
    are forwarded as ``parent_name`` / ``parent_values``.

    Returns:
        A list of ``length`` hyperparameter values.

    Raises:
        ValueError: if ``hp.type`` is unknown, or a FIXED sequence is shorter
            than ``length``.
    """
    if hp.type == HP.FREE:
        # `step` was accepted but silently dropped before; forward it like get_float does.
        return [tuner.Float(name + ' ' + str(i), min_value=min_value, max_value=max_value, step=step,
                            sampling=sampling, parent_name=pn, parent_values=pv)
                for i in range(length)]

    if hp.type == HP.FIXED:
        # The values live in hp.value; indexing the HP object itself (hp[i]) raised TypeError.
        values = hp.value
        if isinstance(values, (int, float, bool, str)):
            values = [values] * length
        elif len(values) < length:
            raise ValueError("HP fixed value has {} items, {} required".format(len(values), length))
        return [tuner.Fixed(name + ' ' + str(i), values[i]) for i in range(length)]

    if hp.type == HP.RESTRICTED:
        shared = tuner.Float(name + ' 0', min_value=min_value, max_value=max_value, step=step,
                             sampling=sampling, parent_name=pn, parent_values=pv)
        return [shared for _ in range(length)]

    raise ValueError("HP type not found")


def get_connectivity(tuner, hp, length):  # It is a squared matrix
    """Build a ``length`` x ``length`` matrix of connectivity hyperparameters.

    Diagonal entries are named ``'connectivity <i>'``. Behaviour per HP kind:
      * FREE: every entry is its own ``tuner.Float`` in [0, 1]; off-diagonal
        entries are named ``'connectivity <col>-><row>'``.
      * FIXED: ``hp.value`` is a ``(diagonal, off_diagonal)`` pair; the
        off-diagonal value is one shared ``'connectivity X->Y'`` Fixed entry.
      * RESTRICTED: off-diagonal entries share one ``'connectivity X->Y'``
        Float; the diagonal is searched per position when ``hp.value`` is None,
        otherwise it is a single Fixed ``'connectivity'`` entry.

    Raises:
        ValueError: if ``hp.type`` is not one of the HP kinds.
    """
    indices = range(length)

    if hp.type == HP.FREE:
        def free_entry(row, col):
            if row == col:
                label = 'connectivity ' + str(col)
            else:
                label = 'connectivity ' + str(col) + '->' + str(row)
            return tuner.Float(label, min_value=0., max_value=1.)

        return [[free_entry(row, col) for col in indices] for row in indices]

    if hp.type == HP.FIXED:
        diag_value, off_value = hp.value
        shared_off = tuner.Fixed('connectivity X->Y', off_value)
        return [[tuner.Fixed('connectivity ' + str(col), diag_value) if row == col else shared_off
                 for col in indices]
                for row in indices]

    if hp.type == HP.RESTRICTED:
        if hp.value is None:
            diagonal = [tuner.Float('connectivity ' + str(k), min_value=0., max_value=1.) for k in indices]
        else:
            diagonal = [tuner.Fixed('connectivity', hp.value)] * length
        cross = tuner.Float('connectivity X->Y', min_value=0., max_value=1.)
        return [[diagonal[col] if row == col else cross for col in indices] for row in indices]

    raise ValueError("HP type not found")


def get_spectral_radius(tuner, hp, length):
    """Build a ``length`` x ``length`` matrix of spectral-radius hyperparameters.

    Searchable entries are ``tuner.Float`` in [0.0, 2.0] with step 0.1.
    Behaviour per HP kind:
      * FREE: one Float per diagonal entry (``'spectral radius <i>'``) and one
        shared ``'spectral radius X->Y'`` Float for every off-diagonal entry.
      * FIXED: ``hp.value`` is a ``(diagonal, off_diagonal)`` pair; every entry
        gets its own Fixed hyperparameter (off-diagonal ones are named
        ``'spectral radius <col>-><row>'``).
      * RESTRICTED: one ``'spectral radius'`` Float shared by the diagonal and
        one ``'spectral radius X->Y'`` Float shared by the off-diagonal.

    Raises:
        ValueError: if ``hp.type`` is not one of the HP kinds.
    """
    max_value = 2.0
    indices = range(length)

    def searchable(label):
        return tuner.Float(label, min_value=0.0, max_value=max_value, step=0.1)

    if hp.type == HP.FREE:
        shared_off = searchable('spectral radius X->Y')
        return [[searchable('spectral radius ' + str(col)) if row == col else shared_off
                 for col in indices]
                for row in indices]

    if hp.type == HP.FIXED:
        diag_value, off_value = hp.value

        def fixed_entry(row, col):
            if row == col:
                return tuner.Fixed('spectral radius ' + str(col), diag_value)
            return tuner.Fixed('spectral radius ' + str(col) + '->' + str(row), off_value)

        return [[fixed_entry(row, col) for col in indices] for row in indices]

    if hp.type == HP.RESTRICTED:
        shared_diag = searchable('spectral radius')
        shared_off = searchable('spectral radius X->Y')
        return [[shared_diag if row == col else shared_off for col in indices] for row in indices]

    raise ValueError("HP type not found")


def get_gsr(tuner, gsr):
    """Return the global spectral radius hyperparameter, or None when disabled.

    A boolean hyperparameter ``'use G.S.R'`` is registered through ``get_bool``.
    When it is truthy, a log-sampled Float ``'G.S.R'`` in [0.01, 2.0] is
    registered as a child of ``'use G.S.R'`` (active for the value True) and
    returned; otherwise None is returned.
    """
    if not get_bool(tuner, 'use G.S.R', gsr):
        return None
    return tuner.Float('G.S.R', min_value=0.01, max_value=2., sampling='log', parent_name='use G.S.R',
                       parent_values=True)


In [21]:
def build_ESN1(output, _reservoirs,  # Defined by dataset
               units, spectral_radius, _gsr, connectivity, input_scaling, bias_scaling, leaky, learning_rate,
               # Defined by experiment
               tuner) -> ESN1:
    """Build and compile an ESN1 whose hyperparameters are registered on ``tuner``.

    ``output`` is the number of output units. ``_reservoirs`` and ``_gsr`` are
    accepted only to keep the same signature as the other builders and are not
    used. The remaining experiment arguments are HP descriptors consumed by
    ``get_int`` / ``get_float``. The return annotation must stay ``ESN1``: the
    compute cell derives the model name from it.
    """
    n_units = get_int(tuner, 'units', units, 50, MAX_UNITS)
    conn = get_float(tuner, 'connectivity 0', connectivity, 0.001, 1.)
    radius = get_float(tuner, 'spectral radius', spectral_radius, min_value=0.001, max_value=2.,
                       sampling='log')
    in_scale = get_float(tuner, 'input scaling', input_scaling, 0.1, 1.5, step=0.1)
    b_scale = get_float(tuner, 'bias scaling', bias_scaling, min_value=0.1, max_value=1.5,
                        step=0.1)
    leak = get_float(tuner, 'leaky', leaky, min_value=0.1, max_value=1, step=0.1)

    esn = ESN1(units=n_units,
               connectivity=conn,
               spectral_radius=radius,
               output_units=output,
               output_activation=OUTPUT_ACTIVATION,
               input_scaling=in_scale,
               bias_scaling=b_scale,
               leaky=leak,
               )

    # The learning rate is registered last, after the model has been constructed.
    lr = get_float(tuner, 'learning rate', learning_rate, min_value=1e-5, max_value=1e-1, sampling='log')
    esn.compile(
        optimizer=keras.optimizers.RMSprop(lr),
        loss=LOSS_FUNCTION,
        metrics=['accuracy'])
    return esn


def build_ESN2(output, reservoirs,  # Defined by dataset
               units, spectral_radius, gsr, connectivity, input_scaling, bias_scaling, leaky, learning_rate,
               # Defined by experiment
               tuner) -> ESN2:
    """Build and compile an ESN2 whose hyperparameters are registered on ``tuner``.

    ``output`` is the number of output units and ``reservoirs`` the number of
    sub-reservoirs. Connectivity, spectral radius, input scaling and bias
    scaling are vectors with one entry per sub-reservoir (``get_float_vec``).
    The return annotation must stay ``ESN2``: the compute cell derives the
    model name from it.
    """
    n_units = get_int(tuner, 'units', units, 50, MAX_UNITS)
    conn_vec = get_float_vec(tuner, 'connectivity', connectivity, reservoirs, min_value=0.0,
                             max_value=1.)
    radius_vec = get_float_vec(tuner, 'spectral radius', spectral_radius, reservoirs,
                               min_value=0.01, max_value=2., sampling='log')
    global_radius = get_gsr(tuner, gsr)
    in_scale = get_float_vec(tuner, 'input scaling', input_scaling, reservoirs, min_value=0.1,
                             max_value=1.5, step=0.1)
    b_scale = get_float_vec(tuner, 'bias scaling', bias_scaling, reservoirs, min_value=0.1,
                            max_value=1.5, step=0.1)
    leak = get_float(tuner, 'leaky', leaky, min_value=0.1, max_value=1, step=0.1)

    esn = ESN2(units=n_units,
               sub_reservoirs=reservoirs,
               connectivity=conn_vec,
               spectral_radius=radius_vec,
               gsr=global_radius,
               output_units=output,
               output_activation=OUTPUT_ACTIVATION,
               input_scaling=in_scale,
               bias_scaling=b_scale,
               leaky=leak,
               )

    # The learning rate is registered last, after the model has been constructed.
    lr = get_float(tuner, 'learning rate', learning_rate, min_value=1e-5, max_value=1e-1, sampling='log')
    esn.compile(
        optimizer=keras.optimizers.RMSprop(lr),
        loss=LOSS_FUNCTION,
        metrics=['accuracy'])
    return esn


def build_ESN3(output, reservoirs,  # Defined by dataset
               units, spectral_radius, gsr, connectivity, input_scaling, bias_scaling, leaky, learning_rate,
               # Defined by experiment
               tuner) -> ESN3:
    """Build and compile an ESN3 whose hyperparameters are registered on ``tuner``.

    ``output`` is the number of output units and ``reservoirs`` the number of
    sub-reservoirs. Connectivity and spectral radius are square matrices
    (``get_connectivity`` / ``get_spectral_radius``); input and bias scaling
    are per-sub-reservoir vectors. The return annotation must stay ``ESN3``:
    the compute cell derives the model name from it.
    """
    n_units = get_int(tuner, 'units', units, 50, MAX_UNITS)
    conn_matrix = get_connectivity(tuner, connectivity, reservoirs)
    radius_matrix = get_spectral_radius(tuner, spectral_radius, reservoirs)
    global_radius = get_gsr(tuner, gsr)
    in_scale = get_float_vec(tuner, 'input scaling', input_scaling, reservoirs, min_value=0.1,
                             max_value=1.5, step=0.1)
    b_scale = get_float_vec(tuner, 'bias scaling', bias_scaling, reservoirs, min_value=0.1,
                            max_value=1.5, step=0.1)
    leak = get_float(tuner, 'leaky', leaky, min_value=0.1, max_value=1, step=0.1)

    esn = ESN3(units=n_units,
               sub_reservoirs=reservoirs,
               connectivity=conn_matrix,
               spectral_radius=radius_matrix,
               gsr=global_radius,
               output_units=output,
               output_activation=OUTPUT_ACTIVATION,
               input_scaling=in_scale,
               bias_scaling=b_scale,
               leaky=leak,
               )

    # The learning rate is registered last, after the model has been constructed.
    lr = get_float(tuner, 'learning rate', learning_rate, min_value=1e-5, max_value=1e-1, sampling='log')
    esn.compile(
        optimizer=keras.optimizers.RMSprop(lr),
        loss=LOSS_FUNCTION,
        metrics=['accuracy'])
    return esn


def build_ESN4(output, reservoirs,  # Defined by dataset
               units, spectral_radius, gsr, connectivity, input_scaling, bias_scaling, leaky, learning_rate,
               # Defined by experiment
               tuner) -> ESN4:
    """Build and compile an ESN4 whose hyperparameters are registered on ``tuner``.

    Same hyperparameters as ``build_ESN3`` plus one always-searched
    ``'partition <i>'`` Float in [0, 1] per sub-reservoir. The partition
    values are normalised to sum to 1 (all zeros if their sum is 0) before
    being passed to the model. The return annotation must stay ``ESN4``: the
    compute cell derives the model name from it.
    """
    raw_shares = [tuner.Float('partition ' + str(i), min_value=0., max_value=1.0) for i in range(reservoirs)]
    total = sum(raw_shares)
    # Normalize the partition vector so that it sums to 1; guard against a zero total.
    partitions = [0 if total == 0 else share / total for share in raw_shares]

    n_units = get_int(tuner, 'units', units, 50, MAX_UNITS)
    conn_matrix = get_connectivity(tuner, connectivity, reservoirs)
    radius_matrix = get_spectral_radius(tuner, spectral_radius, reservoirs)
    global_radius = get_gsr(tuner, gsr)
    in_scale = get_float_vec(tuner, 'input scaling', input_scaling, reservoirs, min_value=0.1,
                             max_value=1.5, step=0.1)
    b_scale = get_float_vec(tuner, 'bias scaling', bias_scaling, reservoirs, min_value=0.1,
                            max_value=1.5, step=0.1)
    leak = get_float(tuner, 'leaky', leaky, min_value=0.1, max_value=1, step=0.1)

    esn = ESN4(units=n_units,
               sub_reservoirs=reservoirs,
               connectivity=conn_matrix,
               partitions=partitions,
               spectral_radius=radius_matrix,
               gsr=global_radius,
               output_units=output,
               output_activation=OUTPUT_ACTIVATION,
               input_scaling=in_scale,
               bias_scaling=b_scale,
               leaky=leak,
               )

    # The learning rate is registered last, after the model has been constructed.
    lr = get_float(tuner, 'learning rate', learning_rate, min_value=1e-5, max_value=1e-1, sampling='log')
    esn.compile(
        optimizer=keras.optimizers.RMSprop(lr),
        loss=LOSS_FUNCTION,
        metrics=['accuracy'])
    return esn

# Experiment configurations:

In [13]:
# Experiment plan.
#   'Datasets' / 'Classes' / 'Models' select what the compute cell iterates over.
#   Every other key is an experiment class mapping an experiment name to a tuple of
#   HP descriptors. The tuple is unpacked positionally into the build_ESN* functions
#   after (output, reservoirs), so its order must match the column header below.
config = {
    'Datasets': ["ArticularyWordRecognition"],# "CharacterTrajectories", "Libras", "SpokenArabicDigits", "Epilepsy", "JapaneseVowels"],
    'Classes': ["Best Models"],
    'Models': [build_ESN1, build_ESN2, build_ESN3, build_ESN4],
    #                Units         | Spectral radius| G.S.R         | Connectivity     | Input scaling  | Bias scaling   | leaky    | learning rate
    #                F             | F-R            | F             | F - R            | F-R            | F-R            | F        | F
    'Best Models': {
        'Best':      (HP.free(),     HP.free(),       HP.free(),      HP.restricted(),   HP.free(),       HP.free(),       HP.free(), HP.free())
    },
    'N SR': {  # A model with N sub-reservoirs now has 7 + N hyperparameters (N diagonal spectral radii + 1 off-diagonal spectral radius, 1 G.S.R., 1 off-diagonal connectivity, 1 input scaling, 1 bias scaling, 1 leaky, 1 learning rate)
         'Units 50': (HP.fixed(50),  HP.free(),       HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
         'Units 75': (HP.fixed(75),  HP.free(),       HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
        # Was HP.fixed(122): a typo, the label and the 1.5x unit series (50, 75, 112, 168, 253, 379) both say 112.
        'Units 112': (HP.fixed(112), HP.free(),       HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
        'Units 168': (HP.fixed(168), HP.free(),       HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
        'Units 253': (HP.fixed(253), HP.free(),       HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
        'Units 379': (HP.fixed(379), HP.free(),       HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
    },
    'Single SR': {
         'Units 50': (HP.fixed(50),  HP.restricted(), HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
    #     'Units 75': (HP.fixed(75),  HP.restricted(), HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
    #    'Units 100': (HP.fixed(100), HP.restricted(), HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
    #    'Units 150': (HP.fixed(150), HP.restricted(), HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
    #    'Units 200': (HP.fixed(200), HP.restricted(), HP.free(),      HP.restricted(1.), HP.restricted(), HP.restricted(), HP.free(), HP.free()),
    }
}

### Compute function

In [22]:
# Run every (dataset, experiment class, experiment, model) combination listed in `config`:
# tune the model, benchmark the best configuration, store the statistics and plot the model.
datasets = config.get('Datasets')
classes = config.get('Classes')
models_fn = config.get('Models')
#benchmarks = BenchmarksDB()

start_learning = time()
for dataset in datasets:
    train_path = os.path.join(DATA_ROOT, dataset, dataset + '_TRAIN.ts')
    test_path = os.path.join(DATA_ROOT, dataset, dataset + '_TEST.ts')

    x_train_all, y_train_all = load_sktime_dataset(train_path)
    x_test, y_test = load_sktime_dataset(test_path)

    # Hold out a stratified third of the training data for validation.
    x_train, x_val, y_train, y_val = train_test_split(x_train_all, y_train_all,
                                                      test_size=0.33, random_state=42, stratify=y_train_all)

    train_set = (x_train.astype(np.float64), y_train.astype(np.float64))  # Todo is this cast necessary?
    val_set = (x_val.astype(np.float64), y_val.astype(np.float64))
    test_set = (x_test.astype(np.float64), y_test.astype(np.float64))

    # The number of input features is passed to the builders as their `reservoirs` argument.
    features = x_train.shape[-1]
    # Counted on the test labels: the test set must contain at least one sample of every class.
    output_units = len(np.unique(y_test))

    for class_name in classes:
        for experiment, params in config.get(class_name).items():
            for model_fn in models_fn:
                # The model name is taken from the builder's return annotation (e.g. "ESN1").
                model_name = model_fn.__annotations__['return'].__name__
                # Honour the SKIP setting instead of the previous hard-coded `and True`.
                if SKIP and benchmarks.is_benchmarked(dataset, class_name, experiment, model_name):
                    continue
                build_fn = partial(model_fn, output_units, features, *params)
                names = (dataset, class_name, experiment, model_name)
                model, stat = tune_and_test(build_fn, names,
                                            train_set, val_set, test_set,
                                            tuner_path=TUNER_ROOT)
                benchmarks.add(dataset, class_name, experiment, model_name, stat)
                model.plot(names, path=WEIGHTS_ROOT, show=False)
                #send_notification(experiment + " " + model_name, dataset + " Accuracy " + stat.get_accuracy_str())

# Measure once so the printed value and the notification report the same duration.
elapsed = time() - start_learning
print("Total learning time:" + str(elapsed))
send_notification("All Done", "Requested time:" + str(elapsed))

  warn(


INFO:tensorflow:Reloading Oracle from existing project /dati/luca/Uni-Luca/Tesi/tesi/models/BayesianOptimization/ArticularyWordRecognition/Best Models/Best ESN3/oracle.json

Search: Running Trial #2

Hyperparameter    |Value             |Best Value So Far 
units             |274               |?                 
connectivity 0    |0.038552          |?                 
connectivity 1    |0.40854           |?                 
connectivity 2    |0.27188           |?                 
connectivity 3    |0.88827           |?                 
connectivity 4    |0.35185           |?                 
connectivity 5    |0.54816           |?                 
connectivity 6    |0.066884          |?                 
connectivity 7    |0.49754           |?                 
connectivity 8    |0.24352           |?                 
connectivity X->Y |0.97835           |?                 
spectral radius...|1.2               |?                 
spectral radius 0 |1.8               |?                 
sp

InvalidArgumentError: Exception encountered when calling layer "sequential" (type Sequential).

Input matrix must be square. [Op:Eig]

Call arguments received:
  • inputs=tf.Tensor(shape=(184, 144, 9), dtype=float64)
  • training=None
  • mask=None

In [None]:
# Print the accuracy summary and the tuned hyperparameters of every stored benchmark.
for ds_name, cls_name, exp_name, mdl_name, result in benchmarks:
    print(ds_name, cls_name, exp_name, mdl_name)
    print(result.get_accuracy_str())
    for hp_name, hp_value in result.hyperparameters.values.items():
        print(f"{hp_name}: {hp_value}")
    print("######################")