Package dependencies

In [11]:
# Install the notebook's third-party dependencies directly from their Git repositories.
# NOTE(review): none of the installs pins a tag or commit, so re-running this cell at a
# later date may pull different code than the run whose outputs are saved below.
!pip3 install -U -q git+https://github.com/fchollet/keras.git
!pip3 install -U -q git+https://www.github.com/keras-team/keras-contrib.git
!pip3 install -U -q git+https://github.com/hadim/keras-toolbox.git
!pip3 install -U -q git+https://github.com/DottD/convnets-keras.git
!pip3 install -U -q git+https://github.com/python-telegram-bot/python-telegram-bot.git
print('Depedencies successfully installed')

Depedencies successfully installed


Import package dependencies

In [2]:
import os
import argparse
import numpy as np
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Model, load_model
from keras.layers import Input, Dense, ELU, Dropout, BatchNormalization
from keras.losses import binary_crossentropy
from keras.metrics import top_k_categorical_accuracy
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.callbacks import Callback, ProgbarLogger, TerminateOnNaN, ModelCheckpoint
from keras.callbacks import LearningRateScheduler, LambdaCallback, ReduceLROnPlateau, EarlyStopping
from keras.utils import HDF5Matrix
from keras.applications import ResNet50
from convnetskeras2.alexnet import AlexNet 
from kerastoolbox.callbacks import TelegramMonitor, PrintMonitor
from h5datagen import H5DataGen
import h5py
from io import StringIO
from time import time
from datetime import datetime
from tensorboard_logging import Logger

### Check GPU availability

In [3]:
# Ask TensorFlow for its GPU device name and report whether it is the expected first GPU.
device_name = tf.test.gpu_device_name()
gpu_found = device_name == '/device:GPU:0'
print('Found GPU at: {}'.format(device_name) if gpu_found else 'GPU device not found')

GPU device not found


## Set up parameters

In [5]:
#@title Parameters { run: "auto", display-mode: "form" }
# Settings for the logit-extraction stage. The trailing "#@param" markers are form
# annotations and must stay on the same line as the value they describe.
# NOTE(review): img_shape is not referenced by any cell visible in this notebook.
img_shape = 128 #@param {type:"integer"}
# Batch size handed to the data generator.
batch_size = 64 #@param {type:"integer"}
# Number of repetitions written to the 'training' dataset (passed to db_append).
N = 1000 #@param {type:"integer"}
# Number of repetitions written to the 'validation' dataset (passed to db_append).
V = 100 #@param {type:"integer"}
# Input HDF5 image database read by the data generator.
# NOTE(review): absolute, machine-specific path.
db_path = "/Users/MacD/Databases/database.h5" #@param {type:"string"}
db_path = os.path.abspath(os.path.normpath(db_path))
# Output HDF5 file that receives the computed logits.
filename = "/Users/MacD/Databases/logits.h5" #@param {type:"string"}
filename = os.path.abspath(os.path.normpath(filename))
# Backbone used to compute the logits; only "alexnet" and "resnet" are handled below.
architecture = "alexnet" #@param ["alexnet", "resnet"]

## Set up some monitors

In [6]:
# Monitors that receive every message sent through printmsg().
# NOTE(review): the disabled Telegram monitor below previously embedded a literal bot token
# and chat id. They are redacted here; the token should be revoked, and credentials should
# be read from the environment rather than stored in the notebook.
monitors = [
  #TelegramMonitor(api_token=os.environ["TELEGRAM_API_TOKEN"], chat_id=os.environ["TELEGRAM_CHAT_ID"], plot_history=1),
  PrintMonitor()]
def printmsg(*args):
  """Send a print-style message to every registered monitor.

  The arguments are rendered the way ``print`` renders them (``str`` of each
  argument, separated by single spaces, no trailing newline) and the resulting
  text is passed as ``message`` to ``notify`` of each entry in ``monitors``.
  """
  message = ' '.join(str(arg) for arg in args)
  for monitor in monitors:
    monitor.notify(message=message)

## Save logits from AlexNet
Load a pretrained AlexNet (without the last layer), compute the logits from the last used layer and save them to an .h5 file.

In [8]:
# Read and augment data
# Pick the input resolution and the Keras image layout that go with the chosen backbone;
# any other architecture name is rejected up front.
if architecture not in ('resnet', 'alexnet'):
  raise ValueError("Architecture type not available")
use_resnet = architecture == 'resnet'
target_size = (200,200) if use_resnet else (227,227)
K.set_image_data_format('channels_last' if use_resnet else 'channels_first')
def preproc_fn(x):
  """Standardize an array and squash it into (0, 1) with a logistic sigmoid.

  The array is shifted to zero mean and divided by its standard deviation (both
  computed over all elements; 1E-7 is added to the deviation to avoid a division
  by zero on constant input), then mapped through ``0.5 * (tanh(x / 2) + 1)``,
  which is the logistic sigmoid.

  Args:
    x: array-like input. Floating-point arrays keep their dtype; any other dtype
      is converted to float32 first.

  Returns:
    A new array with the same shape as ``x``. The input is never modified
    (the previous implementation normalized the caller's array in place and
    raised on integer arrays).
  """
  x = np.asarray(x)
  if not np.issubdtype(x.dtype, np.floating):
    # In-place float arithmetic is impossible on integer arrays; work on a float copy.
    x = x.astype(np.float32)
  centered = x - np.mean(x, keepdims=True)
  scaled = centered / (np.std(centered, keepdims=True) + 1E-7)
  return .5 * (np.tanh(.5 * scaled) + 1) # this is a sigmoid
# Augmenting generator backed by an HDF5 image database.
# NOTE(review): H5DataGen is a project-local class; the keyword names below look like those
# of Keras' ImageDataGenerator, but their exact semantics (e.g. the unit of shear_range)
# should be confirmed against h5datagen.
datagen = H5DataGen(
  width_shift_range=0.2,
  height_shift_range=0.2,
  rotation_range = 50, # degrees (int)
  shear_range = 20.*np.pi/180., # radians (float)
  zoom_range = 0.1,
  fill_mode = 'constant',
  cval = 0,
  horizontal_flip = True,
  preprocessing_function = preproc_fn)
# Options forwarded to flow_from_h5file. class_mode is None, so no labels are requested.
# NOTE(review): shuffling is enabled here, while the training cell later labels every stored
# repetition as classes 0..num_classes-1 in order. Verify that the provider yields exactly
# one sample per class, in class order, for each pass.
idg_args = {
  'target_size': target_size,
  'color_mode':'rgb',
  'class_mode': None,
  'batch_size': batch_size,
  'shuffle': True
}
data_provider = datagen.flow_from_h5file(db_path, **idg_args)
# The class count reported by the provider sizes the output datasets written below.
num_classes = data_provider.num_classes
printmsg("Found", data_provider.tot_samples, "images belonging to", data_provider.num_classes, "classes.")
# Build network
# Instantiate the backbone with ImageNet weights and include_top=False; the input shape
# follows the image layout selected for the architecture (channels first for AlexNet,
# channels last for ResNet50).
if architecture == 'alexnet':
  model = AlexNet(
    include_top=False,
    weights='imagenet',
    input_shape=(3, target_size[0], target_size[1]),
    trainable=False)
elif architecture == 'resnet':
  model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(target_size[0], target_size[1], 3),
    pooling='avg')
else:
  raise ValueError("Architecture type not available")
model.compile(optimizer='rmsprop', loss='mse')
# Width of the model output, i.e. the number of values stored per sample.
n_logits = model.output_shape[1]
printmsg("Network created -", n_logits, "logits")
# Start writing the databases
# The output file is opened in append mode so that an existing logits file is extended
# rather than overwritten.
read_mode = 'a'
try:
  with h5py.File(filename, read_mode) as f:
    printmsg("Output file", filename, "opened")
    def db_append(name, rep):
      """Compute and store ``rep`` more repetitions of logits in dataset ``name``.

      If the dataset does not exist it is created with room for ``rep`` repetitions
      of ``num_classes`` rows each; otherwise it is validated and enlarged by the
      same amount. One repetition is one call to ``model.predict_generator`` whose
      result is written to the next ``num_classes`` rows.

      Args:
        name: dataset name inside the open HDF5 file.
        rep: number of repetitions to add.

      Raises:
        ValueError: if an existing dataset lacks the 'num_classes' or 'repetitions'
          attribute, or was written for a different number of classes.
      """
      if name not in f.keys():
        printmsg("The", name, "dataset will be created")
        db = f.create_dataset(name=name, 
          shape=(rep*num_classes, n_logits), dtype='f',
          maxshape=(None, n_logits),
          compression="gzip",
          compression_opts=9)
        db.attrs['num_classes'] = num_classes
        db.attrs['repetitions'] = rep
        prev_rep = 0
        printmsg("Created dataset", name, "("+str(rep)+" repetitions)")
      else:
        db = f[name]
        if ('num_classes' not in db.attrs) or db.attrs['num_classes'] != num_classes:
          raise ValueError("The dataset lacks 'num_classes' or it differs from input folder traits")
        if 'repetitions' not in db.attrs:
          raise ValueError("The dataset lacks 'repetitions'")
        prev_rep = db.attrs['repetitions']
        db.attrs['repetitions'] += rep
        db.resize((db.shape[0]+rep*num_classes, db.shape[1]))
        printmsg("Appending logits for", rep, "repetitions of", num_classes, 
                 "classes to dataset", name, "(total "+str(db.attrs['repetitions'])+" repetitions)")

      # NOTE(review): 'repetitions' already counts the rows about to be written; if the
      # loop is interrupted and the file is kept, the attribute overstates what is filled.
      total_rep = db.attrs['repetitions']  # read once instead of on every iteration
      for k in range(prev_rep, total_rep):
        begin = k * num_classes # inclusive
        end = (k+1) * num_classes # exclusive
        time_start = time()
        prediction = model.predict_generator(data_provider)
        time_end = time()
        db[begin:end, :] = prediction
        printmsg("Step", k+1, "/", total_rep, "- slice", str(begin)+':'+str(end),
                 "DONE in {} seconds".format(time_end-time_start))

    db_append('training', N)
    db_append('validation', V)
except (Exception, KeyboardInterrupt) as error:
  # Deliberate catch-all (including Ctrl-C): report the problem and let the user decide
  # what to do with the possibly incomplete output file.
  print("An error occurred!!", str(error))
  if os.path.exists(filename):
    to_be_removed = input('The database is no more valid, shall I remove it? [y]/n') != 'n'
    if to_be_removed:
      os.remove(filename)
      print("Database {} removed".format(filename))
    else:
      print("Database {} not removed".format(filename))
  else:
    # The failure happened before the file was created; removing it would itself raise.
    print("Database {} was never created, nothing to remove".format(filename))

Found 2483632 images belonging to 177 classes.
Network created - 4096 logits
Output file /Users/MacD/Databases/logits.h5 opened
The training dataset will be created
Created dataset training (1000 repetitions)
Step 1 / 1000 - slice 0:177 DONE in 12.617342233657837 seconds
Step 2 / 1000 - slice 177:354 DONE in 11.318016767501831 seconds
Step 3 / 1000 - slice 354:531 DONE in 11.682682991027832 seconds
Step 4 / 1000 - slice 531:708 DONE in 12.015029907226562 seconds
Step 5 / 1000 - slice 708:885 DONE in 12.08167028427124 seconds
Step 6 / 1000 - slice 885:1062 DONE in 12.818318843841553 seconds
Step 7 / 1000 - slice 1062:1239 DONE in 13.531311988830566 seconds
Step 8 / 1000 - slice 1239:1416 DONE in 13.710819005966187 seconds
Step 9 / 1000 - slice 1416:1593 DONE in 10.980029106140137 seconds
Step 10 / 1000 - slice 1593:1770 DONE in 11.032562732696533 seconds
Step 11 / 1000 - slice 1770:1947 DONE in 10.944623947143555 seconds
Step 12 / 1000 - slice 1947:2124 DONE in 11.125774145126343 second

Step 118 / 1000 - slice 20709:20886 DONE in 16.666855812072754 seconds
Step 119 / 1000 - slice 20886:21063 DONE in 16.041518688201904 seconds
Step 120 / 1000 - slice 21063:21240 DONE in 16.38516402244568 seconds
Step 121 / 1000 - slice 21240:21417 DONE in 16.905516147613525 seconds
Step 122 / 1000 - slice 21417:21594 DONE in 16.392373085021973 seconds
Step 123 / 1000 - slice 21594:21771 DONE in 16.254162073135376 seconds
Step 124 / 1000 - slice 21771:21948 DONE in 16.37112283706665 seconds
Step 125 / 1000 - slice 21948:22125 DONE in 16.368615865707397 seconds
Step 126 / 1000 - slice 22125:22302 DONE in 16.53879189491272 seconds
Step 127 / 1000 - slice 22302:22479 DONE in 16.18482208251953 seconds
Step 128 / 1000 - slice 22479:22656 DONE in 16.326942205429077 seconds
Step 129 / 1000 - slice 22656:22833 DONE in 16.68998908996582 seconds
Step 130 / 1000 - slice 22833:23010 DONE in 17.2572979927063 seconds
Step 131 / 1000 - slice 23010:23187 DONE in 16.22209882736206 seconds
Step 132 / 100

Step 234 / 1000 - slice 41241:41418 DONE in 16.16317105293274 seconds
Step 235 / 1000 - slice 41418:41595 DONE in 16.83797287940979 seconds
Step 236 / 1000 - slice 41595:41772 DONE in 16.031065940856934 seconds
Step 237 / 1000 - slice 41772:41949 DONE in 16.576483011245728 seconds
Step 238 / 1000 - slice 41949:42126 DONE in 16.404372930526733 seconds
Step 239 / 1000 - slice 42126:42303 DONE in 16.510457038879395 seconds
Step 240 / 1000 - slice 42303:42480 DONE in 16.37769865989685 seconds
Step 241 / 1000 - slice 42480:42657 DONE in 16.394344329833984 seconds
Step 242 / 1000 - slice 42657:42834 DONE in 17.637861013412476 seconds
Step 243 / 1000 - slice 42834:43011 DONE in 16.319549083709717 seconds
Step 244 / 1000 - slice 43011:43188 DONE in 16.005059957504272 seconds
Step 245 / 1000 - slice 43188:43365 DONE in 16.652379989624023 seconds
Step 246 / 1000 - slice 43365:43542 DONE in 18.34857487678528 seconds
Step 247 / 1000 - slice 43542:43719 DONE in 17.06564497947693 seconds
Step 248 / 

Step 350 / 1000 - slice 61773:61950 DONE in 11.506618738174438 seconds
Step 351 / 1000 - slice 61950:62127 DONE in 11.491338014602661 seconds
Step 352 / 1000 - slice 62127:62304 DONE in 11.135302782058716 seconds
Step 353 / 1000 - slice 62304:62481 DONE in 11.176668882369995 seconds
Step 354 / 1000 - slice 62481:62658 DONE in 11.197463035583496 seconds
Step 355 / 1000 - slice 62658:62835 DONE in 11.159059047698975 seconds
Step 356 / 1000 - slice 62835:63012 DONE in 11.18499207496643 seconds
Step 357 / 1000 - slice 63012:63189 DONE in 11.150430917739868 seconds
Step 358 / 1000 - slice 63189:63366 DONE in 11.087057828903198 seconds
Step 359 / 1000 - slice 63366:63543 DONE in 11.11498498916626 seconds
Step 360 / 1000 - slice 63543:63720 DONE in 11.129112005233765 seconds
Step 361 / 1000 - slice 63720:63897 DONE in 13.060790777206421 seconds
Step 362 / 1000 - slice 63897:64074 DONE in 11.180936813354492 seconds
Step 363 / 1000 - slice 64074:64251 DONE in 11.112380981445312 seconds
Step 364

Step 466 / 1000 - slice 82305:82482 DONE in 19.703739166259766 seconds
Step 467 / 1000 - slice 82482:82659 DONE in 13.292970895767212 seconds
Step 468 / 1000 - slice 82659:82836 DONE in 13.57598090171814 seconds
Step 469 / 1000 - slice 82836:83013 DONE in 12.759927988052368 seconds
Step 470 / 1000 - slice 83013:83190 DONE in 11.158961057662964 seconds
Step 471 / 1000 - slice 83190:83367 DONE in 11.031543016433716 seconds
Step 472 / 1000 - slice 83367:83544 DONE in 11.237030982971191 seconds
Step 473 / 1000 - slice 83544:83721 DONE in 10.94584584236145 seconds
Step 474 / 1000 - slice 83721:83898 DONE in 10.938810110092163 seconds
Step 475 / 1000 - slice 83898:84075 DONE in 10.935195922851562 seconds
Step 476 / 1000 - slice 84075:84252 DONE in 10.909494161605835 seconds
Step 477 / 1000 - slice 84252:84429 DONE in 11.349190950393677 seconds
Step 478 / 1000 - slice 84429:84606 DONE in 10.946097135543823 seconds
Step 479 / 1000 - slice 84606:84783 DONE in 10.95789098739624 seconds
Step 480 

Step 582 / 1000 - slice 102837:103014 DONE in 11.298645973205566 seconds
Step 583 / 1000 - slice 103014:103191 DONE in 12.458064794540405 seconds
Step 584 / 1000 - slice 103191:103368 DONE in 12.254891872406006 seconds
Step 585 / 1000 - slice 103368:103545 DONE in 12.81511902809143 seconds
Step 586 / 1000 - slice 103545:103722 DONE in 11.999859809875488 seconds
Step 587 / 1000 - slice 103722:103899 DONE in 13.078321933746338 seconds
Step 588 / 1000 - slice 103899:104076 DONE in 11.160415887832642 seconds
Step 589 / 1000 - slice 104076:104253 DONE in 10.93601393699646 seconds
Step 590 / 1000 - slice 104253:104430 DONE in 11.105652093887329 seconds
Step 591 / 1000 - slice 104430:104607 DONE in 11.078790187835693 seconds
Step 592 / 1000 - slice 104607:104784 DONE in 10.814877986907959 seconds
Step 593 / 1000 - slice 104784:104961 DONE in 13.698939323425293 seconds
Step 594 / 1000 - slice 104961:105138 DONE in 12.422838926315308 seconds
Step 595 / 1000 - slice 105138:105315 DONE in 12.6509

Step 695 / 1000 - slice 122838:123015 DONE in 12.269747018814087 seconds
Step 696 / 1000 - slice 123015:123192 DONE in 12.167454957962036 seconds
Step 697 / 1000 - slice 123192:123369 DONE in 12.245534896850586 seconds
Step 698 / 1000 - slice 123369:123546 DONE in 12.180423974990845 seconds
Step 699 / 1000 - slice 123546:123723 DONE in 12.155263900756836 seconds
Step 700 / 1000 - slice 123723:123900 DONE in 12.27121615409851 seconds
Step 701 / 1000 - slice 123900:124077 DONE in 12.205442905426025 seconds
Step 702 / 1000 - slice 124077:124254 DONE in 12.271299123764038 seconds
Step 703 / 1000 - slice 124254:124431 DONE in 12.439842224121094 seconds
Step 704 / 1000 - slice 124431:124608 DONE in 12.148783206939697 seconds
Step 705 / 1000 - slice 124608:124785 DONE in 12.262766122817993 seconds
Step 706 / 1000 - slice 124785:124962 DONE in 12.436716079711914 seconds
Step 707 / 1000 - slice 124962:125139 DONE in 12.311320781707764 seconds
Step 708 / 1000 - slice 125139:125316 DONE in 12.212

Step 808 / 1000 - slice 142839:143016 DONE in 13.107497930526733 seconds
Step 809 / 1000 - slice 143016:143193 DONE in 12.9666268825531 seconds
Step 810 / 1000 - slice 143193:143370 DONE in 12.920257091522217 seconds
Step 811 / 1000 - slice 143370:143547 DONE in 13.078426122665405 seconds
Step 812 / 1000 - slice 143547:143724 DONE in 12.130358219146729 seconds
Step 813 / 1000 - slice 143724:143901 DONE in 12.036061763763428 seconds
Step 814 / 1000 - slice 143901:144078 DONE in 13.739752054214478 seconds
Step 815 / 1000 - slice 144078:144255 DONE in 12.156924962997437 seconds
Step 816 / 1000 - slice 144255:144432 DONE in 13.025922060012817 seconds
Step 817 / 1000 - slice 144432:144609 DONE in 12.866425037384033 seconds
Step 818 / 1000 - slice 144609:144786 DONE in 14.123568058013916 seconds
Step 819 / 1000 - slice 144786:144963 DONE in 12.244639158248901 seconds
Step 820 / 1000 - slice 144963:145140 DONE in 11.861634969711304 seconds
Step 821 / 1000 - slice 145140:145317 DONE in 11.9207

Step 921 / 1000 - slice 162840:163017 DONE in 10.981138944625854 seconds
Step 922 / 1000 - slice 163017:163194 DONE in 10.864591121673584 seconds
Step 923 / 1000 - slice 163194:163371 DONE in 10.90548300743103 seconds
Step 924 / 1000 - slice 163371:163548 DONE in 11.786193132400513 seconds
Step 925 / 1000 - slice 163548:163725 DONE in 10.978208065032959 seconds
Step 926 / 1000 - slice 163725:163902 DONE in 10.829850196838379 seconds
Step 927 / 1000 - slice 163902:164079 DONE in 10.944430828094482 seconds
Step 928 / 1000 - slice 164079:164256 DONE in 10.877121686935425 seconds
Step 929 / 1000 - slice 164256:164433 DONE in 10.951154947280884 seconds
Step 930 / 1000 - slice 164433:164610 DONE in 10.970303297042847 seconds
Step 931 / 1000 - slice 164610:164787 DONE in 10.936965942382812 seconds
Step 932 / 1000 - slice 164787:164964 DONE in 10.934694290161133 seconds
Step 933 / 1000 - slice 164964:165141 DONE in 11.129607915878296 seconds
Step 934 / 1000 - slice 165141:165318 DONE in 11.259

Step 36 / 100 - slice 6195:6372 DONE in 11.866087913513184 seconds
Step 37 / 100 - slice 6372:6549 DONE in 11.907933235168457 seconds
Step 38 / 100 - slice 6549:6726 DONE in 11.898898124694824 seconds
Step 39 / 100 - slice 6726:6903 DONE in 11.97248888015747 seconds
Step 40 / 100 - slice 6903:7080 DONE in 12.029602766036987 seconds
Step 41 / 100 - slice 7080:7257 DONE in 12.439888954162598 seconds
Step 42 / 100 - slice 7257:7434 DONE in 12.027023792266846 seconds
Step 43 / 100 - slice 7434:7611 DONE in 11.967891931533813 seconds
Step 44 / 100 - slice 7611:7788 DONE in 11.990520000457764 seconds
Step 45 / 100 - slice 7788:7965 DONE in 11.86212682723999 seconds
Step 46 / 100 - slice 7965:8142 DONE in 11.871284008026123 seconds
Step 47 / 100 - slice 8142:8319 DONE in 11.853605031967163 seconds
Step 48 / 100 - slice 8319:8496 DONE in 11.89610505104065 seconds
Step 49 / 100 - slice 8496:8673 DONE in 11.834949016571045 seconds
Step 50 / 100 - slice 8673:8850 DONE in 12.103639125823975 second

## Train the last layer of the Net
Use the logits computed before to train the last layer previously removed from the net.

### Define a folder scanner
Define a function that recursively scans the given folder, looking for files whose names end with the provided string.

In [9]:
def scan_dir(path, ending):
	"""Walk ``path`` recursively and collect the files whose names end with ``ending``.

	Returns a ``(dir_list, file_list)`` pair: ``file_list`` holds the joined paths of
	all matching files, ``dir_list`` the directories containing at least one match.
	"""
	matched_dirs, matched_files = [], []
	for folder, _, names in os.walk(path):
		hits = [os.path.join(folder, name) for name in names if name.endswith(ending)]
		if not hits:
			# nothing of interest in this folder
			continue
		matched_files.extend(hits)
		matched_dirs.append(folder)
	return matched_dirs, matched_files

### Define the loss function
This is a mix between a softmax and a sigmoid, to handle this peculiar problem where the output is always multiclass, but during training we expect only one predicted class at a time, while at runtime it could also predict multiple classes together.

In [10]:
def binary_sparse_softmax_cross_entropy(target, output, from_logits=False):
	"""
	Sparse softmax cross entropy for a model whose last layer is a sigmoid.

	Unless `from_logits` is set, `output` is treated as sigmoid probabilities:
	it is clipped to [1E-7, 1 - 1E-7] and mapped back to logits with
	log(p / (1 - p)) before the TensorFlow sparse softmax loss is applied.
	`target` is flattened and cast to int64 to serve as the labels.
	"""
	if not from_logits:
		# Keras hands over probabilities, TF wants logits: invert the sigmoid
		eps = tf.convert_to_tensor(1E-7, output.dtype.base_dtype)
		output = tf.clip_by_value(output, eps, 1 - eps)
		output = tf.log(output / (1 - output))

	static_shape = output.get_shape()
	flat_labels = tf.cast(tf.reshape(target, [-1]), 'int64')
	flat_logits = tf.reshape(output, [-1, int(static_shape[-1])])
	loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
		labels=flat_labels,
		logits=flat_logits)
	if len(static_shape) < 3:
		return loss
	# extra leading dimensions (e.g. timesteps or spatial axes) were flattened
	# above, so give the loss back every dimension of the output but the last
	return tf.reshape(loss, tf.shape(output)[:-1])

### Define a Tensorboard callback

In [11]:
class TensorboardCallback(Callback):
	"""Keras callback that logs per-epoch scalars and periodically saves the model.

	Two ``Logger`` instances are created, writing to ``<path>/training`` and
	``<path>/validation``. At the end of every epoch the epoch duration, the
	sparse categorical accuracy (as a percentage) and the loss are logged for both
	splits. Every ``save_period`` epochs the model is saved to
	``<path>/save_<epoch>.h5`` and saves with a lower epoch number are deleted.

	Args:
		path: folder receiving the logs and the model saves.
		args: optional dict of parameters, logged as a text description.
		events_dir: optional folder scanned for earlier 'events*' files, looked up
			in sub-folders named 'training' and 'validation'.
		max_step: passed to ``Logger.copyFrom`` together with the events files found
			in ``events_dir``; copying only happens when both are truthy.
		save_period: number of epochs between two model saves.

	NOTE(review): ``Logger`` is project-local; the behavior of ``log_text``,
	``log_scalar`` and ``copyFrom`` is assumed from their names.
	"""
	def __init__(self, path, args=None, events_dir=None, max_step=None, save_period=10):
		# Let the Keras base class set up its own state before adding ours.
		super().__init__()
		self.save_period = save_period
		self.path = path
		train_dir = os.path.join(path, 'training')
		if not os.path.exists(train_dir): os.makedirs(train_dir)
		self.train_logger = Logger(train_dir)
		valid_dir = os.path.join(path, 'validation')
		if not os.path.exists(valid_dir): os.makedirs(valid_dir)
		self.valid_logger = Logger(valid_dir)
		if args:
			text = 'Parameters\n---------\n'
			for (key, val) in args.items():
				text += '- '+key+' = '+str(val)+'\n'
			self.train_logger.log_text('Description', text)
			self.valid_logger.log_text('Description', text)
		if events_dir and max_step:
			# Start from "not found" so that a missing events file is reported
			# instead of failing with an unbound local variable.
			train_events_file = None
			valid_events_file = None
			events_files = [F for F in scan_dir(events_dir, '')[1] if os.path.basename(F).startswith('events')]
			for events_file in events_files:
				parent_dir = os.path.dirname(events_file).split(os.sep)[-1]
				if 'training' == parent_dir:
					train_events_file = events_file
				elif 'validation' == parent_dir:
					valid_events_file = events_file
			if train_events_file:
				self.train_logger.copyFrom(train_events_file, max_step=max_step)
			else:
				print("No training events file found in", events_dir, "- previous history not copied")
			if valid_events_file:
				self.valid_logger.copyFrom(valid_events_file, max_step=max_step)
			else:
				print("No validation events file found in", events_dir, "- previous history not copied")
	def on_epoch_begin(self, epoch, logs=None):
		# Remember when the epoch started so its duration can be logged at the end.
		self.starttime=time()
	def on_epoch_end(self, epoch, logs=None):
		logs = logs or {}
		elapsed = time()-self.starttime
		self.train_logger.log_scalar("Speed", elapsed, epoch)
		self.train_logger.log_scalar("sparse_categorical_accuracy_%", logs['sparse_categorical_accuracy']*100, epoch)
		self.train_logger.log_scalar("loss", logs['loss'], epoch)
		self.valid_logger.log_scalar("Speed", elapsed, epoch)
		self.valid_logger.log_scalar("sparse_categorical_accuracy_%", logs['val_sparse_categorical_accuracy']*100, epoch)
		self.valid_logger.log_scalar("loss", logs['val_loss'], epoch)
		# Model save
		if ((epoch+1) % self.save_period) == 0:
			self.model.save(os.path.join(self.path, 'save_'+str(epoch)+'.h5'))
			_, oldsaves = scan_dir(self.path, '.h5')
			for save in oldsaves:
				try:
					# The epoch number is the text after the last '_' of the file stem.
					if int(save.split('.')[-2].split('_')[-1]) < epoch:
						os.remove(save)
				except (ValueError, IndexError, OSError):
					# Not one of our numbered saves, or it could not be deleted:
					# clean-up is best effort, so move on to the next file.
					continue

### Assign parameters
From ``` batch_size ``` on, each parameter accepts one or more values separated by a slash (`/`), e.g. `128/256`; the literal `None` leaves the parameter unset. When several values are given, every possible combination is used for training.


In [17]:
#@{ run: "auto", display-mode: "form" }
# Settings for the training stage. The trailing "#@param" markers are form annotations and
# must stay on the same line as the value they describe.
# Number of epochs passed to model.fit.
nb_epoch = 50 #@param {type:"integer"}
# A model save is written every save_epochs epochs.
save_epochs = 5 #@param {type:"integer"}
# HDF5 file holding the 'training' and 'validation' logits.
# NOTE(review): this rebinds db_path, which pointed at the image database in the
# extraction cell; the path is also absolute and machine-specific.
db_path = "/Users/MacD/Databases/logits.h5" #@param {type:"string"}
# When non-empty, training resumes from this saved model; the starting epoch is parsed
# from the number after the last '_' in the file name.
load_path = "" #@param {type:"string"}
# Parent folder in which one timestamped log folder is created per run.
output_folder = "/Users/MacD/Databases/training_log" #@param {type:"string"}
# The remaining parameters are strings parsed by tuplify below: values separated by '/',
# or the literal "None". batch_size also rebinds the integer used by the extraction cell.
batch_size = "256" #@param {type:"string"}
learning_rate = "1e-6" #@param {type:"string"}
decay_rate = "None" #@param {type:"string"}
ES_patience = "None" #@param {type:"string"}
ES_mindelta = "None" #@param {type:"string"}
RLROP_patience = "None" #@param {type:"string"}
RLROP_factor = "None" #@param {type:"string"}
db_path = os.path.abspath(os.path.normpath(db_path))
def tuplify(func, str_):
  """Parse a '/'-separated string into a list of values converted with ``func``.

  The literal string 'None' is special-cased and yields ``[None]``.
  """
  if str_ != 'None':
    return list(map(func, str_.split('/')))
  return [None]
# Turn every '/'-separated form string into a list of typed values ([None] for "None").
# Each name is rebound from a string to a list, so this only works right after the
# assignments above have been (re-)executed.
batch_size = tuplify(int, batch_size)
learning_rate = tuplify(float, learning_rate)
decay_rate = tuplify(float, decay_rate)
ES_patience = tuplify(int, ES_patience)
ES_mindelta = tuplify(float, ES_mindelta)
RLROP_patience = tuplify(int, RLROP_patience)
RLROP_factor = tuplify(float, RLROP_factor)

### Training

In [19]:
# Load data
# Read the attributes stored with the logits and check that both splits are present,
# carry the expected attributes and agree on the number of classes.
with h5py.File(db_path, "r") as f:
  if not ('training' in f.keys() and 'validation' in f.keys()):
    raise ValueError("The input database lacks training and validation datasets")
  train_db = f['training']
  valid_db = f['validation']
  if not ('num_classes' in train_db.attrs and 'repetitions' in train_db.attrs):
    raise ValueError("The training dataset lacks 'num_classes' and 'repetitions' attributes")
  train_N = train_db.attrs['repetitions']
  train_num_classes = train_db.attrs['num_classes']
  if not ('num_classes' in valid_db.attrs and 'repetitions' in valid_db.attrs):
    raise ValueError("The validation dataset lacks 'num_classes' and 'repetitions' attributes")
  valid_N = valid_db.attrs['repetitions']
  valid_num_classes = valid_db.attrs['num_classes']
  if train_num_classes != valid_num_classes:
    raise ValueError("The number of classes in training and validation databases differ")
  num_classes = train_num_classes
print("Training and validation data loaded")
print("Training data:", num_classes, "classes repeated", train_N, "times")
print("Validation data:", num_classes, "classes repeated", valid_N, "times")

# Expose the stored logits through Keras' HDF5Matrix wrapper.
train_data = HDF5Matrix(db_path, 'training')
valid_data = HDF5Matrix(db_path, 'validation')
# Labels are not stored in the file: they are rebuilt as 0..num_classes-1 repeated once per
# repetition.
# NOTE(review): this assumes every stored repetition holds exactly one row per class, in
# class order; confirm against the generator that produced the logits.
train_labels = np.tile(np.arange(num_classes), (train_N,))
valid_labels = np.tile(np.arange(num_classes), (valid_N,))
print(train_data.shape, train_labels.shape)
print(valid_data.shape, valid_labels.shape)

# Both splits must come from the same feature extractor, i.e. have the same logits width.
# (The exception used to be constructed without `raise`, so the check never fired.)
if train_data.shape[1] != valid_data.shape[1]:
  raise ValueError("Different model used for training and validation, not allowed")
logits_length = train_data.shape[1]
# Get info about loaded data
# These entries are merged into the parameters that every run logs.
additional_info = {
  'Logits length': logits_length,
  'Number of classes': num_classes,
  'DB training repetitions': train_N,
  'Training samples': train_data.shape[0],
  'DB validation repetitions': valid_N,
  'Validation samples': valid_data.shape[0]
}

# Define the function that will be executed for each parameter
def runOnce(**roargs):
  """Train the final classification layer once for one hyper-parameter combination.

  Keyword arguments (all read from ``roargs``):
    batch_size: batch size passed to ``model.fit``.
    learning_rate: Adam learning rate (only used when a new model is built).
    decay_rate: if truthy and > 0, adds a ``LearningRateScheduler`` computing
      ``learning_rate / (1 + decay_rate * epoch)``.
    ES_patience, ES_mindelta: when both are truthy, adds ``EarlyStopping`` on ``val_loss``.
    RLROP_patience, RLROP_factor: when both are truthy and the factor is < 1, adds
      ``ReduceLROnPlateau`` on ``val_loss``.
    additional_info: optional dict whose entries are merged into the logged parameters.

  Also reads the notebook globals ``output_folder``, ``load_path``, ``save_epochs``,
  ``nb_epoch``, ``logits_length``, ``num_classes`` and the train/validation data and
  labels. A timestamped log folder is created under ``output_folder`` and receives a
  text summary, the logged scalars and the periodic model saves.
  """
  # Flatten the extra info into the parameter dict so that it is logged with the rest.
  roargs.update(roargs.pop("additional_info", None) or {})

  summary_folder = str(datetime.now().isoformat(sep='_', timespec='seconds')).replace(':', '_').replace('-', '_')
  log_dir = os.path.join(os.path.abspath(output_folder), summary_folder)
  if not os.path.exists(log_dir): os.makedirs(log_dir)
  print('Created log folder:', log_dir)
  if load_path:
    # The epoch number is the text between the last '_' and the next '.' of the path.
    # NOTE(review): saves are named after the 0-based epoch that just finished, so
    # resuming from that same number repeats one epoch - confirm this is intended.
    initial_epoch = int(load_path.split('_')[-1].split('.')[0])
    # Model creation
    model = load_model(load_path, custom_objects={'binary_sparse_softmax_cross_entropy': binary_sparse_softmax_cross_entropy})
  else:
    initial_epoch = 0
    # Model creation
    logits = Input(shape=(logits_length,))
    prediction = logits
    prediction = Dense(num_classes, activation='sigmoid', kernel_initializer="he_normal")(prediction)
    model = Model(inputs=logits, outputs=prediction)
    model.compile(optimizer=Adam(lr=roargs["learning_rate"], amsgrad=True), 
      loss=binary_sparse_softmax_cross_entropy, # mutually exclusive classes, independent per-class distributions
      metrics=["sparse_categorical_accuracy"])
  # Model description
  trainable_count = int(np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
  non_trainable_count = int(np.sum([K.count_params(p) for p in set(model.non_trainable_weights)]))
  roargs['Total parameters'] = trainable_count+non_trainable_count
  roargs['Trainable parameters'] = trainable_count
  roargs['Non-trainable parameters'] = non_trainable_count
  # Create custom callback
  # (the resume branch used to read the undefined name `args`; the save period is the
  # notebook-level `save_epochs` in both branches)
  if load_path: tensorboardCallback = TensorboardCallback(log_dir, roargs, os.path.dirname(load_path), initial_epoch, save_period=save_epochs)
  else: tensorboardCallback = TensorboardCallback(log_dir, roargs, save_period=save_epochs)
  # Save other information about the model
  with open(os.path.join(log_dir, 'summary_'+summary_folder+'.txt'), mode='w') as F:
    print2F = lambda s: F.write(s+'\n')
    print2F('------')
    print2F("Parameters:")
    for (key, val) in roargs.items():
      print2F(str(key)+' = '+str(val))
    print2F('------')
    print2F('Logs will be summarized in ' + log_dir)
    model.summary(print_fn=print2F)

  # List of callbacks
  callbacks = [TerminateOnNaN(), tensorboardCallback, PrintMonitor()]
               #,TelegramMonitor(api_token=os.environ["TELEGRAM_API_TOKEN"], chat_id=os.environ["TELEGRAM_CHAT_ID"], plot_history=1)]
               # NOTE(review): a literal bot token was redacted from the line above; revoke it.
  if roargs["decay_rate"] and roargs["decay_rate"] > 0:
    compute_lr = lambda e: roargs["learning_rate"] * 1./(1. + roargs["decay_rate"] * e)
    callbacks.append(LearningRateScheduler(compute_lr, verbose=0))
  if roargs["ES_mindelta"] and roargs["ES_patience"]:
    callbacks.append(EarlyStopping(monitor='val_loss', min_delta=roargs["ES_mindelta"], patience=roargs["ES_patience"], verbose=1, mode='min'))
  if roargs["RLROP_factor"] and roargs["RLROP_patience"] and roargs["RLROP_factor"] <  1:
    # NOTE(review): newer Keras releases call the `epsilon` argument `min_delta`.
    callbacks.append(ReduceLROnPlateau(monitor='val_loss', factor=roargs["RLROP_factor"], patience=roargs["RLROP_patience"], verbose=1, mode='min', epsilon=0.0001, cooldown=0, min_lr=1e-10))

  # Training
  model.fit(x = train_data, 
    y = train_labels,
    batch_size = roargs["batch_size"],
    epochs=nb_epoch,
    verbose=1,
    callbacks=callbacks,
    validation_data=(valid_data, valid_labels),
    shuffle=False,
    initial_epoch=initial_epoch)

# Run one training per combination of the parameter lists parsed from the form above.
def safe_len(_x_):
  """Length of a sequence, or 0 when it is empty or None."""
  return len(_x_) if _x_ else 0
sweep_axes = [batch_size, learning_rate, decay_rate,
              ES_patience, ES_mindelta, RLROP_patience,
              RLROP_factor]
n_tests = np.prod([safe_len(axis) for axis in sweep_axes])
print(n_tests, "total tests will be performed... be patient!")
try:
    for bs in batch_size:
      for lr in learning_rate:
        for dr in decay_rate:
          for esp in ES_patience:
            for esm in ES_mindelta:
              for rlropp in RLROP_patience:
                for rlropf in RLROP_factor:
                  # innermost level: one full training run for this combination
                  runOnce(
                    batch_size=bs,
                    learning_rate=lr,
                    decay_rate=dr,
                    ES_patience=esp,
                    ES_mindelta=esm,
                    RLROP_patience=rlropp,
                    RLROP_factor=rlropf,
                    additional_info=additional_info)
except KeyboardInterrupt:
    # a manual interrupt stops the whole sweep
    print("User interruption")

Training and validation data loaded
Training data: 177 classes repeated 2000 times
Validation data: 177 classes repeated 200 times
(354000, 4096) (354000,)
(35400, 4096) (35400,)
1 total tests will be performed... be patient!
Created log folder: /Users/MacD/Databases/training_log/2018_06_26_21_44_50
Train on 354000 samples, validate on 35400 samples
Monitor initialized.
Name of the model is "A Keras model"
Model ID is 1153cb1f-27af-40ae-89fe-6337711498a2
Training ID is 9597044e-ac80-4166-86a8-b440621dd20a
Training started at 2018-06-26 21:44 for 50 epochs with 354000 samples with a 2 layers model.
Epoch 1/50
Epoch 1/50 is done at 2018-06-26 21:50. Average minutes/epoch is 5.59.
Logs are : val_loss = 5.177099 | val_sparse_categorical_accuracy = 0.005763 | loss = 5.178904 | sparse_categorical_accuracy = 0.005616
Epoch 2/50
Epoch 2/50 is done at 2018-06-26 21:55. Average minutes/epoch is 5.56.
Logs are : val_loss = 5.176400 | val_sparse_categorical_accuracy = 0.005650 | loss = 5.176562 | 

KeyboardInterrupt: 