In [1]:
#hide
#default_exp experiment_manager
from nbdev.showdoc import *
from dsblocks.utils.nbdev_utils import nbdev_setup, TestRunner

# Notebook-level set-up helper imported from dsblocks
# (NOTE(review): presumably configures the nbdev environment - confirm in dsblocks.utils.nbdev_utils).
nbdev_setup ()
#tst = TestRunner (targets=['dummy'])
# Test runner object used by this notebook; it is built with an empty list of targets.
tst = TestRunner (targets=[])

# Experiment Manager

> Main class

In [2]:
#export
# coding: utf-8
import pickle
import joblib
import sys
import os
import numpy as np
import pandas as pd
import time
import datetime
from sklearn.model_selection import ParameterGrid
from sklearn.utils import Bunch
import platform
import pprint
import subprocess
import json
from multiprocessing import Process
import logging
import traceback
import shutil
from pathlib import Path

from dsblocks.utils.utils import set_logger, set_verbosity, store_attr

# hpsearch core API
from hpsearch.config.manager_factory import ManagerFactory
from hpsearch.utils import experiment_utils
from hpsearch.utils.experiment_utils import remove_defaults
from hpsearch.utils.organize_experiments import remove_defaults_from_experiment_data
import hpsearch.config.hp_defaults as dflt

In [3]:
#for tests
import pytest
import os
import numpy as np
import optuna

from dsblocks.utils.nbdev_utils import md
from dsblocks.utils.utils import check_last_part, remove_previous_results

from hpsearch.examples.complex_dummy_experiment_manager import init_em

## ExperimentManager

In [4]:
#export
class ExperimentManager (object):

    def __init__ (self,
                  allow_base_class=dflt.allow_base_class,
                  path_experiments='hpsearch/results',
                  folder=None,
                  parent_path=None,
                  defaults=dflt.defaults,
                  metric=dflt.metric,
                  op=dflt.op,
                  alternative_path=None,
                  path_data=None,
                  name_model_history=dflt.name_model_history,
                  model_file_name=dflt.model_file_name,
                  name_epoch=dflt.name_epoch,
                  result_file=dflt.result_file,
                  target_model_file=None,
                  destination_model_file=None,
                  manager_path=dflt.manager_path,
                  non_pickable_fields=[],
                  avoid_saving_fields=[],
                  logger=None,
                  verbose: int = dflt.verbose,
                  name_logger:str = dflt.name_logger
                 ):
        """Store the configuration, resolve paths, set up the logger and register the manager.

        Parameters
        ----------
        allow_base_class : bool
            Stored on the instance and forwarded to `ManagerFactory`; when falsy,
            `get_default_parameters` raises.
        path_experiments : str or Path
            Root folder of the experiments; it is resolved to an absolute path.
        folder, parent_path : str or Path, optional
            When any of them is given, `set_path_experiments` is called to replace
            the last component and / or the parent of `path_experiments`.
        defaults : dict
            Stored as `self.defaults` (returned by `get_default_parameters`).
        metric : str
            Stored as `self.key_score`.
        op : str
            Stored as `self.op`.
        alternative_path, path_data : str or Path, optional
            Converted to `Path` when not None.
        name_model_history, model_file_name, name_epoch, result_file,
        target_model_file, destination_model_file, manager_path :
            Stored as attributes of the same name.
        non_pickable_fields, avoid_saving_fields : list or single value
            A non-list value is wrapped into a one-element list. `self.non_pickable_fields`
            is the concatenation of both plus 'manager_factory', 'parameters_non_pickable'
            and 'logger'.
        logger : logger object, optional
            When None, one is created with `set_logger`.
        verbose : int
            Verbosity passed to `set_logger`.
        name_logger : str
            Name passed to `set_logger`.
        """

        # ********************
        # store_attr ()
        # ********************
        self.allow_base_class = allow_base_class
        self._path_experiments = path_experiments
        self.defaults = defaults
        self.key_score = metric
        self.op = op
        self.alternative_path = alternative_path
        self.path_data = path_data
        self.name_model_history = name_model_history
        self.model_file_name = model_file_name
        self.name_epoch = name_epoch
        self.result_file = result_file
        self.target_model_file = target_model_file
        self.destination_model_file = destination_model_file
        self.name_logger = name_logger
        self.logger = logger
        self.verbose = verbose
        self.manager_path = manager_path
        # ********************

        class_name = self.__class__.__name__

        # paths are normalised to pathlib objects
        self._path_experiments = Path (self._path_experiments).resolve ()
        if folder is not None or parent_path is not None:
            self.set_path_experiments (folder=folder, parent_path=parent_path)
        self.alternative_path = Path(self.alternative_path) if self.alternative_path is not None else None
        self.path_data = Path(self.path_data) if self.path_data is not None else None

        if self.logger is None:
            self.logger = set_logger (self.name_logger, path_results=self.path_experiments, verbose=self.verbose)

        self.registered_name = f'{class_name}-{self.folder}'

        self.parameters_non_pickable = {}
        self.manager_factory = ManagerFactory(allow_base_class=allow_base_class, manager_path=self.manager_path,
                                              logger=self.logger)
        self.manager_factory.register_manager (self)

        # Copy the lists: the `[]` defaults in the signature are shared by every call, so
        # storing them directly would make all instances alias the same list object.
        non_pickable_fields = (list(non_pickable_fields) if isinstance(non_pickable_fields, list)
                               else [non_pickable_fields])
        avoid_saving_fields = (list(avoid_saving_fields) if isinstance(avoid_saving_fields, list)
                               else [avoid_saving_fields])
        self.non_pickable_fields = (non_pickable_fields + avoid_saving_fields +
                                    ['manager_factory', 'parameters_non_pickable', 'logger'])
        self.avoid_saving_fields = avoid_saving_fields

    @property
    def folder (self):
        return self._path_experiments.name

    @property
    def parent_path (self):
        return self._path_experiments.parent

    @property
    def path_experiments (self):
        return self._path_experiments

    def set_path_experiments (self, path_experiments=None, folder=None, parent_path=None):
        if path_experiments is not None: self._path_experiments = Path(path_experiments).resolve()
        else:
            parent_path = Path(parent_path).resolve() if parent_path is not None else self.parent_path
            folder = folder if folder is not None else self.folder
            self._path_experiments = parent_path/folder

    def set_verbose (self, verbose):
        """Store the new verbosity level and apply it to the manager's logger."""
        self.verbose = verbose
        set_verbosity (verbose=verbose, logger=self.logger)

    def get_default_parameters (self, parameters):
        if not self.allow_base_class:
            raise ImportError ('call get_default_parameters from base class is not allowed')
        return self.defaults

    def get_default_operations (self):
        return {'folder': self.folder, 'op': self.op, 'metric': self.key_score}

    def get_path_experiment (self, experiment_id):
        path_experiment = self.path_experiments/f'experiments/{experiment_id:05d}'
        return path_experiment

    def get_path_results (self, experiment_id=None, run_number=0, path_experiment=None):
        assert experiment_id is not None or path_experiment is not None
        if path_experiment is None:
            path_experiment = self.get_path_experiment (experiment_id)
        path_results = path_experiment/f'{run_number}'
        return path_results

    def get_path_alternative (self, path_results):
        if self.alternative_path is None:
            return path_results
        path_alternative = str(path_results).replace (str(self.path_experiments), str(self.alternative_path))

        return path_alternative

    def get_path_data (self, run_number, parameters={}):
        if self.path_data is None:
            return self.path_experiments/'data'
        else:
            return self.path_data

    def get_experiment_data (self, experiments=None):
        path_csv = '%s/experiments_data.csv' %self.path_experiments
        path_pickle = path_csv.replace('csv', 'pk')
        try:
            experiment_data = pd.read_pickle (path_pickle)
        except:
            experiment_data = pd.read_csv (path_csv, index_col=0)
        if experiments is not None:
            experiment_data = experiment_data.loc[experiments,:]

        return experiment_data

    def remove_previous_experiments (self, parent=False, only_test=True):
        path_to_remove = self.path_experiments.parent if parent else self.path_experiments
        if not str(path_to_remove.name).startswith ('test_') and only_test:
            raise ValueError (f'path to remove does not start with test_: {path_to_remove}')
        if path_to_remove.exists():
            shutil.rmtree (path_to_remove)

    def experiment_visualization (self, **kwargs):
        raise ValueError ('this type of experiment visualization is not recognized')

    def run_experiment_pipeline (self, run_number=0, path_results='./results', parameters=None,
                                 use_process=False):
        """ Runs complete learning pipeline: loading / generating data, building and learning model, applying it to data,
        and evaluating it.

        Parameters
        ----------
        run_number : int
            Run identifier forwarded to `_run_experiment`.
        path_results : str or Path
            Folder where the parameters are recorded and that is forwarded to `_run_experiment`.
        parameters : dict, optional
            Parameters of the experiment. The dictionary is updated in place with
            `self.parameters_non_pickable`. A new dictionary is used when omitted.
        use_process : bool
            Forwarded to `_run_experiment`.

        Returns
        -------
        tuple
            `(score_dict, spent_time)`: the value returned by `_run_experiment` and the
            elapsed time in seconds.
        """
        # A `{}` default in the signature would be shared by all calls and polluted by
        # the in-place update below, so a fresh dictionary is created per call instead.
        if parameters is None:
            parameters = {}

        # record all parameters except for non-pickable ones
        record_parameters (path_results, parameters)

        # integrate non-pickable parameters into global dictionary
        parameters.update (self.parameters_non_pickable)
        self.parameters_non_pickable = {}

        # #####################################
        # Evaluation
        # #####################################
        time_before = time.time()
        score_dict = self._run_experiment (parameters=parameters, path_results=path_results,
                                           run_number=run_number, use_process=use_process)
        self.logger.info ('time spent on this experiment: {}'.format(time.time()-time_before))

        # #####################################
        # Final scores
        # #####################################
        score_name = parameters.get('suffix_results','')
        if len(score_name) > 0:
            if score_name[0] == '_':
                score_name = score_name[1:]
            # the experiment may return a bare score instead of a dictionary
            if isinstance (score_dict, dict) and score_dict.get(score_name) is not None:
                self.logger.info (f'score: {score_dict.get(score_name)}')

        spent_time = time.time() - time_before

        return score_dict, spent_time

    # *************************************************************************
    #   run_experiment methods
    # *************************************************************************
    def _run_experiment (self, parameters=None, path_results='./results', run_number=None, use_process=False):
        """Add the run number to the parameters, wrap them in a `Bunch` and run the experiment.

        Parameters
        ----------
        parameters : dict, optional
            Parameters of the experiment; the key 'run_number' is written into this
            dictionary. A new dictionary is used when omitted.
        path_results : str or Path
            Results folder forwarded to the experiment.
        run_number : int, optional
            Stored under `parameters['run_number']`.
        use_process : bool
            If True the experiment runs through `run_experiment_in_separate_process`,
            otherwise `run_experiment` is called directly.

        Returns
        -------
        Whatever the selected run method returns.
        """
        # avoid writing 'run_number' into a default dictionary shared across calls
        if parameters is None:
            parameters = {}

        parameters['run_number'] = run_number

        # wrap parameters
        parameters = Bunch(**parameters)

        if use_process:
            return self.run_experiment_in_separate_process (parameters, path_results)
        else:
            return self.run_experiment (parameters=parameters, path_results=path_results)

    def run_experiment_in_separate_process (self, parameters=None, path_results='./results'):
        """Run the experiment in a child process and read its results back from disk.

        The child executes `run_experiment_saving_results`, which pickles the results
        into `<path_results>/dict_results.pk`; this method waits for the child and
        loads that file.

        Parameters
        ----------
        parameters : dict, optional
            Parameters of the experiment; 'return_results' is set to False in it.
            A new dictionary is used when omitted.
        path_results : str or Path
            Folder where the child stores `dict_results.pk`.

        Returns
        -------
        The un-pickled results.

        Raises
        ------
        RuntimeError
            If the results file does not exist after the child finished.
        """
        # avoid writing into a default dictionary shared across calls
        if parameters is None:
            parameters = {}

        parameters['return_results']=False
        p = Process(target=self.run_experiment_saving_results, args=(parameters, path_results))
        p.start()
        p.join()
        if p.exitcode != 0:
            self.logger.warning ('process exited with non-zero code: there might be an error '
                                 'in run_pipeline function')

        # NOTE(review): a dict_results.pk left by an earlier run in the same folder would be
        # loaded here even if the child process failed - confirm whether that is intended.
        path_dict_results = f'{path_results}/dict_results.pk'
        try:
            # the context manager closes the file even if un-pickling fails
            with open (path_dict_results, 'rb') as f:
                dict_results = pickle.load (f)
        except FileNotFoundError:
            raise RuntimeError (f'{path_dict_results} not found: probably there is an error in run_pipeline '
                                'function. Please run in debug mode, without multi-processing')

        return dict_results

    def run_experiment_saving_results (self, parameters={}, path_results='./results'):
        dict_results = self.run_experiment (parameters=parameters, path_results=path_results)
        pickle.dump (dict_results, open ('%s/dict_results.pk' %path_results, 'wb'))

    def run_experiment (self, parameters={}, path_results='./results'):
        raise NotImplementedError ('This method needs to be defined in subclass')


    # *************************************************************************
    # *************************************************************************
    def create_experiment_and_run (self, parameters = {}, other_parameters = None,
                                   info = None, em_args=None,
                                   run_number=0, log_message=None, stack_level=-3,
                                   precision=1e-15, experiment_number=None,
                                   repeat_experiment=False, remove_not_finished=False,
                                   only_remove_not_finished=False, check_finished=False,
                                   recompute_metrics=False,
                                   force_recompute_metrics=False,
                                   check_finished_if_interrupted=False, prev_epoch=False,
                                   use_previous_best=dflt.use_previous_best, from_exp=None,
                                   skip_interrupted=False, use_last_result=False,
                                   run_if_not_interrumpted=False, use_last_result_from_dict=False,
                                   previous_model_file_name=None, model_extension='h5', 
                                   model_name='checkpoint_', epoch_offset=0, name_best_model='best_model',
                                   name_last_epoch=dflt.name_last_epoch, min_iterations=dflt.min_iterations,
                                   use_process=False):
        
        """Look up (or create) the experiment matching `parameters`, run it and store its results.

        Parameters
        ----------
        parameters : dict
            Hyper-parameters that identify the experiment: after `remove_defaults` they are
            used to obtain the experiment number from the experiments table.
        other_parameters : dict, optional
            Extra parameters merged into `parameters` before running. This method writes
            'experiment_number', 'path_results_big' and, in one resume case,
            'use_previous_best' into it. A new dictionary is used when omitted.
        info : Bunch or dict, optional
            Receives the calling script information and 'git_hash'; an optional
            'rerun_script' entry is logged. A new `Bunch` is used when omitted.
        em_args : Bunch or dict, optional
            Updated with the arguments of this call (except `parameters`, `other_parameters`,
            `info` and `em_args`). A new `Bunch` is used when omitted.
        run_number : int or None
            Run to execute. If None, the first run without a recorded score is used.
        log_message : str, optional
            Message logged at the beginning.
        stack_level : int
            Forwarded to `insert_experiment_script_path`.
        precision : float
            Forwarded to `load_or_create_experiment_values`.
        experiment_number : int, optional
            If given, it must coincide with the number found for `parameters`.
        repeat_experiment : bool
            Run even if a score is already recorded for this run.
        remove_not_finished, only_remove_not_finished : bool
            Clear the recorded score of a run flagged as not finished; with
            `only_remove_not_finished` the method returns right after that.
        check_finished, check_finished_if_interrupted : bool
            Use `finished_all_epochs` to flag the run as unfinished.
        recompute_metrics, force_recompute_metrics : bool
            With `recompute_metrics` and without `force_recompute_metrics`, a run without
            recorded score is skipped.
        prev_epoch : bool
            Try to resume from the checkpoint of the experiment given by `find_closest_epoch`.
        use_previous_best : bool
            Used when resuming from a checkpoint of this same experiment.
        from_exp : int, optional
            Experiment number to resume from when `prev_epoch` resuming did not apply.
        skip_interrupted : bool
            Return without running if a checkpoint or intermediate result exists.
        use_last_result, run_if_not_interrumpted, use_last_result_from_dict, min_iterations :
            Control the recovery of results through `obtain_last_result`.
        previous_model_file_name, model_extension, model_name, epoch_offset,
        name_best_model, name_last_epoch :
            Forwarded to `make_resume_from_checkpoint` when `from_exp` is used.
        use_process : bool
            Forwarded to `run_experiment_pipeline`.

        Returns
        -------
        tuple
            `(result, dict_results)` where `result` is `dict_results.get(self.key_score)`.
            Early exits return `(None, {})`, or `(previous_result, {key_score: previous_result})`
            when a completed run is skipped.

        Raises
        ------
        ValueError
            If `experiment_number` is given and differs from the number found.
        """
        current_em_args = Bunch ()
        store_attr (store_args=False, self=current_em_args, but='parameters, other_parameters, info, em_args')
        # These containers are written to below. Creating them per call (instead of using
        # `{}` / `Bunch()` defaults in the signature) prevents state leaking between calls.
        if other_parameters is None: other_parameters = {}
        if info is None: info = Bunch ()
        if em_args is None: em_args = Bunch ()
        em_args.update (current_em_args)
        requested_experiment_number = experiment_number
        # ****************************************************
        #  preliminary set-up: logger and path_experiments
        # ****************************************************
        if log_message is not None:
            self.logger.info ('**************************************************')
            self.logger.info (log_message)
            self.logger.info ('**************************************************')

        # insert path to experiment script file that called the experiment manager
        insert_experiment_script_path (info, self.logger, stack_level=stack_level)

        # create directories
        path_experiments = self.path_experiments
        path_experiments.mkdir (parents=True, exist_ok=True)

        # ****************************************************
        # register (subclassed) manager so that it can be used by decoupled modules
        # ****************************************************
        self.register_and_store_subclassed_manager ()

        # ****************************************************
        #   get experiment number given parameters
        # ****************************************************
        parameters = remove_defaults (parameters)

        path_csv = f'{path_experiments}/experiments_data.csv'
        path_pickle = path_csv.replace('csv', 'pk')
        experiment_number, experiment_data = load_or_create_experiment_values (
            path_csv, parameters, precision=precision)

        # if old experiment, we can require that given parameters match with experiment number
        if (requested_experiment_number is not None
            and experiment_number != requested_experiment_number):
            raise ValueError (f'expected number: {requested_experiment_number}, '
                              f'found: {experiment_number}')
        other_parameters['experiment_number'] = experiment_number

        # ****************************************************
        # get key_score and suffix_results
        # ****************************************************
        key_score = self.key_score
        suffix_results = f'_{key_score}'

        # ****************************************************
        #   get run_id, if not given
        # ****************************************************
        if run_number is None:
            # look for the first run number without a recorded score
            run_number = 0
            name_score = '%d%s' %(run_number, suffix_results)
            while not isnull(experiment_data, experiment_number, name_score):
                self.logger.info ('found previous run for experiment number {}, run {}, with score {} = {}'.format(experiment_number, run_number, key_score, experiment_data.loc[experiment_number, name_score]))
                run_number += 1
                name_score = '%d%s' %(run_number, suffix_results)
            self.logger.info ('starting experiment {} with run number {}'.format(experiment_number, run_number))

        else:
            name_score = '%d%s' %(run_number, suffix_results)
            if not isnull(experiment_data, experiment_number, name_score):
                # `previous_result` is reused by the skipping branch further below
                previous_result = experiment_data.loc[experiment_number, name_score]
                self.logger.info ('found completed: experiment number: %d, run number: %d - score: %f' %(experiment_number, run_number, previous_result))
                self.logger.info (parameters)
                if repeat_experiment:
                    self.logger.info ('redoing experiment')

        # ****************************************************
        #   remove unfinished experiments
        # ****************************************************
        if remove_not_finished:
            name_finished = '%d_finished' %run_number
            if not isnull(experiment_data, experiment_number, name_finished):
                finished = experiment_data.loc[experiment_number, name_finished]
                self.logger.info (f'experiment {experiment_number}, run number {run_number}, finished {finished}')
                if not finished:
                    experiment_data.loc[experiment_number, name_score] = None
                    experiment_data.to_csv (path_csv)
                    experiment_data.to_pickle (path_pickle)
                    self.logger.info (f'removed experiment {experiment_number}, '
                                 f'run number {run_number}, finished {finished}')
            if only_remove_not_finished:
                return None, {}

        unfinished_flag = False
        name_epoch = self.name_epoch
        current_path_results = self.get_path_results (experiment_number, run_number=run_number)

        # ****************************************************
        #   check conditions for skipping experiment
        # ****************************************************
        if not isnull(experiment_data, experiment_number, name_score) and not repeat_experiment:
            if (check_finished
                and not self.finished_all_epochs (parameters, current_path_results)):
                unfinished_flag = True
            else:
                self.logger.info ('skipping...')
                return previous_result, {key_score: previous_result}
        elif (isnull(experiment_data, experiment_number, name_score)
              and recompute_metrics
              and not force_recompute_metrics):
            self.logger.info (f'experiment not found, skipping {run_number} due to only recompute_metrics')
            return None, {}

        # ****************************************************
        # log info
        # ****************************************************
        self.logger.info ('running experiment %d' %experiment_number)
        self.logger.info ('run number: %d' %run_number)
        self.logger.info ('\nparameters:\n%s' %mypprint(parameters))

        # ****************************************************
        #  get paths
        # ****************************************************
        # path_experiment folder
        path_experiment = self.get_path_experiment (experiment_number)
        path_experiment.mkdir (parents=True, exist_ok=True)

        # path_results folder (where results are)
        path_results = self.get_path_results (run_number=run_number, path_experiment=path_experiment)
        os.makedirs (path_results, exist_ok=True)

        # path to save big files
        path_results_big_size = self.get_path_alternative (path_results)
        os.makedirs (path_results_big_size, exist_ok = True)
        other_parameters['path_results_big'] = path_results_big_size

        # ****************************************************
        # get git and record parameters
        # ****************************************************
        # get git revision number
        info['git_hash'] = get_git_revision_hash(path_experiments)

        # write parameters in experiment folder
        record_parameters (path_experiment, parameters, other_parameters, em_args, info, self.__dict__)

        # store hyper_parameters in dictionary that maps experiment_number with hyper_parameter values
        store_parameters (path_experiments, experiment_number, parameters)

        # ****************************************************************
        # loggers
        # ****************************************************************
        logger_experiment = set_logger ("experiment", path_results, verbose=self.verbose)
        logger_experiment.info (f'script: {info["script_path"]}, line number: {info["lineno"]}')
        if os.path.exists(info['script_path']):
            shutil.copy (info['script_path'], path_results)
            shutil.copy (info['script_path'], path_experiment)

        # summary logger
        logger_summary = set_logger ("summary", path_experiments, mode='w', stdout=False, just_message=True,
                                     filename='summary.txt', verbose=self.verbose,
                                     verbose_out=self.verbose)
        logger_summary.info (f'\n\n{"*"*100}\nexperiment: {experiment_number}, run: {run_number}\n'
                             f'script: {info["script_path"]}, line number: {info["lineno"]}\n'
                             f'parameters:\n{mypprint(parameters)}{"*"*100}')
        if info.get('rerun_script') is not None:
            logger_summary.info ('\nre-run:\n{}'.format(info['rerun_script']))
        # same file in path_results
        logger_summary2 = set_logger ("summary", path_results, mode='w', stdout=False,
                                      just_message=True, filename='summary.txt', verbose=self.verbose,
                                      verbose_out=self.verbose)
        logger_summary2.info (f'\n\n{"*"*100}\nexperiment: {experiment_number}, run: {run_number}\nscript: '
                              f'{info["script_path"]}, line number: {info["lineno"]}\n'
                              f'parameters:\n{mypprint(parameters)}{"*"*100}')

        # ****************************************************************
        # Do final adjustments to parameters
        # ****************************************************************
        parameters = parameters.copy()
        # kept without `other_parameters` / defaults: used by find_closest_epoch below
        original_parameters = parameters.copy()
        parameters.update(other_parameters)

        # add default parameters - their values are overwritten by input values, if given
        defaults = self.get_default_parameters(parameters)
        parameters_with_defaults = defaults.copy()
        parameters_with_defaults.update(parameters)
        parameters = parameters_with_defaults

        # ***********************************************************
        # resume from previous experiment
        # ***********************************************************
        if (isnull(experiment_data, experiment_number, name_score)
            and check_finished_if_interrupted
            and not self.finished_all_epochs (parameters, current_path_results)):
            unfinished_flag = True

        resuming_from_prev_epoch_flag = False
        if prev_epoch:
            self.logger.info('trying prev_epoch')
            experiment_data2 = experiment_data.copy()
            # the current experiment is excluded from the search unless it is unfinished
            if (not unfinished_flag
                and (repeat_experiment or isnull(experiment_data, experiment_number, name_score))):
                    experiment_data2 = experiment_data2.drop(experiment_number,axis=0)
            prev_experiment_number = self.find_closest_epoch (experiment_data2, original_parameters)
            if prev_experiment_number is not None:
                self.logger.info(f'using prev_epoch: {prev_experiment_number}')
                prev_path_results = self.get_path_results (prev_experiment_number,
                                                           run_number=run_number)
                found = self.make_resume_from_checkpoint (parameters, prev_path_results)
                if found:
                    self.logger.info (f'found previous exp: {prev_experiment_number}')
                    if prev_experiment_number == experiment_number:
                        other_parameters['use_previous_best'] = use_previous_best
                        if not use_previous_best and unfinished_flag:
                            # only the remaining epochs need to be run
                            prev_epoch = self.get_last_epoch (parameters,
                                                              current_path_results)
                            prev_epoch = max (int(prev_epoch), 0)
                            parameters[name_epoch] = parameters[name_epoch] - prev_epoch
                        self.logger.info ('using previous best')
                    else:
                        # NOTE(review): a missing value read from the table may be NaN rather
                        # than None, in which case int() would fail - confirm.
                        prev_epoch = experiment_data.loc[prev_experiment_number,name_epoch]
                        prev_epoch = (int(prev_epoch) if prev_epoch is not None
                                      else defaults.get(name_epoch))
                        parameters[name_epoch] = parameters[name_epoch] - prev_epoch

                resuming_from_prev_epoch_flag = found


        if not resuming_from_prev_epoch_flag and from_exp is not None:
            prev_experiment_number = from_exp
            self.logger.info('using previous experiment %d' %prev_experiment_number)
            prev_path_results = self.get_path_results (prev_experiment_number, run_number=run_number)
            self.make_resume_from_checkpoint (parameters, prev_path_results, use_best=True,
                                              previous_model_file_name=previous_model_file_name,
                                              model_extension=model_extension, model_name=model_name,
                                              epoch_offset=epoch_offset, name_best_model=name_best_model,
                                              name_last_epoch=name_last_epoch)

        # ****************************************************************
        #   Analyze if experiment was interrupted
        # ****************************************************************
        if skip_interrupted:
            was_interrumpted = self.exists_current_checkpoint (parameters, path_results)
            was_interrumpted = (was_interrumpted or
                                self.obtain_last_result (
                                    parameters, path_results,
                                    use_last_result_from_dict=use_last_result_from_dict,
                                    min_iterations=min_iterations) is not None)
            if was_interrumpted:
                self.logger.info ('found intermediate results, skipping...')
                return None, {}

        # ****************************************************************
        # retrieve last results in interrupted experiments
        # ****************************************************************
        run_pipeline = True
        if use_last_result:
            experiment_result = self.obtain_last_result (
                parameters, path_results, use_last_result_from_dict=use_last_result_from_dict,
                min_iterations=min_iterations)
            if experiment_result is None and run_if_not_interrumpted:
                run_pipeline = True
            elif experiment_result is None:
                return None, {}
            else:
                run_pipeline = False

        # ****************************************************************
        # run experiment
        # ****************************************************************
        if run_pipeline:
            dict_results, time_spent = self.run_experiment_pipeline (run_number, path_results,
                                                                          parameters=parameters,
                                                                          use_process=use_process)
            finished = True
        else:
            # The pipeline is not run: the result recovered by obtain_last_result is the one
            # to store (without this assignment `dict_results` would be undefined below).
            dict_results = experiment_result
            finished = False

        # ****************************************************************
        #  Retrieve and store results
        # ****************************************************************
        # a bare score is wrapped into a dictionary
        if not isinstance(dict_results, dict): dict_results = {name_score: dict_results}
        columns = pd.MultiIndex.from_product ([[dflt.scores_col], list(dict_results.keys()), [run_number]])
        experiment_data[[x for x in columns if x not in experiment_data]] = None
        experiment_data.loc[experiment_number, columns]=dict_results.values()
        experiment_data = experiment_data[experiment_data.columns.sort_values()]

        # `time_spent` only exists when the pipeline was run (finished is True)
        if isnull(experiment_data, experiment_number, 'time_'+str(run_number)) and finished:
            experiment_data.loc[experiment_number,'time_'+str(run_number)]=time_spent
        # NOTE(review): this stores only the time of day, not the calendar date - confirm intent.
        experiment_data.loc[experiment_number, 'date']=datetime.datetime.time(datetime.datetime.now())
        experiment_data.loc[experiment_number, '%d_finished' %run_number]=finished

        experiment_data.to_csv(path_csv)
        experiment_data.to_pickle(path_pickle)

        # best effort: a failure here must not discard the results already stored
        try:
            save_other_parameters (experiment_number, {**other_parameters, **em_args, **info}, path_experiments)
        except Exception as e:
            print (f'error saving other parameters: {e}')

        logger_summary2.info ('\nresults:\n{}'.format(dict_results))
        self.logger.info ('finished experiment %d' %experiment_number)

        # return final score
        result = dict_results.get(key_score)
        return result, dict_results

    def grid_search (self, parameters_multiple_values={}, parameters_single_value={}, other_parameters={},
                     info=None, run_numbers=[0], random_search=False, load_previous=False,
                     log_message='', nruns=None, keep='multiple', **kwargs):
        """Run one experiment per combination of hyper-parameter values and per run number.

        Parameters
        ----------
        parameters_multiple_values : dict
            Maps each parameter to the list of values to explore; expanded with `ParameterGrid`.
        parameters_single_value : dict
            Parameters with a fixed value, added to every combination.
        other_parameters : dict
            Copied and forwarded to `create_experiment_and_run`.
        info : Bunch or dict, optional
            Receives 'log_message' (when not empty) and the calling script information,
            and is forwarded to `create_experiment_and_run`. A new `Bunch` is used when omitted.
        run_numbers : iterable of int
            Runs to perform for each combination; replaced by `range(nruns)` if `nruns` is given.
        random_search : bool
            Visit the combinations in random order; the order is pickled to `random_hp.pk`.
        load_previous : bool
            With `random_search`, reuse the order stored in `random_hp.pk` if that file exists.
        log_message : str
            Message logged before each run.
        nruns : int, optional
            Number of runs per combination.
        keep : {'multiple', 'single'}
            Which dictionary wins when a parameter appears in both of them.
        **kwargs
            Forwarded to `create_experiment_and_run`.

        Raises
        ------
        ValueError
            If `keep` is neither 'multiple' nor 'single'.
        """
        # `info` is written to below: a `Bunch()` default in the signature would be shared
        # by all calls, leaking the log message and script info from one call to the next.
        if info is None:
            info = Bunch ()

        other_parameters = other_parameters.copy()

        os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
        if nruns is not None:
            run_numbers = range (nruns)

        path_experiments = self.path_experiments
        path_results_base = path_experiments

        os.makedirs (path_results_base,exist_ok=True)

        # resolve parameters that appear in both dictionaries
        if keep=='multiple':
            parameters_single_value = {k:parameters_single_value[k]
                                       for k in parameters_single_value.keys()
                                       if k not in parameters_multiple_values}
        elif keep=='single':
            parameters_multiple_values = {k:parameters_multiple_values[k]
                                          for k in parameters_multiple_values.keys()
                                          if k not in parameters_single_value}
        else:
            raise ValueError (f'parameter keep {keep} not recognized: it must be either multiple or single')

        parameters_multiple_values_all = list(ParameterGrid(parameters_multiple_values))

        if log_message != '':
            info['log_message'] = log_message
        insert_experiment_script_path (info, self.logger)

        if random_search:
            path_random_hp = '%s/random_hp.pk' %path_results_base
            if load_previous and os.path.exists(path_random_hp):
                # file written by a previous call of this method (see the else branch)
                with open (path_random_hp, 'rb') as f:
                    parameters_multiple_values_all = pickle.load (f)
            else:
                parameters_multiple_values_all = list (np.random.permutation(parameters_multiple_values_all))
                with open (path_random_hp, 'wb') as f:
                    pickle.dump (parameters_multiple_values_all, f)
        for (i_hp, hp_combination) in enumerate (parameters_multiple_values_all):
            parameters = hp_combination.copy()
            parameters.update(parameters_single_value)

            for (i_run, run_number) in enumerate (run_numbers):
                self.logger.info (f'processing hyper-parameter {i_hp} '
                                 f'out of {len(parameters_multiple_values_all)}')
                self.logger.info (f'doing run {i_run} out of {len(run_numbers)}')
                self.logger.info (log_message)

                self.create_experiment_and_run (parameters=parameters, other_parameters=other_parameters,
                                                info=info, run_number=run_number, **kwargs)

        # This solves an intermitent issue found in TensorFlow (reported as bug by community)
        import gc
        gc.collect()

    def run_multiple_repetitions (self, parameters={}, other_parameters = {},
                     log_message='', nruns = None, run_numbers=[0], **kwargs):
        """Runs the same experiment once per run number and averages the main score.

        Parameters
        ----------
        parameters : dict
            Parameters forwarded to `create_experiment_and_run`.
        other_parameters : dict
            Forwarded to `create_experiment_and_run`; a shallow copy is passed.
        log_message : str
            Message written to the log before each run.
        nruns : int or None
            When given, `run_numbers` is replaced by `range (nruns)`.
        run_numbers : sequence of int
            Run numbers to execute when `nruns` is None.
        **kwargs
            Forwarded unchanged to `create_experiment_and_run`.

        Returns
        -------
        mu : float
            Mean of the scores of the runs that were actually executed
            (NaN when no run was executed).
        std : float
            Standard deviation of those scores (NaN when no run was executed).
        dict_results : dict
            Results dictionary of the last executed run, with the entry
            `self.key_score` overwritten by `mu`.
        """

        other_parameters = other_parameters.copy()

        if nruns is not None:
            run_numbers = range (nruns)

        path_experiments = self.path_experiments
        path_experiments.mkdir (parents=True, exist_ok = True)

        results = np.zeros((len(run_numbers),))
        # Defined up-front so that an empty `run_numbers` does not leave it unbound.
        dict_results = {}
        n_runs_done = 0
        for (i_run, run_number) in enumerate(run_numbers):
            self.logger.info('doing run %d out of %d' %(i_run, len(run_numbers)))
            self.logger.info('%s' %log_message)

            results[i_run], dict_results  = self.create_experiment_and_run (
                parameters=parameters, other_parameters=other_parameters,
                run_number=run_number, **kwargs)
            n_runs_done = i_run + 1
            if dict_results.get('is_pruned', False):
                break

        if n_runs_done > 0:
            # Only the executed runs count: after a pruning `break` the remaining
            # positions still hold their zero initial value and would bias the statistics.
            executed = results[:n_runs_done]
            mu, std = executed.mean(), executed.std()
        else:
            self.logger.warning ('run_multiple_repetitions called without any run number')
            mu, std = np.nan, np.nan
        self.logger.info (f'mean {self.key_score}: {mu}, std: {std}')

        dict_results[self.key_score] = mu

        return mu, std, dict_results


    def hp_optimization (self, parameter_sampler=None, log_message=None,
                         parameters={}, other_parameters={}, info=Bunch(),
                         nruns=None, stack_level=-3, sampler_method='random',
                         pruner_method='halving', n_evaluations=20, seed=0,
                         n_startup_trials=5, n_trials=10, study_name='hp_study',
                         run_number=None, n_jobs=1, nruns_best=0,
                         **kwargs):
        """Runs an Optuna study over the experiment and returns the best score found.

        Parameters
        ----------
        parameter_sampler : callable or None
            Called as `parameter_sampler (trial)`; the returned dict is merged into
            the parameters of each trial.
        log_message : str or None
            Stored in `info['log_message']` unless it is the empty string.
        parameters, other_parameters : dict
            Base parameters of every trial. Both are copied; the caller's
            dictionaries are not modified.
        info : Bunch
            Updated in place with the log message and the calling script path.
        nruns : int or None
            If None each trial is a single run using `run_number`; otherwise each
            trial runs `nruns` repetitions through `run_multiple_repetitions`.
        stack_level : int
            Forwarded to `insert_experiment_script_path`.
        sampler_method : {'random', 'tpe', 'skopt'}
        pruner_method : {'halving', 'median', 'none'}
        n_evaluations : int
            Used to derive `n_warmup_steps` of the median pruner.
        seed : int
            Seed of the random and TPE samplers.
        n_startup_trials : int
            Start-up trials of the TPE sampler.
        n_trials, n_jobs : int
            Forwarded to `study.optimize`.
        study_name : str
            Name of the study and of its SQLite file inside `self.path_experiments`;
            an existing study with the same name is loaded and continued.
        run_number : int or None
            Run number of each trial when `nruns` is None.
        nruns_best : int
            When positive, the best configuration is run `nruns_best` more times
            and the mean score of those runs is returned.
        **kwargs
            Forwarded to `create_experiment_and_run` / `run_multiple_repetitions`.

        Returns
        -------
        float
            Value of the best trial, or the mean over the `nruns_best` repetitions.

        Raises
        ------
        ValueError
            If `sampler_method` or `pruner_method` is not recognized.
        """

        import optuna
        from optuna.pruners import SuccessiveHalvingPruner, MedianPruner
        from optuna.samplers import RandomSampler, TPESampler

        optuna.logging.disable_propagation()

        # NOTE(review): presumably collects the arguments of this call (except the two
        # parameter dictionaries) into `em_args`; keep this before any argument is
        # rebound -- confirm against dsblocks' `store_attr`.
        em_args = Bunch ()
        store_attr (store_args=False, self=em_args, but='parameters, other_parameters')

        path_experiments = self.path_experiments
        path_experiments.mkdir (parents=True, exist_ok = True)

        # Private copies: `parameters` is updated with the best trial further down,
        # which must not leak into the caller's dict nor into the shared default `{}`.
        parameters = parameters.copy()
        other_parameters = other_parameters.copy()

        # NOTE(review): with the default `log_message=None` this stores None in `info`.
        if log_message != '':
            info['log_message'] = log_message
        insert_experiment_script_path (info, self.logger, stack_level=stack_level)

        if sampler_method == 'random':
            sampler = RandomSampler(seed=seed)
        elif sampler_method == 'tpe':
            sampler = TPESampler(n_startup_trials=n_startup_trials,
                                 seed=seed)
        elif sampler_method == 'skopt':
            # Imported only when requested, so that the other samplers keep working
            # when the optional skopt integration is not available.
            from optuna.integration.skopt import SkoptSampler
            # cf https://scikit-optimize.github.io/#skopt.Optimizer
            # GP: gaussian process
            # Gradient boosted regression: GBRT
            sampler = SkoptSampler(skopt_kwargs={'base_estimator': "GP", 'acq_func': 'gp_hedge'})
        else:
            raise ValueError('Unknown sampler: {}'.format(sampler_method))

        # n_warmup_steps: Disable pruner until the trial reaches the given number of step.
        if pruner_method == 'halving':
            pruner = SuccessiveHalvingPruner(min_resource=1, reduction_factor=4,
                                             min_early_stopping_rate=0)
        elif pruner_method == 'median':
            pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=n_evaluations // 3)
        elif pruner_method == 'none':
            # Do not prune
            pruner = MedianPruner(n_startup_trials=n_trials,
                                  n_warmup_steps=n_evaluations)
        else:
            raise ValueError(f'Unknown pruner: {pruner_method}')

        self.logger.info (f'Sampler: {sampler_method} - Pruner: {pruner_method}')

        direction = 'maximize' if self.op=='max' else 'minimize'
        study = optuna.create_study(direction=direction,
                                    study_name=study_name,
                                    storage=f'sqlite:///{path_experiments}/{study_name}.db',
                                    sampler=sampler, pruner=pruner, load_if_exists=True)

        key_score = self.key_score

        def objective(trial):
            """Runs one trial and returns its main score."""

            hp_parameters = parameters.copy()
            self.parameters_non_pickable = dict(trial=trial)

            if parameter_sampler is not None:
                hp_parameters.update(parameter_sampler(trial))

            if nruns is None:
                _, dict_results = self.create_experiment_and_run (
                    parameters=hp_parameters, other_parameters=other_parameters,
                    run_number=run_number, info=info,
                    em_args=em_args, **kwargs
                )
            else:
                # `nruns` must be forwarded explicitly: it is a named argument of this
                # method, so it is never part of `kwargs`.
                mu_best, std_best, dict_results = self.run_multiple_repetitions (
                    parameters=hp_parameters, other_parameters=other_parameters,
                    nruns=nruns, info=info, em_args=em_args, **kwargs
                )

            if dict_results.get('is_pruned', False):
                # `optuna.structs.TrialPruned` is the deprecated location of this exception.
                raise optuna.TrialPruned()

            assert key_score in dict_results, f'metric {key_score} not found in results'

            return dict_results[key_score]

        study.optimize(objective, n_trials=n_trials, n_jobs=n_jobs)

        self.logger.info ('Number of finished trials: {}'.format(len(study.trials)))
        self.logger.info ('Best trial:')
        trial = study.best_trial
        self.logger.info ('Value: {}'.format(trial.value))
        self.logger.info ('best params: {}'.format (study.best_params))
        best_value = trial.value

        if nruns_best > 0:
            self.logger.info ('running best configuration %d times' %nruns_best)
            parameters.update (study.best_params)
            mu_best, std_best, _ = self.run_multiple_repetitions (
                parameters=parameters, other_parameters=other_parameters,
                nruns=nruns_best, info=info, em_args=em_args, **kwargs)
            best_value = mu_best

        return best_value

    def rerun_experiment (self, experiments=[], run_numbers=[0], nruns=None,
                          other_parameters={}, em_args={}, parameters={},
                          parameter_sampler=None, parameters_multiple_values=None,
                          log_message='', only_if_exists=False, check_experiment_matches=True,
                          **kwargs):
        """Re-runs previously stored experiments, optionally changing their parameters.

        For each experiment id, the stored parameters are loaded with
        `load_parameters` and then one of three actions is taken:

        - `parameter_sampler` given: `hp_optimization` is run;
        - else `parameters_multiple_values` given: `grid_search` is run;
        - otherwise: `run_multiple_repetitions` is run.

        Parameters
        ----------
        experiments : sequence
            Identifiers of the experiments to re-run.
        run_numbers : sequence of int
            Run numbers to execute; replaced by `range (nruns)` when `nruns` is given.
        nruns : int or None
        other_parameters, parameters : dict
            Passed to `load_parameters` together with the stored experiment.
        em_args : dict
            NOTE(review): currently ignored, the keyword arguments in `**kwargs`
            are used in its place.
        parameter_sampler : callable or None
        parameters_multiple_values : dict or None
        log_message : str
        only_if_exists : bool
            In the plain re-run case, keep only the run numbers whose folder
            already exists inside the experiment folder.
        check_experiment_matches : bool
            Forwarded to `load_parameters`; forced to False when a sampler or a
            grid of values is given.
        **kwargs
            Used as `em_args` when loading parameters and forwarded to the
            function that runs the experiment.
        """

        other_parameters = other_parameters.copy()
        # NOTE(review): the `em_args` argument is discarded here; confirm whether it
        # should be merged with `kwargs` instead.
        em_args = kwargs

        if nruns is not None:
            run_numbers = range (nruns)

        # Does not depend on the experiment, so it is computed once.
        check_experiment_matches = (check_experiment_matches and
                                    parameters_multiple_values is None
                                    and parameter_sampler is None)

        for experiment_id in experiments:
            path_experiment = self.get_path_experiment (experiment_id)
            # A fresh `info` per experiment: otherwise `old_log_message` and similar
            # entries of one experiment would be carried over to the next ones.
            info = Bunch ()
            exp_parameters, exp_other_parameters, exp_em_args = load_parameters (em=self,
                experiment=experiment_id,
                other_parameters=other_parameters, em_args=em_args,
                parameters=parameters, check_experiment_matches=check_experiment_matches
            )

            if 'log_message' in exp_em_args:
                info['old_log_message'] = exp_em_args['log_message']
                del exp_em_args['log_message']
            if 'run_number' in exp_em_args:
                info['old_run_number'] = exp_em_args['run_number']
                del exp_em_args['run_number']

            # we need to set the following flag to False, since otherwise when we request to store the intermediate results
            # and the experiment did not start, we do not run the experiment
            if (exp_em_args.get('use_last_result', False)
                and not em_args.get('use_last_result', False)):
                self.logger.debug ('changing em_args["use_last_result"] to False')
                exp_em_args['use_last_result'] = False
            self.logger.info (f'running experiment {experiment_id} with parameters:\n{exp_parameters}\n'
                         f'other_parameters:\n{exp_other_parameters}')

            if parameter_sampler is not None:
                self.logger.info ('running hp_optimization')
                if 'parameter_sampler' in exp_em_args:
                    info['old_parameter_sampler'] = exp_em_args['parameter_sampler']
                    del exp_em_args['parameter_sampler']
                insert_experiment_script_path (info, self.logger)
                exp_em_args['info'] = info
                self.hp_optimization (parameter_sampler=parameter_sampler,
                                      log_message=log_message, parameters=exp_parameters,
                                      other_parameters=exp_other_parameters, **exp_em_args)
            elif parameters_multiple_values is not None:
                script_parameters = {}
                insert_experiment_script_path (script_parameters, self.logger)
                info['rerun_script'] = script_parameters
                exp_em_args['info'] = info
                self.grid_search (
                    parameters_multiple_values=parameters_multiple_values,
                    parameters_single_value=exp_parameters, other_parameters=exp_other_parameters,
                    run_numbers=run_numbers, log_message=log_message, **exp_em_args)
            else:
                # Filter into a per-experiment list: the runs that exist for one
                # experiment must not restrict the candidates of the following ones.
                exp_run_numbers = run_numbers
                if only_if_exists:
                    # The run folder is named after the run number; a Path cannot be
                    # joined with an int, hence the explicit string conversion.
                    exp_run_numbers = [run_number for run_number in run_numbers
                                       if (path_experiment/f'{run_number}').exists()]

                script_parameters = {}
                insert_experiment_script_path (script_parameters, self.logger)
                info['rerun_script'] = script_parameters
                exp_em_args['info'] = info
                self.run_multiple_repetitions (
                    parameters=exp_parameters, other_parameters=exp_other_parameters,
                    log_message=log_message, run_numbers=exp_run_numbers, **exp_em_args
                )

    def rerun_experiment_pipeline (self, experiments, run_numbers=None,
                                   new_parameters={}, save_results=False):
        """Re-runs the pipeline of stored experiments and optionally stores the new scores.

        Parameters
        ----------
        experiments : sequence
            Identifiers of the experiments to re-run.
        run_numbers : sequence of int or None
            Run numbers to execute for each experiment. None is treated as `[0]`.
        new_parameters : dict
            Values that override the stored parameters of each experiment.
        save_results : bool
            If True, the scores are written into the `experiments_data` table
            (both the csv and the pickle file) found in `self.path_experiments`.
        """

        if run_numbers is None:
            # The default used to be iterated directly, which raised a TypeError.
            run_numbers = [0]

        path_experiments = self.path_experiments
        for experiment_id in experiments:
            path_experiment = self.get_path_experiment (experiment_id)

            parameters, other_parameters, em_args, info, em_attrs =joblib.load (
                f'{path_experiment}/parameters.pk'
            )
            # Precedence, from lowest to highest: parameters, other_parameters, new_parameters.
            parameters = parameters.copy()
            parameters.update(other_parameters)
            parameters.update(new_parameters)
            for run_number in run_numbers:
                path_results = path_experiment/f'{run_number}'
                # NOTE(review): the returned value is not used here; the call is kept
                # in case `get_path_data` has side effects -- confirm.
                path_data = self.get_path_data (run_number, parameters)
                score, _ = self.run_experiment_pipeline (run_number, path_results,
                                                         parameters=parameters)

                if save_results:
                    experiment_number = experiment_id
                    path_csv = path_experiments/'experiments_data.csv'
                    # Only the file extension changes; a plain string replace would
                    # also rewrite any 'csv' contained in a folder name.
                    path_pickle = path_csv.with_suffix ('.pk')
                    if os.path.exists(path_pickle):
                        experiment_data = pd.read_pickle (path_pickle)
                    else:
                        experiment_data = pd.read_csv (path_csv, index_col=0)
                    if isinstance (score, dict):
                        for key in score.keys():
                            if key != '':
                                experiment_data.loc[experiment_number, '%d_%s' %(run_number, key)]=score[key]
                            else:
                                experiment_data.loc[experiment_number, '%d' %run_number]=score[key]
                    else:
                        # NOTE(review): assumes a scalar score belongs in the column of the
                        # main metric, named like the per-metric columns above -- confirm.
                        name_score = '%d_%s' %(run_number, self.key_score)
                        experiment_data.loc[experiment_number, name_score]=score
                    experiment_data.to_csv(path_csv)
                    experiment_data.to_pickle(path_pickle)

    def rerun_experiment_par (self, experiments, run_numbers=None, parameters={}):
        """Runs the experiment pipeline on existing experiments with the given parameters.

        Parameters
        ----------
        experiments : sequence
            Identifiers of the experiments.
        run_numbers : sequence of int or None
            Run numbers to execute for each experiment. None is treated as `[0]`.
        parameters : dict
            Parameters passed as they are to `run_experiment_pipeline`.
        """

        if run_numbers is None:
            # The default used to be iterated directly, which raised a TypeError.
            run_numbers = [0]

        for experiment_id in experiments:
            path_experiment = self.get_path_experiment (experiment_id)

            for run_number in run_numbers:
                path_results = path_experiment/f'{run_number}'
                self.run_experiment_pipeline (run_number, path_results, parameters=parameters)

    def find_closest_epoch (self, experiment_data, parameters):
        """Finds an experiment with the same parameters except for the number of epochs.

        Among the experiments matching `parameters` (ignoring the epoch parameter
        and 'prev_epoch'), the one with the highest number of epochs not exceeding
        the requested one is selected. Experiments without a stored epoch value
        are given the default one.

        Returns the index of the selected experiment, the single matching
        experiment when there is exactly one, or None when nothing qualifies.
        """
        epoch_key = self.name_epoch
        matching, _, _ = experiment_utils.find_rows_with_parameters_dict (
            experiment_data, parameters, ignore_keys=[epoch_key,'prev_epoch'])

        default_parameters = self.get_default_parameters(parameters)
        requested_epoch = parameters.get(epoch_key, default_parameters.get(epoch_key))
        if requested_epoch is None:
            requested_epoch = -1

        n_matching = len(matching)
        if n_matching == 0:
            return None
        if n_matching == 1:
            # a single match is returned without looking at its number of epochs
            return matching[0]

        stored_epochs = experiment_data.loc[matching,epoch_key].copy()
        stored_epochs[stored_epochs.isnull()]=default_parameters.get(epoch_key)
        stored_epochs = stored_epochs.loc[stored_epochs<=requested_epoch]
        if stored_epochs.shape[0] == 0:
            return None
        return stored_epochs.astype(int).idxmax()

    def get_last_epoch (self, parameters, path_results, name_last_epoch=dflt.name_last_epoch):
        """Returns the last epoch recorded in the model history stored in `path_results`.

        Parameters
        ----------
        parameters : dict
            Not used by this implementation.
        path_results : str or Path
            Folder where the model history file `self.name_model_history` is looked for.
        name_last_epoch : str
            Key of the history dictionary that holds the last epoch.

        Returns
        -------
        int or None
            -1 when there is no history file; the value stored under
            `name_last_epoch` when present; otherwise the number of non-NaN
            entries of the `self.key_score` history when that is a list or an
            array; None when none of these is available.
        """

        path_model_history = f'{path_results}/{self.name_model_history}'

        prev_epoch = -1
        if os.path.exists(path_model_history):
            with open(path_model_history, 'rb') as history_file:
                summary = pickle.load(history_file)
            prev_epoch = summary.get(name_last_epoch)
            if prev_epoch is None:
                key_score = self.key_score
                # `np.array` is a function, not a type: the array type is `np.ndarray`.
                if key_score in summary and isinstance(summary[key_score], (list, np.ndarray)):
                    prev_epoch = (~np.isnan(summary[key_score])).sum()

        return prev_epoch

    def finished_all_epochs (self, parameters, path_results,
                             name_last_epoch=dflt.name_last_epoch):
        """Tells whether the run stored in `path_results` reached the requested epochs.

        The requested number of epochs is read from `parameters`, falling back to
        the default parameters; the number reached is given by `get_last_epoch`.

        Returns
        -------
        bool
            True when the last recorded epoch is greater than or equal to the
            requested one. False otherwise, including when either value is unknown.
        """
        defaults = self.get_default_parameters (parameters)
        current_epoch = parameters.get(self.name_epoch, defaults.get(self.name_epoch))
        prev_epoch = self.get_last_epoch (parameters, path_results,
                                          name_last_epoch=name_last_epoch)

        # `get_last_epoch` may return None, and the epoch parameter may be undefined;
        # comparing with None would raise a TypeError, and completion cannot be claimed.
        if prev_epoch is None or current_epoch is None:
            return False

        return bool (prev_epoch >= current_epoch)

    def make_resume_from_checkpoint (self, parameters, prev_path_results, use_best=False, previous_model_file_name=None,
                                    model_extension='h5', model_name='checkpoint_', epoch_offset=0, name_best_model='best_model',
                                    name_last_epoch=dflt.name_last_epoch):
        """Fills `parameters` with what is needed to resume from a previous run.

        When the model history file exists in `prev_path_results`, the entries
        'resume_summary', 'prev_path_results' and 'resume' of `parameters` are
        set. The model file to resume from is chosen as follows:

        1. a previous model file name, when `parameters` has a non-None
           'previous_model_file_name' entry (the `previous_model_file_name`
           argument takes precedence over that entry when it is not None);
        2. the best model `{name_best_model}.{model_extension}` when `use_best`;
        3. the checkpoint `{model_name}{last epoch + epoch_offset}.{model_extension}`.

        If the chosen file does not exist, `self.model_file_name` in
        `prev_path_results` is used. If that does not exist either, 'resume' and
        'prev_path_results' are set to '' and False is returned.

        Returns
        -------
        bool
            True when a model to resume from was found.
        """

        found = False
        path_model_history = f'{prev_path_results}/{self.name_model_history}'
        if os.path.exists(path_model_history):
            parameters['resume_summary'] = path_model_history
            found = True
            parameters['prev_path_results'] = prev_path_results
            if parameters.get('previous_model_file_name') is not None:
                # The argument used to be formatted even when it was None, which
                # produced the path '<prev_path_results>/None'.
                if previous_model_file_name is None:
                    previous_model_file_name = parameters['previous_model_file_name']
                parameters['resume'] = f'{prev_path_results}/{previous_model_file_name}'
            elif use_best:
                parameters['resume'] = f'{prev_path_results}/{name_best_model}.{model_extension}'
            else:
                with open(path_model_history, 'rb') as history_file:
                    summary = pickle.load(history_file)
                prev_epoch = summary.get(name_last_epoch)
                key_score = self.key_score
                if prev_epoch is None:
                    # `np.array` is a function, not a type: the array type is `np.ndarray`.
                    if key_score in summary and isinstance(summary[key_score], (list, np.ndarray)):
                        prev_epoch = (~np.isnan(summary[key_score])).sum()
                    else:
                        prev_epoch = 0

                if prev_epoch >= 0:
                    parameters['resume'] = (f'{prev_path_results}/'
                                            f'{model_name}{prev_epoch+epoch_offset}.{model_extension}')
            # 'resume' may not have been set (negative last epoch): treat it like a
            # missing file instead of raising a KeyError.
            path_resume = parameters.get('resume')
            if not path_resume or not os.path.exists(path_resume):
                path_resume2 = f'{prev_path_results}/{self.model_file_name}'
                if os.path.exists (path_resume2):
                    parameters['resume'] = path_resume2
                else:
                    parameters['resume'] = ''
                    parameters['prev_path_results'] = ''
                    found = False

        return found

    def exists_current_checkpoint (self, parameters, path_results):
        """Tells whether the model file `self.model_file_name` exists in `path_results`.

        `parameters` is not used by this implementation.
        """
        checkpoint_path = '{}/{}'.format (path_results, self.model_file_name)
        return os.path.exists (checkpoint_path)

    def obtain_last_result (self, parameters, path_results, use_last_result_from_dict=False,
                            min_iterations=dflt.min_iterations):
        """Builds a results dictionary from the model history stored in `path_results`.

        Parameters
        ----------
        parameters : dict
            Optional entries read here: 'key_scores' (metrics to collect, all the
            history keys by default) and 'ops' (a single 'min'/'max' string, a
            list with one entry per metric, or a dict from metric to operation;
            'max' is used where nothing is indicated).
        path_results : str or Path
            Folder that contains the history file `self.name_model_history`.
        use_last_result_from_dict : bool
            If True, the work is delegated to `obtain_last_result_from_dict`.
        min_iterations : int
            Minimum number of iterations required to accept the result.

        Returns
        -------
        dict or None
            For each metric found in the history, its minimum or maximum value
            according to its operation, plus the entry 'last' with the highest
            last position among the metrics. None when the history file does not
            exist or 'last' is lower than `min_iterations`.
        """

        if use_last_result_from_dict:
            return self.obtain_last_result_from_dict (parameters, path_results,
                                                      use_last_result_from_dict=use_last_result_from_dict,
                                                      min_iterations=min_iterations)
        name_result_file = self.name_model_history
        path_results_file = f'{path_results}/{name_result_file}'
        dict_results = None
        if os.path.exists (path_results_file):
            with open (path_results_file, 'rb') as history_file:
                history = pickle.load (history_file)
            metrics = parameters.get('key_scores')
            if metrics is None:
                metrics = history.keys()
            # Normalize `ops` to one operation per metric.
            ops = parameters.get('ops')
            if ops is None:
                ops = ['max'] * len(metrics)
            elif isinstance (ops, str):
                ops = [ops] * len(metrics)
            elif isinstance (ops, dict):
                ops = [ops.get (metric, 'max') for metric in metrics]
            dict_results = {}
            max_last_position = -1
            for metric, op in zip(metrics, ops):
                if metric in history.keys():
                    history_array = history[metric]
                    score = min(history_array) if op == 'min' else max(history_array)
                    # The first entry equal to zero marks where the history stops,
                    # presumably because the array is pre-filled with zeros -- confirm.
                    last_position = np.where(np.array(history_array).ravel()==0)[0]
                    if len(last_position) > 0:
                        last_position = last_position[0] - 1
                    else:
                        last_position = len(history_array)
                    dict_results[metric] = score
                else:
                    last_position = -1
                max_last_position = max(last_position, max_last_position)

            dict_results['last'] = max_last_position
            if max_last_position < min_iterations:
                dict_results = None
                print (f'not storing result from {path_results} with iterations {max_last_position}')
            else:
                print (f'storing result from {path_results} with iterations {max_last_position}')

        return dict_results

    #export
    def obtain_last_result_from_dict (self, parameters, path_results, use_last_result_from_dict=False,
                            min_iterations=dflt.min_iterations):
        """Loads the results dictionary stored in `path_results` and validates it.

        The dictionary is read from the file `self.result_file`. Its entry
        'last' is taken, in this order, from the dictionary itself, from its
        'epoch' entry, or from the model history through `obtain_last_result`.

        Parameters
        ----------
        parameters : dict
            Forwarded to `obtain_last_result` when 'last' must be recovered.
        path_results : str or Path
            Folder that contains the results file.
        use_last_result_from_dict : bool
            Not used by this implementation.
        min_iterations : int
            Minimum value of 'last' required to accept the result.

        Returns
        -------
        dict or None
            The loaded dictionary, or None when the file does not exist or
            'last' is lower than `min_iterations`.

        Raises
        ------
        RuntimeError
            If the value of 'last' cannot be determined.
        """
        name_result_file = self.result_file
        path_results_file = f'{path_results}/{name_result_file}'
        dict_results = None
        if os.path.exists (path_results_file):
            with open (path_results_file, 'rb') as results_file:
                dict_results = pickle.load (results_file)
            if 'last' not in dict_results.keys() and 'epoch' in dict_results.keys():
                dict_results['last'] = dict_results['epoch']
            if 'last' not in dict_results:
                dict_results_from_history = self.obtain_last_result (
                    parameters, path_results, use_last_result_from_dict=False,
                    min_iterations=min_iterations)
                if dict_results_from_history is not None:
                    dict_results['last'] = dict_results_from_history['last']
            if 'last' not in dict_results:
                raise RuntimeError ('dict_results has no entry named "last", and '
                                    'the value of last could not be retrieved from '
                                    'a model history file')
            max_last_position = dict_results['last']
            if max_last_position < min_iterations:
                dict_results = None
                print (f'not storing result from {path_results} with iterations {max_last_position}')
            else:
                print (f'storing result from {path_results} with iterations {max_last_position}')

        return dict_results

    def register_and_store_subclassed_manager (self):
        """Registers this manager in `self.manager_factory` and then writes it through it."""
        for factory_method in ('register_manager', 'write_manager'):
            getattr (self.manager_factory, factory_method) (self)

### get_path_alternative

In [5]:
#export tests.test_experiment_manager
def test_get_path_alternative ():
    """Checks the results path and its alternative once `alternative_path` is set."""
    manager = init_em ('basic')
    manager.alternative_path = 'other_path'

    results_path = manager.get_path_results (experiment_id=1, run_number=2)
    check_last_part (results_path,'test_basic/default/experiments/00001/2')

    alternative = manager.get_path_alternative (results_path)
    check_last_part (alternative, 'other_path/experiments/00001/2')

    manager.remove_previous_experiments (parent=True)

In [6]:
tst.run (test_get_path_alternative, tag='dummy')

### create_experiment_and_run

#### Basic usage

`create_experiment_and_run` is the main function of the `ExperimentManager`. All the other functions build on it, adding further functionality.

In order to call `create_experiment_and_run`, we pass a dictionary of parameters characterizing the experiment we want to run, as follows:

In [7]:
#export tests.test_experiment_manager
def test_basic_usage ():
    """Runs one experiment and checks its scores and what is written to disk."""
    import pandas as pd

    manager = init_em ('basic')

    main_score, all_scores = manager.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05}
    )

    # The output is a tuple of two objects:
    # 1. The value of the main result metric.
    # 2. A dictionary containing all the performance metrics for this experiment.
    assert main_score==0.6
    assert all_scores == {'validation_accuracy': 0.6, 'test_accuracy': 0.5}

    # Entries expected in *path_experiments* after the run, in sorted order;
    # they include the `experiments` and `managers` entries.
    files_stored = ['current_experiment_number.pkl', 'experiments', 'experiments_data.csv',
                    'experiments_data.pk', 'git_hash.json', 'managers', 'other_parameters.csv',
                    'parameters.pk', 'parameters.txt', 'summary.txt']
    display (files_stored)

    experiments_root = manager.path_experiments
    found_entries = sorted(os.listdir (experiments_root))
    assert found_entries == files_stored

    # TODO TEST: test content of the above files

    stored_df = pd.read_pickle (f'{experiments_root}/experiments_data.pk')

    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']
    assert stored_df.shape[0]==1
    assert (stored_df.columns==expected_columns).all()

    md ('experiment dataframe:'); display(stored_df)

    experiment_folders = os.listdir (f'{experiments_root}/experiments')

    print (f'folder created in `{experiments_root}/experiments`:'); print(experiment_folders)

    assert experiment_folders == ['00000']

    print ('This folder has one sub-folder per run, since '
            'multiple runs can be done with the same parameters.')

    run_entries = os.listdir (f'{experiments_root}/experiments/00000')

    print (f'contents of current run at `{experiments_root}/experiments/00000`:'); print(run_entries)

    # the same data frame can be obtained through the manager:
    manager_df = manager.get_experiment_data ()

    pd.testing.assert_frame_equal(stored_df,manager_df)

    manager.remove_previous_experiments (parent=True)

In [8]:
tst.run (test_basic_usage, tag='dummy')

#### Running second experiment with same parameter values

In [9]:
#export tests.test_experiment_manager
def test_same_values ():
    """Requesting the same parameters twice must not add a second experiment."""
    manager = init_em ('same_values')
    experiments_root = manager.path_experiments
    same_parameters = {'offset':0.1, 'rate': 0.05}

    # first experiment
    result, dict_results = manager.create_experiment_and_run (parameters=same_parameters.copy())

    # NOTE(review): presumably makes the manager fail if the experiment is
    # actually executed again -- confirm in the dummy manager used by `init_em`.
    manager.raise_error_if_run = True
    # second experiment, with exactly the same parameter values
    result, dict_results = manager.create_experiment_and_run (parameters=same_parameters.copy())

    experiment_df = manager.get_experiment_data ()

    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']
    assert experiment_df.shape[0]==1
    assert (experiment_df.columns==expected_columns).all()

    md ('experiment dataframe:'); display(experiment_df)

    # No new experiment is added to the DataFrame, since the values of the
    # parameters are already present in the first experiment.

    experiment_folders = os.listdir (f'{experiments_root}/experiments')

    print (f'folders created in `{experiments_root}/experiments`:'); print(experiment_folders)

    assert experiment_folders == ['00000']

    manager.remove_previous_experiments (parent=True)

In [10]:
tst.run (test_same_values, tag='dummy')

#### Running second experiment with *almost* same parameter values

In [11]:
#export tests.test_experiment_manager
def test_almost_same_values ():
    """Checks how tiny differences in a parameter value are matched to stored experiments."""
    manager = init_em ('almost_same_values')
    experiments_root = manager.path_experiments
    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']

    # first experiment
    result, dict_results = manager.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05})

    manager.raise_error_if_run = True
    # second experiment: the value of `rate` differs by 1.e-16 from the first one,
    # too small to be considered different with the default precision
    result, dict_results = manager.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05+1e-16})

    experiment_df = manager.get_experiment_data ()
    assert experiment_df.shape[0]==1
    assert (experiment_df.columns==expected_columns).all()
    experiment_folders = os.listdir (f'{experiments_root}/experiments')
    assert experiment_folders == ['00000']

    manager.raise_error_if_run = False
    # third call: same 1.e-16 difference, but now with precision=1e-17; the checks
    # below expect it to be stored as a new experiment
    result, dict_results = manager.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05+1e-16},
                                                              precision=1e-17)

    experiment_df = manager.get_experiment_data ()
    assert experiment_df.shape[0]==2
    assert (experiment_df.columns==expected_columns).all()
    display (experiment_df)
    experiment_folders = os.listdir (f'{experiments_root}/experiments')
    assert sorted(experiment_folders) == sorted(['00000', '00001'])

    manager.remove_previous_experiments (parent=True)

In [12]:
tst.run (test_almost_same_values, tag='dummy')

#### Adding new runs on previous experiment

In [13]:
#export tests.test_experiment_manager
def test_new_runs ():
    """Adds new runs to an existing experiment, with explicit and automatic run numbers."""
    manager = init_em ('new_runs')
    experiments_root = manager.path_experiments

    columns_run_0 = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                     'time_0', 'date', '0_finished']
    columns_run_1 = ['1_validation_accuracy', '1_test_accuracy', 'time_1','1_finished']
    columns_run_2 = ['2_validation_accuracy', '2_test_accuracy', 'time_2', '2_finished']

    # first experiment
    result, dict_results = manager.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05})

    # second call: in order to run the experiment again with the same parameters, we
    # increase the run number. The first call used run number 0, so we indicate
    # run_number=1
    result, dict_results = manager.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05},
                                                              run_number=1)

    experiment_df = manager.get_experiment_data ()

    assert experiment_df.shape[0]==1
    assert (experiment_df.columns==columns_run_0 + columns_run_1).all()

    md ('experiment dataframe:'); display(experiment_df)

    # another way of adding a new run is to indicate run_number=None. The checks
    # below expect the next free run number to be used: run numbers 0 and 1 are
    # taken, so the new run is number 2
    result, dict_results = manager.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05},
                                                              run_number=None)

    experiment_df = manager.get_experiment_data ()

    assert experiment_df.shape[0]==1
    assert (experiment_df.columns==columns_run_0 + columns_run_1 + columns_run_2).all()

    md ('experiment dataframe:'); display(experiment_df)

    # No new experiment is added to the DataFrame, since the values of the
    # parameters are already present in the first experiment.

    experiment_folders = os.listdir (f'{experiments_root}/experiments')

    print (f'folders created in `{experiments_root}/experiments`:'); print(experiment_folders)

    assert experiment_folders == ['00000']

    run_entries = os.listdir (f'{experiments_root}/experiments/00000')
    # The content of the experiment folder is only printed. A check against
    # ['0', '1', '2', 'other_parameters.json', 'parameters.json', 'parameters.pk',
    # 'parameters.txt'] was present here but disabled.
    print (sorted(run_entries))

    manager.remove_previous_experiments (parent=True)

In [14]:
tst.run (test_new_runs, tag='dummy')

#### Adding second experiment

In [15]:
#export tests.test_experiment_manager
def test_second_experiment ():
    """A call with new parameter values must add a second experiment."""
    manager = init_em ('second')
    experiments_root = manager.path_experiments

    # first experiment
    result, dict_results = manager.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05})

    md ('If we run a second experiment with new parameters, a new row is '
        'added to the dataframe, and a new folder is created:')

    # second experiment, with different values for both parameters
    result, dict_results = manager.create_experiment_and_run (parameters={'offset':0.7, 'rate': 0.2})

    experiment_df = manager.get_experiment_data ()

    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']
    assert experiment_df.shape[0]==2
    assert (experiment_df.columns==expected_columns).all()

    md ('experiment dataframe:'); display(experiment_df)

    experiment_folders = os.listdir (f'{experiments_root}/experiments')

    md (f'folders created in `{experiments_root}/experiments`:'); print(experiment_folders)

    assert sorted(experiment_folders) == sorted(['00000','00001'])

    manager.remove_previous_experiments (parent=True)

In [16]:
tst.run (test_second_experiment, tag='dummy')

#### Adding another parameter 

In [17]:
# Scratch illustration (cell is not exported): a `None` entry in a numeric column is
# displayed as a missing value, which `isna()` reports as True (see the next cell).
# The tests below rely on `isna()` in the same way to detect parameters that were
# not set for an experiment.
import pandas as pd
df = pd.DataFrame({'a':[1,2,3],'b':[1,None,3]})
df

Unnamed: 0,a,b
0,1,1.0
1,2,
2,3,3.0


In [18]:
df.isna().loc[1,'b']

True

In [19]:
#export tests.test_experiment_manager
def test_new_parameter ():
    """Check that a newly specified parameter adds both a new experiment and a new column."""
    em = init_em ('another_parameter')
    path_experiments = em.path_experiments
    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished','epochs']

    # first experiment: `epochs` is not specified
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05}
    )

    # second experiment: same `offset` and `rate`, plus the parameter `epochs`,
    # which the first experiment did not indicate
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05, 'epochs': 5}
    )

    df = em.get_experiment_data ()

    # a new experiment is added, and `epochs` is appended as the last column
    assert df.shape[0]==2
    assert (df.columns==expected_columns).all()
    assert (df.index==[0,1]).all()

    # Experiments that did not indicate the new parameter hold a missing value in its
    # column; here that is the first experiment. A missing value means that the default
    # for that parameter was used (the original author notes that DummyExperimentManager
    # uses a default of 10 epochs).
    assert df.isna().loc[0,'epochs']

    # the second experiment stores the value that was explicitly requested
    assert df.loc[1,'epochs'] == 5.0

    md ('experiment dataframe:')
    display(df)

    em.remove_previous_experiments (parent=True)

In [20]:
tst.run (test_new_parameter, tag='dummy')

#### Adding another parameter with default value

In [21]:
#export tests.test_experiment_manager
def test_new_parameter_default ():
    """Check that spelling out a parameter with its default value adds no new experiment."""
    em = init_em ('another_parameter_default')
    path_experiments = em.path_experiments
    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']

    # first experiment: `epochs` is not specified
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05}
    )

    # second experiment: same parameters, with `epochs` now given explicitly as 10
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05, 'epochs': 10}
    )

    df = em.get_experiment_data ()

    # Neither a new row nor an `epochs` column shows up: the explicit value matches the
    # default value implicitly used in the first experiment.
    assert df.shape[0]==1
    assert (df.columns==expected_columns).all()
    assert (df.index==[0]).all()

    md ('experiment dataframe:')
    display(df)

    em.remove_previous_experiments (parent=True)

In [22]:
tst.run (test_new_parameter_default, tag='dummy')

#### Indicating parameters that don't affect the experiment

In [23]:
#export tests.test_experiment_manager
def test_other_parameters ():
    """Check that values passed via `other_parameters` do not define a new experiment."""
    em = init_em ('other_parameters')
    path_experiments = em.path_experiments
    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']

    # first experiment:
    # `other_parameters` carries settings that must not affect the outcome of the
    # experiment. Here it changes the verbosity level, which should not alter how the
    # experiment runs, so the manager must not treat it as a different experiment.
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05},
        other_parameters={'verbose': False}
    )

    # second experiment:
    # same `parameters`, no verbosity setting. The manager considers it the same
    # experiment, so it must return the stored results without running again;
    # `raise_error_if_run` makes the test fail if it does run.
    em.raise_error_if_run = True
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05}
    )

    df = em.get_experiment_data ()

    # still a single experiment, and `verbose` does not appear as a column
    assert df.shape[0]==1
    assert (df.columns==expected_columns).all()
    assert (df.index==[0]).all()

    md ('experiment dataframe:')
    display(df)

    em.remove_previous_experiments (parent=True)

In [24]:
tst.run (test_other_parameters, tag='dummy')

#### remove_not_finished

In order to use this functionality, we need to indicate the name of the parameter that specifies the number of epochs. This can be done either by passing it when constructing the object:
```python
em = MyExperimentManager (name_epoch='epochs')
```
or by indicating it in the `other_parameters` dictionary:
```python
other_parameters = dict(name_epoch='epochs', ...)
```

In [25]:
#export tests.test_experiment_manager
def test_remove_not_finished ():
    """Run an interrupted experiment, then further ones without and with `remove_not_finished`.

    NOTE(review): the experiment table is only displayed after each step; nothing here
    asserts that the interrupted experiment is actually removed.
    """
    em = init_em ('remove_not_finished')
    path_experiments = em.path_experiments

    def show_experiment_data ():
        # display the current experiment table
        display(em.get_experiment_data ())

    # first experiment: we simulate a halt before finishing
    with pytest.raises (KeyboardInterrupt):
        _score, _results = em.create_experiment_and_run (
            parameters={'offset':0.1, 'rate': 0.05},
            other_parameters={'halt':True}
        )
    show_experiment_data ()

    # second experiment: regular run with new parameter values
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':1.0, 'rate': 0.2}
    )
    show_experiment_data ()

    # third experiment: new parameter values again, this time passing `remove_not_finished=True`
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':1.0, 'rate': 0.3},
        remove_not_finished=True
    )
    show_experiment_data ()

    em.remove_previous_experiments (parent=True)

In [26]:
tst.run (test_remove_not_finished, tag='dummy')

#### repeat_experiment

In [27]:
#export tests.test_experiment_manager
def test_repeat_experiment ():
    """Check that `repeat_experiment=True` re-runs an existing experiment in place."""
    em = init_em ('repeat_experiment')
    path_experiments = em.path_experiments
    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']

    # first experiment
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05}
    )

    df = em.get_experiment_data ()
    display(df)
    date = df.date.values[0]

    # second experiment: identical parameters, explicitly asking to repeat the run
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05},
        repeat_experiment = True
    )

    df = em.get_experiment_data ()
    display(df)

    # the stored date changes, which shows that the experiment was run again
    assert df.date.values[0] != date

    # the repeated run overwrites the existing row instead of adding a new experiment
    assert df.shape[0]==1
    assert (df.columns==expected_columns).all()
    assert (df.index==[0]).all()

    em.remove_previous_experiments (parent=True)

In [28]:
tst.run (test_repeat_experiment, tag='dummy')

#### check_finished

In order to use this functionality, we need to indicate the name of the parameter that specifies the number of epochs. This can be done either by passing it when constructing the object:
```python
em = MyExperimentManager (name_epoch='epochs')
```
or by indicating it in the `other_parameters` dictionary:
```python
other_parameters = dict(name_epoch='epochs', ...)
```

In [29]:
#export tests.test_experiment_manager
def test_check_finished ():
    """Check that `check_finished=True` re-runs an experiment that stopped before its last epoch."""
    em = init_em ('check_finished')
    path_experiments = em.path_experiments

    def date_and_score (df):
        # date and validation score stored for the first (and only) experiment
        return df.date.values[0], df['0_validation_accuracy'].values[0]

    # first experiment: we simulate that we only run for half the number of epochs
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05, 'epochs': 10},
        other_parameters={'actual_epochs': 5}
    )
    date, score = date_and_score (em.get_experiment_data ())

    # second experiment: same values in the parameters dictionary, without other_parameters.
    # The stored date and score must be unchanged.
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05, 'epochs': 10}
    )
    new_date, new_score = date_and_score (em.get_experiment_data ())
    assert (date==new_date) and (score==new_score)

    # third experiment: same values in the parameters dictionary, now passing the
    # `check_finished=True` keyword. Both the date and the score must change.
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05, 'epochs': 10},
        check_finished=True
    )

    df = em.get_experiment_data ()
    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']
    assert df.shape[0]==1
    assert (df.columns==expected_columns).all()
    assert (df.index==[0]).all()
    new_date, new_score = date_and_score (df)
    assert (date!=new_date) and (score!=new_score)

    em.remove_previous_experiments (parent=True)

In [30]:
tst.run (test_check_finished, tag='dummy')

#### recompute_metrics

In [31]:
#export tests.test_experiment_manager
def test_recompute_metrics ():
    """Exercise `recompute_metrics`, without and with `force_recompute_metrics`."""
    em = init_em ('recompute_metrics')
    path_experiments = em.path_experiments
    expected_columns = ['offset','rate','0_validation_accuracy','0_test_accuracy',
                        'time_0', 'date', '0_finished']

    # first experiment
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05}
    )

    # second experiment: new values, with running disallowed through `raise_error_if_run`
    em.raise_error_if_run = True
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.02},
        recompute_metrics=True
    )

    # a second row is registered, but it holds no validation score
    df = em.get_experiment_data ()
    assert df.shape[0]==2
    assert (df.columns==expected_columns).all()
    assert np.isnan(df['0_validation_accuracy'].values[1])

    # third experiment: same values with `epochs` given explicitly, running allowed again
    # and the metrics recomputation forced
    em.raise_error_if_run = False
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.02, 'epochs': 10},
        recompute_metrics=True,
        force_recompute_metrics=True
    )

    # no new row is added, and the second row now has a validation score
    df = em.get_experiment_data ()
    assert df.shape[0]==2
    assert (df.columns==expected_columns).all()
    assert (df.index==[0,1]).all()
    assert df['0_validation_accuracy'].values[1]==0.3

    em.remove_previous_experiments (parent=True)

In [32]:
tst.run (test_recompute_metrics, tag='dummy')

#### prev_epoch

In order to use this functionality, we need to indicate:
1. The name of the parameter that specifies the number of epochs.
2. The name of the file where the model is stored.

This can be done either by passing them when constructing the object:
```python
em = MyExperimentManager (name_epoch='epochs', model_file_name='model_weights.pk')
```
or by indicating them in the `other_parameters` dictionary:
```python
other_parameters = dict(name_epoch='epochs', model_file_name='model_weights.pk')
```

Furthermore, for this to work, our experiment manager needs to make use of either the `resume` parameter or the `prev_path_results` parameter. In particular, it needs to load the model file whose path is indicated in `parameters['resume']`, or whose path is indicated in `f'{parameters["prev_path_results"]}/{self.model_file_name}'`

In [33]:
#export tests.test_experiment_manager
def test_prev_epoch ():
    """Check that `prev_epoch=True` resumes from an earlier experiment with fewer epochs."""
    em = init_em ('prev_epoch')

    # get reference result: train for 17 epochs in one go
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 17})
    reference_accuracy = em.model.accuracy
    reference_weight = em.model.weight
    display (em.get_experiment_data ())
    em.remove_previous_experiments (parent=True)

    def assert_matches_reference ():
        # the current model must coincide with the one trained for 17 epochs in one go
        assert reference_accuracy==em.model.accuracy and reference_weight==em.model.weight

    # first 3 experiments: same hyper-parameters, different number of epochs
    for epochs in (10, 20, 15):
        _ = em.create_experiment_and_run (
            parameters={'offset':0.1, 'rate': 0.05, 'epochs': epochs}
        )

    # more epochs
    # in order to work, we need our experiment manager to make use of the
    # parameter 'resume' or the parameter 'prev_path_results'.
    # In particular, we need it to load the model file
    # whose path is indicated in parameters['resume'], or whose path is
    # indicated in f'{parameters["prev_path_results"]}/{self.model_file_name}'
    _ = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05, 'epochs': 17},
        prev_epoch=True
    )

    # only 2 additional epochs are trained to reach epoch 17
    assert em.model.epochs==2
    assert em.model.current_epoch==17
    assert_matches_reference ()

    # repeating the experiment trains all 17 epochs again
    _ = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05, 'epochs': 17},
        repeat_experiment = True
    )

    assert em.model.epochs==17
    assert em.model.current_epoch==17
    assert_matches_reference ()

    em.remove_previous_experiments (parent=True)

In [34]:
tst.run (test_prev_epoch, tag='dummy')

In [35]:
#export tests.test_experiment_manager
def test_prev_epoch2 ():
    """Check `prev_epoch` combined with `check_finished` and `check_finished_if_interrupted`."""
    em = init_em ('prev_epoch2')

    def check_resumed_run (score, results):
        # The resumed run must end at epoch 5 after training 3 more epochs, and the
        # experiment table must hold the final scores of both experiments.
        assert score==0.25 and results['validation_accuracy']==0.25
        assert em.model.current_epoch==5 and em.model.epochs==3
        df = em.get_experiment_data ()
        assert (df['0_validation_accuracy']==[0.25, 0.30]).all()

    # **********************************
    # case 1: the first experiment stops after 2 of the 5 requested epochs
    # **********************************
    em.remove_previous_experiments (parent=True)
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        other_parameters={'actual_epochs': 2}
    )

    assert score==0.16 and results['validation_accuracy']==0.16
    assert em.model.current_epoch==2 and em.model.epochs==2

    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.04, 'epochs': 5})

    # Request the first experiment again, resuming from the previous epoch and
    # checking whether it had finished
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        prev_epoch=True, check_finished=True, use_previous_best=False
    )
    check_resumed_run (score, results)

    em.remove_previous_experiments (parent=True)

    # **********************************
    # case 2: the first experiment is additionally interrupted
    # **********************************
    with pytest.raises (KeyboardInterrupt):
        _ = em.create_experiment_and_run (
            parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
            other_parameters={'actual_epochs': 2, 'halt': True}
        )

    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.04, 'epochs': 5})

    # Request the first experiment again, resuming from the previous epoch and
    # checking whether it had finished only because it was interrupted
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        prev_epoch=True, check_finished_if_interrupted=True,
        use_previous_best=False
    )
    check_resumed_run (score, results)

    em.remove_previous_experiments (parent=True)

In [36]:
tst.run (test_prev_epoch2, tag='dummy')

#### from_exp

For this to work, our experiment manager needs to make use of the parameter `prev_path_results`. In particular, it needs to load the model file whose path is indicated in `f'{parameters["prev_path_results"]}/{self.model_file_name}'`

In [37]:
#export tests.test_experiment_manager
def test_from_exp ():
    """Check that `from_exp` resumes training from the model of a previous experiment."""
    em = init_em ('from_exp')

    # get reference result: train for 5 epochs from scratch
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 5})
    reference_accuracy = em.model.accuracy
    em.remove_previous_experiments (parent=True)

    # first 2 experiments
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.03, 'epochs': 2})
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.04, 'epochs': 2})

    # the following resumes from experiment 0, and trains the model for 5 more epochs
    # using now a different `rate` hyper-parameter
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 5},
                                      from_exp=0)

    assert em.model.epochs==5 and em.model.current_epoch==7
    correct_accuracy = (0.1 + 0.03*2 # accuracy of model from experiment 0
                        + 0.05*5)     # accuracy gained by training for 5 more epochs using
                                    #  new hyper-parameters: rate=0.05
    # Compare with a tolerance on the absolute difference: without `abs`, any accuracy
    # below the expected value would pass the check.
    assert abs (em.model.accuracy-correct_accuracy) < 1e-10
    # resuming from experiment 0 must give a different result than training from scratch
    assert reference_accuracy!=em.model.accuracy

    em.remove_previous_experiments (parent=True)

In [38]:
tst.run (test_from_exp, tag='dummy')

#### skip_interrupted

In order to use this functionality, we need to indicate the name of the file where the model is stored.
This can be done either passing this when constructing the object:
```python
em = MyExperimentManager (model_file_name='model_weights.pk')
```
or indicating it in the `other_parameters` dictionary:
```python
other_parameters = dict(model_file_name='model_weights.pk')
```

Alternatively, we can indicate the name of the file where the model history is stored. This can be done by passing it when constructing the object:
```python
em = MyExperimentManager (name_model_history='history.pk')
```

If not indicated, the experiment manager tries to find the model history in a file named `model_history.pk`. In order to consider the history good enough, the experiment manager checks if the length of the arrays stored in the model_history dictionary is at least `parameters.get('min_iterations', dflt.min_iterations)`

In [39]:
#export tests.test_experiment_manager
def test_skip_interrupted ():
    """Check that `skip_interrupted=True` neither runs nor returns results for an interrupted experiment."""
    em = init_em ('skip_interrupted')
    path_experiments = em.path_experiments

    # first experiment: interrupted before finishing
    with pytest.raises (KeyboardInterrupt):
        _ = em.create_experiment_and_run (
            parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
            other_parameters={'halt': True}
        )

    # second experiment: runs until the end
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.04, 'epochs': 5})

    # From here on, actually running an experiment makes the test fail
    em.raise_error_if_run = True

    # requesting the interrupted experiment again yields no score and no results
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        skip_interrupted=True
    )
    assert score is None
    assert len(results)==0

    # same outcome when lowering `min_iterations` to 1
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        skip_interrupted=True, min_iterations=1
    )
    assert score is None
    assert len(results)==0

    # with a model file name that does not match, the same request raises RuntimeError
    em.model_file_name='wrong_file.pk'
    with pytest.raises (RuntimeError):
        score, results = em.create_experiment_and_run (
            parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
            skip_interrupted=True
        )

    display (em.get_experiment_data ())

    em.remove_previous_experiments (parent=True)

In [40]:
tst.run (test_skip_interrupted, tag='dummy')

#### use_last_result

In order to use this functionality, we need to indicate the name of the file where the model history is stored. This can be done by passing it when constructing the object:
```python
em = MyExperimentManager (name_model_history='history.pk')
```

If not indicated, the experiment manager tries to find the model history in a file named `model_history.pk`. In order to consider the history good enough, the experiment manager checks if the length of the arrays stored in the model_history dictionary is at least `parameters.get('min_iterations', dflt.min_iterations)`

In [41]:
#export tests.test_experiment_manager
def test_use_last_result ():
    """Check that `use_last_result=True` recovers the score of an interrupted experiment."""
    em = init_em ('use_last_result')
    path_experiments = em.path_experiments

    # first experiment: interrupted before finishing
    with pytest.raises (KeyboardInterrupt):
        _ = em.create_experiment_and_run (
            parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
            other_parameters={'halt': True}
        )

    # second experiment: runs until the end
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.04, 'epochs': 5})

    # only the second experiment has a validation score
    df = em.get_experiment_data ()
    assert (df.isna()['0_validation_accuracy'] == [True, False]).all()

    # We use last result but require that number of epochs is at least 50.
    # Since this is not true, the last result is not used.
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        use_last_result=True
    )
    assert score is None
    assert results=={}
    df = em.get_experiment_data ()
    display(df)
    assert (df.isna()['0_validation_accuracy'] == [True, False]).all()

    # We use last result and lower the required number of epochs to 2
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        use_last_result=True, min_iterations=2
    )
    print (score, results)
    assert score==0.25
    assert results=={'validation_accuracy': 0.25, 'test_accuracy': 0.35, 'accuracy': 0.25, 'last': 5}
    df = em.get_experiment_data ()
    display(df)
    assert (df.isna()['0_validation_accuracy'] == [False, False]).all()
    assert (df['0_validation_accuracy'] == [0.25, 0.30]).all()

    # We use last result and increase the required number of epochs to default number (50)
    # But we request to run the experiment until the end
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        use_last_result=True, run_if_not_interrumpted=True
    )
    print (score, results)
    df = em.get_experiment_data ()
    display(df)
    # TODO: the outcome of this last call is only printed; these checks are disabled:
    #assert score==None and results=={'validation_accuracy': 0.25, 'test_accuracy': 0.35, 'accuracy': 0.25, 'last': 5}
    #assert (df.isna()['0_validation_accuracy'] == [False, False]).all()
    #assert (df['0_validation_accuracy'] == [0.25, 0.30]).all()

    em.remove_previous_experiments (parent=True)

In [42]:
tst.run (test_use_last_result, tag='dummy')

##### second case

In [43]:
#export tests.test_experiment_manager
def test_use_last_result_run_interrupted ():
    """Check `use_last_result` with `run_if_not_interrumpted` on an interrupted, partially trained experiment."""
    em = init_em ('use_last_result_run_interrupted')
    path_experiments = em.path_experiments

    # first experiment: stops after 2 of the 5 epochs and is interrupted
    with pytest.raises (KeyboardInterrupt):
        _ = em.create_experiment_and_run (
            parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
            other_parameters={'actual_epochs': 2, 'halt': True}
        )

    # second experiment: runs until the end
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.04, 'epochs': 5})

    # only the second experiment has a validation score
    df = em.get_experiment_data ()
    assert (df.isna()['0_validation_accuracy'] == [True, False]).all()

    # We ask to use the last result, keeping the default minimum number of iterations,
    # and we request to run the experiment until the end
    score, results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.03, 'epochs': 5},
        use_last_result=True, run_if_not_interrumpted=True
    )
    print (score, results)
    # TODO: the returned values are only printed; this check is disabled:
    #assert score==None and results=={'validation_accuracy': 0.25, 'test_accuracy': 0.35, 'accuracy': 0.25, 'last': 5}

    # the model is trained for the full 5 epochs and both experiments end up with a score
    df = em.get_experiment_data ()
    assert em.model.current_epoch==5
    assert em.model.epochs==5
    assert (df.isna()['0_validation_accuracy'] == [False, False]).all()
    assert (df['0_validation_accuracy'] == [0.25, 0.30]).all()

    em.remove_previous_experiments (parent=True)

In [44]:
tst.run (test_use_last_result_run_interrupted, tag='dummy')

#### Storing parameters

In [45]:
#export tests.test_experiment_manager
def test_storing_em_args_and_parameters ():
    """Check which files are stored in the experiment folder and that `parameters.pk` can be loaded."""
    em = init_em ('storing_em_args_and_parameters')

    path_experiment = em.get_path_experiment (0)
    _score, _results = em.create_experiment_and_run (
        parameters={'offset':0.1, 'rate': 0.05}
    )
    check_last_part (path_experiment, 'test_storing_em_args_and_parameters/default/experiments/00000')

    # Expected folder content, in sorted order. The copy of the test module is only
    # expected when this code is not executed as `__main__`.
    expected_files = ['0', 'em_args.json', 'em_attrs.json', 'info.json', 'other_parameters.json',
                      'parameters.json', 'parameters.pk', 'parameters.txt']
    if __name__ != '__main__':
        expected_files.append ('test_experiment_manager.py')
    # `info.json` is accepted as either present or absent
    expected_files_without_info = [name for name in expected_files if name != 'info.json']

    stored_files = sorted (os.listdir(path_experiment))
    assert stored_files in (expected_files, expected_files_without_info)

    # `parameters.pk` holds a 5-element sequence; only `em_args` is inspected here
    _, _, em_args, _, _ = joblib.load (path_experiment/'parameters.pk')
    print (em_args)
    #assert em_args ==  {'run_number': 0, 'log_message': None, 'stack_level': -3}

    em.remove_previous_experiments (parent=True)

In [46]:
tst.run (test_storing_em_args_and_parameters, tag='dummy')

In [47]:
%debug

ERROR:root:No traceback has been produced, nothing to debug.


### grid_search

In [48]:
#export tests.test_experiment_manager
def test_grid_search ():
    """Exercise `grid_search`: plain grid, repeated grid, several runs per point and random search."""
    em = init_em ('grid_search')

    def check_first_grid ():
        # Expected experiment table for the grid rate in [0.03, 0.01] x epochs in [5, 7].
        # The experiments with rate=0.01 hold a missing value in the `rate` column
        # (presumably because 0.01 is the default rate — TODO confirm).
        df = em.get_experiment_data ()
        assert (df['epochs']==[5.0, 5.0, 7.0, 7.0]).all()
        assert (df['rate'].values[[0,2]]==[0.03, 0.03]).all()
        assert (df.isna()['rate']==[False, True, False, True]).all()
        assert (df['offset']==0.1).all()
        assert (np.abs(df['0_validation_accuracy']-[0.25, 0.15, 0.31, 0.17])<1.0e-15).all()

    # *********************************
    # case 1: plain grid search
    # *********************************
    em.grid_search (parameters_multiple_values={'rate': [0.03,0.01], 'epochs': [5, 7]},
                    parameters_single_value={'offset':0.1},
                    other_parameters={'verbose':False})
    check_first_grid ()

    # *********************************
    # case 2: same grid with the values listed in a different order; nothing may
    # run again and the experiment table must stay the same
    # *********************************
    em.raise_error_if_run = True
    em.grid_search (parameters_multiple_values={'rate': [0.01,0.03], 'epochs': [7, 5]},
                    parameters_single_value={'offset':0.1})
    check_first_grid ()

    # *********************************
    # case 3: start from scratch, adding noise and running each grid point twice
    # *********************************
    em.remove_previous_experiments (parent=True)
    em.raise_error_if_run = False
    em.grid_search (parameters_multiple_values={'rate': [0.01,0.03], 'epochs': [7, 5]},
                    parameters_single_value={'offset':0.1, 'noise':0.0001}, nruns=2)
    df = em.get_experiment_data ()
    assert (df['epochs']==[7.0, 7.0, 5.0, 5.0]).all()
    assert (df['rate'].values[[1,3]]==[0.03, 0.03]).all()
    assert (df.isna()['rate']==[True, False, True, False]).all()
    assert (df['offset']==0.1).all()
    expected_accuracy = [0.17, 0.31, 0.15, 0.25]
    assert (np.abs(df['0_validation_accuracy']-expected_accuracy)<0.1).all()
    assert (np.abs(df['1_validation_accuracy']-expected_accuracy)<0.1).all()
    # because of the noise, the two runs of each experiment must differ
    assert (df['0_validation_accuracy']!=df['1_validation_accuracy']).all()

    # *********************************
    # case 4: start from scratch, random search with a fixed seed
    # *********************************
    em.remove_previous_experiments (parent=True)
    np.random.seed (42)
    em.grid_search (parameters_multiple_values={'rate': [0.01,0.03], 'epochs': [7, 5]},
                    parameters_single_value={'offset':0.1}, random_search=True,
                    other_parameters={'verbose':False})

    df = em.get_experiment_data ()
    assert (df['epochs']==[7., 5., 7., 5.]).all()
    assert (df['rate'].values[[0,1]]==[0.03, 0.03]).all()
    assert (df.isna()['rate']==[False, False, True, True]).all()
    assert (df['offset']==0.1).all()
    assert (np.abs(df['0_validation_accuracy']-[0.31, 0.25, 0.17, 0.15])<1e-15).all()

    em.remove_previous_experiments (parent=True)

In [49]:
tst.run (test_grid_search, tag='dummy')

In [50]:
#%debug

### run_multiple_repetitions

In [51]:
#export tests.test_experiment_manager
def test_run_multiple_repetitions ():
    """Check `run_multiple_repetitions` with five noisy runs and with the default call."""
    em = init_em ('run_multiple_repetitions')
    np.random.seed (42)

    # *********************************
    # five noisy repetitions of the same experiment
    # *********************************
    mu, std, dict_results = em.run_multiple_repetitions (
        parameters={'rate': 0.03, 'epochs': 5, 'offset': 0.1},
        other_parameters = {'verbose': False, 'noise': 0.001}, nruns=5
    )
    df = em.get_experiment_data ()
    assert df.shape==(1,24)

    # one validation accuracy column per run
    score_columns = [f'{i}_validation_accuracy' for i in range(5)]
    assert df.columns.isin(score_columns).sum()==5

    # the mean is close to, but not exactly, 0.25, with a small non-zero spread
    assert 0 < np.abs(mu-0.25) < 1e-3
    assert 0 < std < 1e-3

    # *********************************
    # default call: no noise and no `nruns`
    # *********************************
    em.remove_previous_experiments (parent=True)
    mu, std, dict_results = em.run_multiple_repetitions (
        parameters={'rate': 0.03, 'epochs': 5, 'offset': 0.1},
        other_parameters = {'verbose': False}
    )
    df = em.get_experiment_data ()
    assert df.shape==(1,8)
    assert mu==0.25
    assert std==0

    em.remove_previous_experiments (parent=True)

In [52]:
tst.run (test_run_multiple_repetitions, tag='dummy')

### hp_optimization

In [53]:
#export tests.test_experiment_manager
def parameter_sampler (trial):
    rate = trial.suggest_uniform('rate', 0.001, 0.01)
    offset = trial.suggest_categorical('offset', [0.01, 0.05, 0.1])

    parameters = dict(rate=rate,
                      offset=offset)

    return parameters

def test_hp_optimization ():
    em = init_em ('hp_optimization')
    np.random.seed (42)

    parameters = {'epochs': 12}

    em.hp_optimization (parameter_sampler=parameter_sampler, parameters=parameters,
                        study_name='test_hp_optimization_study',
                        n_trials=5)

    df = em.get_experiment_data ()
    display (df)
    # TODO: error in pytest
    #assert df.shape == (5,8)
    #assert (df['offset']==[0.01,0.10,0.05,0.01,0.10]).all()
    #assert np.max(np.abs(df['rate']-[0.005939, 0.004813, 0.009673, 0.006112, 0.001182])) < 1e-5

    em.remove_previous_experiments (parent=True)

In [54]:
tst.run (test_hp_optimization, tag='dummy')

### rerun_experiment

In [55]:
#export tests.test_experiment_manager
def parameter_sampler (trial):
    """Sample `epochs` and `offset`, in that order, from fixed sets of categorical values."""
    return dict(
        epochs=trial.suggest_categorical('epochs', [2, 4]),
        offset=trial.suggest_categorical('offset', [0.02, 0.06]),
    )

def test_rerun_experiment ():
    """Exercise `rerun_experiment` in five scenarios on a small set of experiments.

    Three experiments are created first (the first one is interrupted through
    `halt=True`); each case below then re-runs one of them in a different way.
    All results are removed at the end.
    """
    em = init_em ('rerun_experiment')

    # first 3 experiments
    # experiment 0 is started with halt=True and is expected to raise KeyboardInterrupt
    with pytest.raises (KeyboardInterrupt):
        _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 5},
                                          other_parameters={'halt': True})
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.03, 'epochs': 2})
    _ = em.create_experiment_and_run (parameters={'rate': 0.04})
    df = em.get_experiment_data ()
    assert df.shape==(3,8)

    # ****************************************
    # case 1: re-running finished experiment
    # ****************************************
    # NOTE(review): presumably raise_error_if_run makes the manager fail if it actually
    # runs the experiment, so this checks the finished experiment is not re-run -- confirm
    em.raise_error_if_run = True
    em.rerun_experiment (experiments=[1])

    # ****************************************
    # case 2: re-running interrupted experiment
    # ****************************************
    em.raise_error_if_run = False
    # halt is overridden so that the previously interrupted experiment can now complete
    em.rerun_experiment (experiments=[0], other_parameters={'halt':False, 'verbose':False})
    df = em.get_experiment_data ()
    assert df.loc[0,'0_validation_accuracy']==0.35

    # ****************************************
    # case 3: adding more runs to previous experiment
    # ****************************************
    em.rerun_experiment (experiments=[1], nruns=5, other_parameters={'noise': 0.001, 'verbose':False})
    df = em.get_experiment_data ()
    # one validation-accuracy column per run (runs 0..4) must now exist
    x=[f'{i}_validation_accuracy' for i in range(5)]; assert df.columns.isin(x).sum()==5
    assert df.shape==(3,24)

    # ****************************************
    # case 4: using previous experiment parameters as fixed, and using grid search with other
    # parameters
    # ****************************************
    em.rerun_experiment (experiments=[2],
                         parameters_multiple_values={'offset': [0.01,0.05], 'epochs': [3,5]},
                         other_parameters={'verbose':False},
                         nruns=2)
    df = em.get_experiment_data ()
    # the 2x2 grid adds four experiments to the previous three
    assert df.shape==(7,24)
    assert np.max(np.abs(df['0_validation_accuracy'].values- [0.35, 0.16, 0.9,  0.13, 0.17, 0.21, 0.25])) < 1e-10
    assert df.isna()['1_validation_accuracy'].sum()==2
    # n1 / n0 are not read before being reassigned in case 5 below
    n1 = (~df.isna())['1_validation_accuracy'].sum()
    n0 = (~df.isna())['0_validation_accuracy'].sum()

    # ****************************************
    # case 5: using previous experiment parameters as fixed, and using BO with other
    # parameters
    # ****************************************
    em.rerun_experiment (experiments=[2],
                         parameter_sampler=parameter_sampler,
                         other_parameters={'verbose':False},
                         n_trials=4, sampler_method='skopt')
    df2 = em.get_experiment_data ()
    display (df2)
    print (df2.shape)
    # only a lower bound is checked: the number of new rows is not pinned by this test
    assert df2.shape[0]>7
    # n1 / n0 are only used by the commented-out assertion below
    n1 = (~df2.isna())['1_validation_accuracy'].sum()
    n0 = (~df2.isna())['0_validation_accuracy'].sum()
    #assert (n0+n1)==16

    em.remove_previous_experiments (parent=True)

In [56]:
tst.run (test_rerun_experiment, tag='dummy')

### rerun_experiment_pipeline

Allows updating some of the parameters of previous experiments and re-running them with those updated parameters, keeping the experiment number unchanged. Optionally, it can save the results to the csv file.

In [57]:
#export tests.test_experiment_manager
def test_rerun_experiment_pipeline ():
    """Exercise `rerun_experiment_pipeline` on finished, modified and interrupted experiments.

    Three experiments are created first: experiment 0 is interrupted through
    `halt=True`, experiment 1 has a single run and experiment 2 has five runs.
    All results are removed at the end.
    """
    em = init_em ('rerun_experiment_pipeline')

    # first 3 experiments
    with pytest.raises (KeyboardInterrupt):
        _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 5},
                                          other_parameters={'halt': True})
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.03, 'epochs': 2})
    _ = em.run_multiple_repetitions (parameters={'rate': 0.04}, nruns=5)
    df = em.get_experiment_data ()
    display (df)
    # both differences are compared in absolute value; without np.abs on the second term
    # any test accuracy below 0.26 would pass the check
    assert (np.abs(df.loc[1,'0_validation_accuracy']-0.16)<1e-5
            and np.abs(df.loc[1, '0_test_accuracy']-0.26)<1e-5)
    #print (df.shape)
    assert df.shape==(3,24)

    # ****************************************
    # case 1: re-running finished experiment
    # ****************************************
    # the following produces an error since run_numbers must be indicated
    with pytest.raises (TypeError):
        em.rerun_experiment_pipeline (experiments=[1])

    # with raise_error_if_run enabled, re-running the pipeline is expected to raise RuntimeError
    em.raise_error_if_run = True
    with pytest.raises (RuntimeError):
        em.rerun_experiment_pipeline (experiments=[1], run_numbers=[0])
    em.raise_error_if_run = False
    # ****************************************
    # case 2: changing parameters of prev experiment number
    # ****************************************
    em.rerun_experiment_pipeline (experiments=[1], run_numbers=[0],
                                  new_parameters={'rate': 0.04}, save_results=True)
    df = em.get_experiment_data ()
    assert np.abs(df.loc[1,'0_validation_accuracy']-0.18)<1e-5 and np.abs(df.loc[1, '0_test_accuracy']-0.28)<1e-5

    # ****************************************
    # case 3: re-running interrupted experiment
    # ****************************************
    # the following produces an error since halt is True in loaded parameters
    with pytest.raises (KeyboardInterrupt):
        em.rerun_experiment_pipeline (experiments=[0], run_numbers=[0])

    # ****************************************
    # case 4: adding more runs to previous experiment
    # ****************************************
    # the following produces an error since run_numbers must be a subset of those already run
    with pytest.raises (FileNotFoundError):
        em.rerun_experiment_pipeline (experiments=[1], run_numbers=list(range(5)))

    # re-running existing runs without saving must leave the stored results untouched
    em.rerun_experiment_pipeline (experiments=[2], run_numbers=list(range(5)))
    df2 = em.get_experiment_data ()
    pd.testing.assert_frame_equal(df,df2)

    em.remove_previous_experiments (parent=True)

In [58]:
tst.run (test_rerun_experiment_pipeline, tag='dummy')

### rerun_experiment_par

The only difference from `rerun_experiment_pipeline` is that all the parameters to be used must now be provided. Therefore, `rerun_experiment_par` is not about updating *some* of the parameters but about using entirely new parameters. There is no saving.

In [59]:
#export tests.test_experiment_manager
def test_rerun_experiment_par ():
    """Exercise `rerun_experiment_par` and check that it does not alter stored results.

    Three experiments are created first: experiment 0 is interrupted through
    `halt=True`, experiment 1 has a single run and experiment 2 has five runs.
    All results are removed at the end.
    """
    em = init_em ('rerun_experiment_par')

    # first 3 experiments
    with pytest.raises (KeyboardInterrupt):
        _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.05, 'epochs': 5},
                                          other_parameters={'halt': True})
    _ = em.create_experiment_and_run (parameters={'offset':0.1, 'rate': 0.03, 'epochs': 2})
    _ = em.run_multiple_repetitions (parameters={'rate': 0.04}, nruns=5)
    df = em.get_experiment_data ()
    display (df)
    # both differences are compared in absolute value; without np.abs on the second term
    # any test accuracy below 0.26 would pass the check
    assert (np.abs(df.loc[1,'0_validation_accuracy']-0.16)<1e-5
            and np.abs(df.loc[1, '0_test_accuracy']-0.26)<1e-5)
    #print (df.shape)
    assert df.shape==(3,24)

    # ****************************************
    # case 1: re-running finished experiment
    # ****************************************
    # the following produces an error since run_numbers must be indicated
    with pytest.raises (TypeError):
        em.rerun_experiment_par (experiments=[1])

    # with raise_error_if_run enabled, re-running is expected to raise RuntimeError
    em.raise_error_if_run = True
    with pytest.raises (RuntimeError):
        em.rerun_experiment_par (experiments=[1], run_numbers=[0])
    em.raise_error_if_run = False
    # ****************************************
    # case 2: changing parameters of prev experiment number
    # ****************************************
    em.rerun_experiment_par (experiments=[1], run_numbers=[0],
                                  parameters={'rate': 0.04})
    df2 = em.get_experiment_data ()

    # nothing is saved by rerun_experiment_par, so the experiment table must be unchanged
    pd.testing.assert_frame_equal(df,df2)

    em.remove_previous_experiments (parent=True)

In [60]:
tst.run (test_rerun_experiment_par, tag='dummy')

## get_git_revision_hash

In [61]:
#export
def get_git_revision_hash (path_experiments=None):
    """Return the current git commit hash, caching it in `path_experiments` when given.

    The hash is obtained with `git rev-parse HEAD` and converted with `str`, so the
    returned text is the string form of the raw bytes output. If `path_experiments`
    is not None, the hash is also written to `<path_experiments>/git_hash.json`.

    If obtaining or saving the hash fails, the hash previously stored in
    `git_hash.json` is returned when that file exists; otherwise an empty string
    is returned.

    Parameters
    ----------
    path_experiments : str, Path or None
        Folder where `git_hash.json` is written to / read from.

    Returns
    -------
    str
        The git hash, or '' when it could be neither computed nor loaded.
    """
    logger = logging.getLogger("experiment_manager")
    path_hash_file = None
    if path_experiments is not None:
        path_hash_file = Path(path_experiments).resolve()/'git_hash.json'

    try:
        git_hash = str(subprocess.check_output(['git', 'rev-parse', 'HEAD']))
        if path_hash_file is not None:
            with open(path_hash_file, 'wt') as f:
                json.dump(git_hash, f)
    except Exception:
        # Fall back to the cached file only if it really exists: checking just the
        # folder would raise FileNotFoundError when no hash has been stored yet.
        if path_hash_file is not None and path_hash_file.exists():
            logger.info ('could not get git hash, retrieving it from disk...')
            with open(path_hash_file, 'rt') as f:
                git_hash = json.load(f)
        else:
            logger.info ('could not get git hash, using empty string...')
            git_hash = ''

    return str(git_hash)

### Usage example

In [62]:
#export tests.test_experiment_manager
def test_get_git_revision_hash ():
    """Check `get_git_revision_hash` with and without a folder, and outside a git repo.

    The working directory and the results folder are restored / removed in a
    `finally` clause, so a failing assertion does not leave the process in the
    parent directory or leave files behind.
    """
    global git_hash
    path_results = 'test_get_git_revision_hash'
    os.makedirs (path_results, exist_ok=True)
    curdir = os.path.abspath('.')

    try:
        # first option: git hash returned but not saved to disk
        git_hash = get_git_revision_hash ()
        assert git_hash != ''

        # second option: git hash saved to disk
        git_hash = get_git_revision_hash (path_results)
        assert os.listdir (path_results)==['git_hash.json']

        # third option: no git repo
        # path_results is relative, so from the parent folder it points to a
        # location where no git_hash.json has been stored
        os.chdir ('..')
        git_hash = get_git_revision_hash (path_results)
        assert git_hash == ''
    finally:
        os.chdir (curdir)
        remove_previous_results (path_results)

In [63]:
tst.run (test_get_git_revision_hash, tag='dummy')

## record_parameters

In [64]:
#export
def record_parameters (path_save, parameters, other_parameters=None, em_args=None, info=None,
                      em_attrs=None):
    """Store the experiment parameters in `path_save` as text, pickle and JSON.

    Three kinds of files are written:

    - `parameters.txt`: one `name=dict(...)` section per dictionary, formatted
      with `mypprint`.
    - `parameters.pk`: joblib dump of the list `[parameters, ...]` holding, in
      this order, those of `other_parameters`, `em_args`, `info` and `em_attrs`
      that are not None. If only `parameters` is given, the dictionary itself
      is dumped instead of a one-element list.
    - `<name>.json`: one JSON file per dictionary, written on a best-effort
      basis (dictionaries that cannot be serialized are skipped).

    Parameters
    ----------
    path_save : str or Path
        Existing folder where the files are written.
    parameters : dict
        Main parameters of the experiment.
    other_parameters, em_args, info : dict or None
        Optional dictionaries; those that are None are not stored.
    em_attrs : dict or None
        Optional dictionary, reduced with `get_scalar_fields` before being stored.
    """
    if em_attrs is not None:
        em_attrs = get_scalar_fields (em_attrs)

    # Optional dictionaries, in the order used by every output file.
    optional_sections = [('other_parameters', other_parameters),
                         ('em_args', em_args),
                         ('info', info),
                         ('em_attrs', em_attrs)]
    present_sections = [(name, content) for name, content in optional_sections
                        if content is not None]

    with open('%s/parameters.txt' %path_save, 'wt') as f:
        f.write('%s\n' %mypprint(parameters, dict_name='parameters'))
        # Each section is printed from its own dictionary and under its own name.
        for name, content in present_sections:
            f.write('\n\n%s\n' %mypprint(content, dict_name=name))

    to_pickle = [parameters] + [content for _, content in present_sections]
    if len(to_pickle) == 1: to_pickle = to_pickle[0]
    joblib.dump (to_pickle,f'{path_save}/parameters.pk')

    # JSON copies are a convenience only: a failure here must not abort the experiment.
    logger = logging.getLogger("experiment_manager")
    for name, content in [('parameters', parameters)] + present_sections:
        try:
            with open (f'{path_save}/{name}.json', 'wt') as f:
                json.dump(content, f)
        except Exception as e:
            logger.debug (f'could not write {name}.json: {e}')

## mypprint

In [65]:
#export
def mypprint(parameters, dict_name=None):
    """Format the dictionary `parameters` as text, one `key=value` entry per line.

    Entries are sorted by key and string values are wrapped in single quotes.
    With `dict_name`, the result has the form `dict_name=dict(k1=v1,\\n...)` and
    continuation lines are aligned with the first entry; without it, every line
    starts with a tab. The returned text always ends with a newline.
    """
    named = dict_name is not None
    head = '%s=dict(' %dict_name if named else '\t'
    pad = ' ' * len(head) if named else '\t'
    tail = ')\n' if named else '\n'

    entries = []
    for key, value in sorted(parameters.items(), key=lambda item: item[0]):
        if type(value) is str:
            value = "'" + value + "'"
        entries.append('{}={}'.format(key, value))

    # consecutive entries are separated by a comma, a newline and the padding
    return head + (',\n' + pad).join(entries) + tail

## load_or_create_experiment_values

In [70]:
#export
def load_or_create_experiment_values (path_csv, parameters, precision=1e-15, logger=None):
    """Return the experiment number associated with `parameters`, creating it if needed.

    The experiment table is read from the pickle file next to `path_csv` (or from
    the csv itself if the pickle is missing or cannot be read). If a row matches
    `parameters`, its number is returned; otherwise a new row is appended. The
    table is written back to csv and pickle whenever it has changed.

    Parameters
    ----------
    path_csv : str or Path
        Path to the csv file holding the experiment table.
    parameters : dict
        Parameters that identify the experiment.
    precision : float
        Tolerance forwarded to `experiment_utils.find_rows_with_parameters_dict`.
    logger : logging.Logger or None
        Logger to use; defaults to the "experiment_manager" logger.

    Returns
    -------
    experiment_number : int
        Row of the (first) matching experiment, or of the newly appended one.
    experiment_data : pd.DataFrame
        The experiment table.
    """

    if logger is None: logger = logging.getLogger("experiment_manager")
    # Accept pathlib.Path as well as str: the pickle path is derived with str.replace.
    path_csv = str(path_csv)
    # NOTE(review): this replaces every 'csv' substring, including those in folder names.
    path_pickle = path_csv.replace('csv', 'pk')
    experiment_numbers = []
    changed_dataframe = False

    if os.path.exists (path_pickle) or os.path.exists (path_csv):
        read_csv_flag = False
        if os.path.exists (path_pickle):
            # work-around for solving the issue with pandas versions
            # Pandas >= 1.1.0 presents problems when reading pickle files
            # from earlier versions
            try:
                experiment_data = pd.read_pickle (path_pickle)
                experiment_data = experiment_data.copy()
            except AttributeError:
                read_csv_flag = True
        else:
            read_csv_flag = True
        if read_csv_flag:
            experiment_data = pd.read_csv (path_csv, index_col=0)
            experiment_data.to_pickle(path_pickle)

        experiment_data, removed_defaults = remove_defaults_from_experiment_data (experiment_data)

        # Finds rows that match parameters. If the dataframe doesn't have any parameter with that name,
        # a new column is created and changed_dataframe is set to True
        experiment_numbers, changed_dataframe, _ = experiment_utils.find_rows_with_parameters_dict (
            experiment_data, parameters, precision = precision
        )

        changed_dataframe = changed_dataframe or removed_defaults

        if len(experiment_numbers) > 1:
            # the list is passed as a lazy logging argument, which needs a %s placeholder
            logger.info ('more than one matching experiment: %s', experiment_numbers)
    else:
        experiment_data = pd.DataFrame()

    if len(experiment_numbers) == 0:
        # NOTE(review): DataFrame.append was removed in pandas 2.0, and the column
        # assignment below only lists the keys of `parameters` -- confirm this is
        # valid when the table already holds other columns.
        experiment_data = experiment_data.append (parameters, ignore_index=True)
        experiment_data.columns = pd.MultiIndex.from_product (
            [[dflt.parameters_col], list(parameters.keys()), ['']])
        experiment_data = experiment_data[experiment_data.columns.sort_values()]
        changed_dataframe = True
        experiment_number = experiment_data.shape[0]-1
    else:
        # when several rows match, the first one is used
        experiment_number = experiment_numbers[0]

    if changed_dataframe:
        experiment_data.to_csv(path_csv)
        experiment_data.to_pickle(path_pickle)

    return experiment_number, experiment_data

### Usage example

In [68]:
#export tests.test_experiment_manager
def test_load_or_create_experiment_values ():
    """Check that a first call on an empty folder creates experiment number 0.

    The folder is emptied before the test and removed afterwards, even if an
    assertion fails.
    """
    path_csv_folder = 'test_load_or_create_experiment_values'
    # start from a clean folder so that no previous table is loaded
    if os.path.exists (path_csv_folder):
        remove_previous_results (path_csv_folder)
    os.makedirs (path_csv_folder, exist_ok=True)

    path_csv = f'{path_csv_folder}/experiments_data.csv'
    parameters = dict (a='yes', b=1.2, c=True)

    try:
        experiment_number, experiment_data = load_or_create_experiment_values (
            path_csv, parameters, precision=1e-15, logger=None)

        # no table existed, so a single row is created and it gets number 0
        assert experiment_number == 0
        assert experiment_data.shape[0] == 1
        assert os.path.exists (path_csv)
        # TODO: add a second call with different parameters once it is supported
    finally:
        remove_previous_results (path_csv_folder)

In [None]:
tst.run (test_load_or_create_experiment_values, tag='dummy')

In [72]:
path_csv_folder = 'test_load_or_create_experiment_values'
os.makedirs (path_csv_folder, exist_ok=True)
parameters = dict (a='yes', b=1.2, c=True)
experiment_number, experiment_data = load_or_create_experiment_values (
    f'{path_csv_folder}/experiments_data.csv', parameters)
experiment_data

Unnamed: 0_level_0,parameters,parameters,parameters
Unnamed: 0_level_1,a,b,c
,,,
0.0,yes,1.2,1.0


In [73]:
parameters = dict (a='no', b=1.2, c=True)
experiment_number, experiment_data = load_or_create_experiment_values (
    f'{path_csv_folder}/experiments_data.csv', parameters)
experiment_data

AttributeError: 'tuple' object has no attribute 'find'

In [None]:
remove_previous_results (path_csv_folder)

## store_parameters

In [None]:
#export
def store_parameters (path_experiments, experiment_number, parameters):
    """ Keeps track of dictionary to map experiment number and parameters values for the different experiments.

    The mapping `{experiment_number: parameters}` is kept in
    `<path_experiments>/parameters.pk`. When `experiment_number` is not yet in the
    mapping, it is added and a readable description is appended to
    `<path_experiments>/parameters.txt`. In every case `experiment_number` is
    pickled to `<path_experiments>/current_experiment_number.pkl`.

    Parameters
    ----------
    path_experiments : str or Path
        Existing folder where the files are stored.
    experiment_number : int
        Number of the experiment being registered.
    parameters : dict
        Parameters of that experiment.
    """
    path_experiments = Path(path_experiments).resolve()
    path_hp_dictionary = path_experiments/'parameters.pk'
    if os.path.exists(path_hp_dictionary):
        with open(path_hp_dictionary,'rb') as f:
            all_parameters = pickle.load (f)
    else:
        all_parameters = {}
    if experiment_number not in all_parameters.keys():
        str_par = '\n\nExperiment %d => parameters: \n%s\n' %(experiment_number,mypprint(parameters))
        with open(path_experiments/'parameters.txt', 'at') as f:
            f.write(str_par)
        all_parameters[experiment_number] = parameters
        with open(path_hp_dictionary,'wb') as f:
            pickle.dump (all_parameters, f)

    # pickle number of current experiment, for visualization
    with open(path_experiments/'current_experiment_number.pkl','wb') as f:
        pickle.dump (experiment_number, f)

## isnull

In [None]:
#export
def isnull (experiment_data, experiment_number, name_column):
    """Tell whether the cell (`experiment_number`, `name_column`) holds no value.

    Returns True when the column does not exist in `experiment_data`, or when
    the cell is None / NaN. Non-numeric cells (e.g. strings) are supported and
    are considered not null.
    """
    if name_column not in experiment_data.columns:
        return True
    # single lookup; pd.isnull handles None, NaN and non-numeric values alike,
    # whereas np.isnan raises TypeError on strings
    value = experiment_data.loc[experiment_number, name_column]
    return pd.isnull (value)

## get_experiment_number

In [None]:
#export
def get_experiment_number (path_experiments, parameters = {}):
    """Return the experiment number associated with `parameters`.

    The table `<path_experiments>/experiments_data.csv` is looked up (and updated
    if needed) through `load_or_create_experiment_values`.

    Parameters
    ----------
    path_experiments : str or Path
        Folder holding `experiments_data.csv`.
    parameters : dict
        Parameters that identify the experiment.
    """
    # Path() makes the `/` operator valid for str folders too; the csv path is then
    # passed on as a str, the type on which the pickle path derivation of
    # `load_or_create_experiment_values` operates.
    path_csv = Path(path_experiments)/'experiments_data.csv'
    experiment_number, _ = load_or_create_experiment_values (str(path_csv), parameters)

    return experiment_number

## get_experiment_numbers

In [None]:
#export
def get_experiment_numbers (path_results_base, parameters_single_value, parameters_multiple_values_all):
    """Return one experiment number per point of a parameter grid.

    `parameters_multiple_values_all` is expanded with `ParameterGrid`. Each grid
    point is completed with `parameters_single_value` (which takes precedence on
    common keys), passed through `remove_defaults`, and resolved to an experiment
    number with `get_experiment_number`. Numbers are returned in grid order.
    """
    grid_points = list(ParameterGrid(parameters_multiple_values_all))

    experiment_numbers = []
    for grid_point in grid_points:
        merged = dict(grid_point)
        merged.update(parameters_single_value)
        merged = remove_defaults (merged)
        experiment_numbers.append(
            get_experiment_number (path_results_base, parameters=merged))

    return experiment_numbers

## insert_experiment_script_path

In [None]:
#export
def insert_experiment_script_path (info, logger, stack_level=-3):
    """Record in `info` the file and line found at a given level of the call stack.

    Nothing is done when `info['script_path']` is already set. Otherwise the
    stack entry at index `info['stack_level']` (or `stack_level` if that key is
    absent) is stored in `info['script_path']` and `info['lineno']`, logged
    through `logger.info`, and the `stack_level` key is removed from `info`.
    """
    if info.get('script_path') is not None:
        return

    level = info.get('stack_level', stack_level)
    # extract_stack is called right here, so index -1 is this very function
    frame = traceback.extract_stack()[level]
    info['script_path'] = frame.filename
    info['lineno'] = frame.lineno
    logger.info ('experiment script: {}, line: {}'.format(frame.filename, frame.lineno))
    info.pop('stack_level', None)

## load_parameters

In [None]:
#export
def load_parameters (experiment=None,
                     other_parameters={}, em_args={}, parameters = {},
                     check_experiment_matches=True, em=None):
    """Load the parameters stored for `experiment` and overlay the given ones.

    The first three items stored in `<experiment folder>/parameters.pk` are read
    as `parameters`, `other_parameters` and `em_args`; each is updated with the
    corresponding argument of this function.

    When no new `parameters` are given and `check_experiment_matches` is True,
    `em_args['experiment_number']` is set to `experiment`; otherwise that key is
    removed from `em_args` if present.

    Parameters
    ----------
    experiment : int or None
        Experiment whose stored parameters are loaded.
    other_parameters, em_args, parameters : dict
        Values that override the stored ones.
    check_experiment_matches : bool
        Whether to require the same experiment number when `parameters` is empty.
    em : experiment manager or None
        Defaults to the manager returned by `get_experiment_manager`.

    Returns
    -------
    tuple
        `(parameters, other_parameters, em_args)` after the overlay.

    Raises
    ------
    FileNotFoundError
        If `parameters.pk` does not exist in the experiment folder.
    """
    if em is None:
        from hpsearch.config.hpconfig import get_experiment_manager
        em = get_experiment_manager ()

    # read as in the original implementation, although the value is not used below
    path_experiments = em.path_experiments

    path_experiment = em.get_path_experiment (experiment)

    if not os.path.exists('%s/parameters.pk' %path_experiment):
        raise FileNotFoundError (f'file {path_experiment/"parameters.pk"} not found')

    stored_parameters, stored_other_parameters, stored_em_args, *_ = joblib.load (
        f'{path_experiment}/parameters.pk')

    # values passed by the caller take precedence over the stored ones
    stored_other_parameters.update(other_parameters)
    stored_em_args.update(em_args)

    # if we don't add or modify parameters, we require that the old experiment number matches the new one
    if (len(parameters) == 0) and check_experiment_matches:
        em.logger.info (f'requiring experiment number to be {experiment}')
        stored_em_args['experiment_number'] = experiment
    elif 'experiment_number' in stored_em_args:
        del stored_em_args['experiment_number']

    stored_parameters.update(parameters)

    return stored_parameters, stored_other_parameters, stored_em_args

## save_other_parameters

In [None]:
#export
def get_scalar_fields (other_parameters):
    """Return the entries of `other_parameters` whose value is a string or a real scalar.

    A value is kept when it is exactly of type `str`, or when both
    `np.isscalar` and `np.isreal` hold for it. Key order is preserved.
    """
    def _is_storable (value):
        # the str check comes first so that strings never reach np.isreal
        return type(value) is str or (np.isscalar(value) and np.isreal(value))

    return {key: other_parameters[key]
            for key in other_parameters.keys()
            if _is_storable (other_parameters[key])}

def save_other_parameters (experiment_number, other_parameters, path_experiments):
    """Store the scalar fields of `other_parameters` in `other_parameters.csv`.

    The fields selected by `get_scalar_fields` become one row indexed by
    `experiment_number` in `<path_experiments>/other_parameters.csv`. If the
    file already exists the row is added to it, and when an index appears more
    than once only its last occurrence is kept.
    """
    path_csv = f'{str(path_experiments)}/other_parameters.csv'
    new_row = pd.DataFrame (index = [experiment_number],
                            data=get_scalar_fields (other_parameters))

    if not os.path.exists (path_csv):
        new_row.to_csv (path_csv)
        return

    merged = pd.concat([pd.read_csv (path_csv, index_col=0), new_row], sort=True)
    is_superseded = merged.index.duplicated(keep='last')
    merged.loc[~is_superseded].to_csv (path_csv)