In [2]:
#hide
#default_exp examples.dummy_experiment_manager
from nbdev.showdoc import *
from block_types.utils.nbdev_utils import nbdev_setup, TestRunner

nbdev_setup ()
tst = TestRunner (targets=['dummy'])

# Dummy Experiment Manager

> Dummy experiment manager

In [3]:
#export
import numpy as np
import pickle

In [4]:
#for tests
import pytest
import numpy as np
import pandas as pd

## FakeModel

In [5]:
#export
class FakeModel (object):
    
    overfitting_epochs = 20
    
    def __init__ (self, offset=0.5, rate=0.01, epochs=10, noise=0.0, verbose=True):
        # hyper-parameters
        self.offset = offset
        self.rate = rate
        self.epochs = epochs
        
        # fake internal weight
        self.weight = 0
        
        # fake accuracy
        self.accuracy = 0
        
        # noise
        self.noise = noise
        
        # other parameters
        self.verbose = verbose
        
        self.history = {}
        self.current_epoch = 0
    
    def fit (self):
        number_epochs = int(self.epochs)
        if self.verbose:
            print (f'fitting model with {number_epochs} epochs')
        
        if self.current_epoch==0:
            self.accuracy = self.offset
        
        for epoch in range(number_epochs):
            self.weight += self.rate
            if self.current_epoch < self.overfitting_epochs:
                self.accuracy += self.rate
            else:
                self.accuracy -= self.rate
            if self.verbose:
                print (f'epoch {epoch}: accuracy: {self.accuracy}')
            
            # we keep track of the evolution of different metrics to later be able to visualize it
            self.store_intermediate_metrics ()
            
            # increase current epoch by 1
            self.current_epoch += 1
    
    def store_intermediate_metrics (self):
        validation_accuracy, test_accuracy = self.score()
        if 'validation_accuracy' not in self.history:
            self.history['validation_accuracy'] = []
        self.history['validation_accuracy'].append(validation_accuracy)
        
        if 'test_accuracy' not in self.history:
            self.history['test_accuracy'] = []
        self.history['test_accuracy'].append(test_accuracy)
        
        if 'accuracy' not in self.history:
            self.history['accuracy'] = []
        self.history['accuracy'].append(self.accuracy)
        
    def save_model_and_history (self, path_results):
        pickle.dump (self.weight, open(f'{path_results}/model_weights.pk','wb'))
        pickle.dump (self.history, open(f'{path_results}/model_history.pk','wb'))
        
    def load_model_and_history (self, path_results):
        if os.path.exists(f'{path_results}/model_weights.pk'):
            print (f'reading model from {path_results}/model_weights.pk')
            self.weight = pickle.load (open(f'{path_results}/model_weights.pk','rb'))
            self.history = pickle.load (open(f'{path_results}/model_history.pk','rb'))
            self.current_epoch = len(self.history['accuracy'])
            if self.current_epoch > 0:
                self.accuracy = self.history['accuracy'][-1]
            else:
                self.accuracy = self.offset
        else:
            print (f'model not found in {path_results}')
        
    def score (self):
        # validation accuracy
        validation_accuracy = self.accuracy + np.random.randn() * self.noise
        
        # test accuracy
        if self.current_epoch < 10:
            test_accuracy = self.accuracy + 0.1
        else:
            test_accuracy = self.accuracy - 0.1
        test_accuracy = test_accuracy + np.random.randn() * self.noise
        
        # make accuracy be in interval [0,1] 
        validation_accuracy = max(min(validation_accuracy, 1.0), 0.0)
        test_accuracy = max(min(test_accuracy, 1.0), 0.0)
        
        return validation_accuracy, test_accuracy
    
    # fake load_data which does nothing
    def load_data (self):
        pass
    

## DummyExperimentManager

In [6]:
#export
from hpsearch.experiment_manager import ExperimentManager
import hpsearch
import os
from hpsearch.visualization import plot_utils 

class DummyExperimentManager (ExperimentManager):

    def __init__ (self, 
                  path_experiments=None, 
                  root='',
                  metric='validation_accuracy',
                  op='max',
                  **kwargs):
        
        if path_experiments is None: path_experiments = f'{os.path.dirname(hpsearch.__file__)}/../results'
        
        super().__init__ (path_experiments=path_experiments, 
                          root=root,
                          metric=metric,
                          op=op,
                          **kwargs)

    def run_experiment (self, parameters={}, path_results='./results'):
        # extract hyper-parameters used by our model. All the parameters have default values if they are not passed.
        offset = parameters.get('offset', 0.5)   # default value: 0.5
        rate = parameters.get('rate', 0.01)   # default value: 0.01
        epochs = parameters.get('epochs', 10) # default value: 10
        noise = parameters.get('noise', 0.0)
        
        # other parameters that do not form part of our experiment definition
        # changing the values of these other parameters, does not make the ID of the experiment change
        verbose = parameters.get('verbose', True)
        
        # build model with given hyper-parameters
        model = FakeModel (offset=offset, rate=rate, epochs=epochs, noise = noise, verbose=verbose)
        
        # load training, validation and test data (fake step)
        model.load_data()
        
        # fit model with training data 
        model.fit ()
        
        # save model weights and evolution of accuracy metric across epochs
        model.save_model_and_history(path_results)
        
        # evaluate model with validation and test data
        validation_accuracy, test_accuracy = model.score()
        
        # store model
        self.model = model
        
        # the function returns a dictionary with keys corresponding to the names of each metric. 
        # We return result on validation and test set in this example
        dict_results = dict (validation_accuracy = validation_accuracy,
                             test_accuracy = test_accuracy)
        
        return dict_results
    
    # implementing the following method is not necessary but recommended
    def get_default_parameters (self, parameters):
        """Indicate the default value for each of the hyper-parameters used."""
        defaults = dict(offset=0.5,
                        rate=0.01,
                        epochs=10)
        
        if parameters.get('rate', defaults['rate']) < 0.001:
            defaults.update (epochs=100)
        
        return defaults
    
    def experiment_visualization (self, experiments=None, run_number=0, root_path=None, root_folder=None, 
                                  name_file='model_history.pk', metric='test_accuracy', backend='matplotlib', 
                                  **kwargs):
        if root_path is None:
            root_path = self.get_path_experiments(folder=root_folder)
        traces = []
        for experiment_id in experiments:
            path_results = self.get_path_results (experiment_id, run_number=run_number, root_path=root_path)
            if os.path.exists('%s/%s' %(path_results, name_file)):
                history = pickle.load(open('%s/%s' %(path_results, name_file),'rb'))
                label = '{}'.format(experiment_id)
                traces = plot_utils.add_trace ((1-np.array(history[metric]))*20, style='A.-', label=label, 
                                               backend=backend, traces=traces)
        plot_utils.plot(title=metric, xlabel='epoch', ylabel=metric, traces=traces, backend=backend)

## Helper functions for testing purposes

In [7]:
# export
def run_multiple_experiments (nruns=1, noise=0.0, verbose=True, rate=0.03, values_to_explore=None,
                              EM=DummyExperimentManager, em=None, **kwargs):
    if em is None:
        em = EM (**kwargs)
    parameters_single_value = dict(rate=rate, noise=noise)   # parameters where we use a fixed value
    if values_to_explore is None:
        parameters_multiple_values=dict(offset=[0.1, 0.3, 0.6], epochs=[5, 15, 30]) # parameters where we try multiple values
    else:
        parameters_multiple_values=values_to_explore
    other_parameters = dict(verbose=verbose) # parameters that control other aspects that are not part of our experiment definition (a new experiment is not created if we assign different values for these parametsers)
    em.grid_search (log_message='fixed rate, multiple epochs values',
            parameters_single_value=parameters_single_value,
            parameters_multiple_values=parameters_multiple_values,
            other_parameters=other_parameters,
            nruns=nruns)

In [8]:
#export
def generate_data (name_folder):
    em = DummyExperimentManager (path_experiments=f'test_{name_folder}', verbose=0)
    em.remove_previous_experiments ()
    run_multiple_experiments (em=em, nruns=5, noise=0.1, verbose=False)
    return em

In [9]:
# export 
import shutil
import os

def remove_previous_experiments (EM=DummyExperimentManager):
    em = EM ()
    em.remove_previous_experiments ()

### Usage

In [12]:
#exports tests.examples.test_dummy_experiment_manager
def test_dummy_experiment_manager ():
    em = generate_data ('dummy_experiment_manager')

    path_results = em.get_path_experiments()
    df = pd.read_csv (f'{path_results}/experiments_data.csv', index_col=0)
    display (df)

    # check that stored parameters are correct
    assert (df.epochs.values == np.array([ 5.,  5.,  5., 15., 15., 15., 30., 30., 30.])).all()
    assert (df.offset.values == np.array([0.1, 0.3, 0.6, 0.1, 0.3, 0.6, 0.1, 0.3, 0.6])).all()
    assert (df.rate.values == 0.03).all()

    # check that the accuracy values are correct
    epochs_before_overfitting = 20
    epochs_test = 10
    for experiment_id in df.index:
        if df.loc[experiment_id, 'epochs'] < epochs_before_overfitting:
            accuracy = df.loc[experiment_id, 'offset'] + df.loc[experiment_id, 'rate'] * df.loc[experiment_id, 'epochs']
        else:
            epochs_after_overfitting = df.loc[experiment_id, 'epochs']-epochs_before_overfitting
            accuracy = df.loc[experiment_id, 'offset'] + df.loc[experiment_id, 'rate'] * (epochs_before_overfitting  - epochs_after_overfitting)
        if df.loc[experiment_id, 'epochs'] < epochs_test:
            test_accuracy = accuracy + 0.1
        else:
            test_accuracy = accuracy - 0.1
        validation_accuracy = max(min(accuracy, 1.0), 0.0)
        test_accuracy = max(min(test_accuracy, 1.0), 0.0)

        assert np.abs(df.loc[experiment_id, '0_validation_accuracy'] - validation_accuracy) <1.e-10, f"experiment {experiment_id}: {df.loc[experiment_id, '0_validation_accuracy']} == {validation_accuracy}" 
        assert np.abs(df.loc[experiment_id, '0_test_accuracy'] - test_accuracy) <1.e-10

        md ('check that model history is written correcly')
        path_experiment = em.get_path_results (3, 0)
    model = FakeModel()
    model.load_model_and_history(path_experiment)
    assert np.max(np.abs(model.history['accuracy']-np.arange(0.13, 0.55, 0.03))) < 1e-10

    em.experiment_visualization ([3,4,5], backend='matplotlib')

    em.remove_previous_experiments()

In [13]:
tst.run (test_dummy_experiment_manager, tag='dummy')

write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class


running test_dummy_experiment_manager


write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed with exception <class '__main__.DummyExperimentManager'> is a built-in class
write_manager failed

Unnamed: 0,epochs,noise,offset,rate,0_validation_accuracy,0_test_accuracy,time_0,date,0_finished,1_validation_accuracy,...,time_2,2_finished,3_validation_accuracy,3_test_accuracy,time_3,3_finished,4_validation_accuracy,4_test_accuracy,time_4,4_finished
0,5.0,0.1,0.1,0.03,0.258346,0.479362,0.000251,23:34:24.198639,True,0.100439,...,0.000227,True,0.119856,0.378851,0.000248,True,0.049509,0.492394,0.000237,True
1,5.0,0.1,0.3,0.03,0.516995,0.66271,0.000229,23:34:24.360379,True,0.545112,...,0.000233,True,0.629369,0.577808,0.000254,True,0.638762,0.629643,0.000224,True
2,5.0,0.1,0.6,0.03,0.877515,0.7962,0.000246,23:34:24.528384,True,0.802632,...,0.000226,True,0.552479,0.79707,0.000233,True,0.495809,0.950973,0.000237,True
3,15.0,0.1,0.1,0.03,0.629959,0.432134,0.000278,23:34:24.703160,True,0.581996,...,0.000274,True,0.481629,0.52948,0.000282,True,0.384646,0.539312,0.000279,True
4,15.0,0.1,0.3,0.03,0.719918,0.515527,0.000279,23:34:24.890510,True,0.773483,...,0.000299,True,0.611251,0.514665,0.000285,True,0.61274,0.621241,0.000265,True
5,15.0,0.1,0.6,0.03,0.946606,0.787672,0.000295,23:34:25.084281,True,0.96757,...,0.000268,True,1.0,0.900374,0.000269,True,1.0,0.845161,0.000283,True
6,30.0,0.1,0.1,0.03,0.306008,0.290677,0.000313,23:34:25.288224,True,0.306261,...,0.000323,True,0.579755,0.323987,0.000327,True,0.407276,0.261914,0.000323,True
7,30.0,0.1,0.3,0.03,0.697518,0.497054,0.000312,23:34:25.504385,True,0.557703,...,0.00032,True,0.635177,0.498495,0.000351,True,0.524572,0.584592,0.0003,True
8,30.0,0.1,0.6,0.03,0.855438,0.74779,0.000336,23:34:25.733829,True,0.761694,...,0.000337,True,0.929841,0.810269,0.000323,True,0.900978,0.891672,0.000308,True


AssertionError: experiment 0: 0.2583461342632857 == 0.25