In [1]:
#hide
#default_exp config.manager_factory
from nbdev.showdoc import *
from dsblocks.utils.nbdev_utils import nbdev_setup, TestRunner

# Notebook setup helper imported from dsblocks.utils.nbdev_utils.
nbdev_setup ()
# Test runner used by the `tst.run (...)` cells of this notebook; created with the single target 'dummy'.
tst = TestRunner (targets=['dummy'])

# Manager factory

> Register a subclassed ExperimentManager to be used by other modules. 

In [2]:
#export
import inspect
import shutil
import os
import logging
import joblib
import pickle
import dill
from pathlib import Path
import glob
import cloudpickle
import importlib.util
import pandas as pd
import numpy as np

from dsblocks.utils.utils import set_logger

import hpsearch
import hpsearch.config.hp_defaults as dflt

# Module-level registry holding the currently registered experiment manager.
# It stays None until a manager is registered or loaded; ManagerFactory reads it and
# rebinds it through `global experiment_manager`.
experiment_manager = None

In [3]:
#for tests
import pytest
from hpsearch.examples.example_experiment_manager import ExampleExperimentManager

## get_pickable_fields

In [4]:
#export
def get_pickable_fields (obj):
    """Return the instance attributes of `obj` that can be pickled.

    Attributes holding a `pd.DataFrame` or `np.ndarray` are always left out. The remaining
    attributes are kept only if `dill.pickles` reports them as picklable.

    Parameters
    ----------
    obj : object
        Any object supporting `vars(obj)`.

    Returns
    -------
    dict
        Mapping attribute name -> value. If the picklability check itself fails with an
        ordinary exception, all non-DataFrame / non-array attributes are returned unfiltered
        (best-effort fallback).
    """
    # dill seems to have issues with DataFrame and possibly np.array
    candidates = {name: value for name, value in vars(obj).items()
                  if not isinstance(value, (pd.DataFrame, np.ndarray))}
    try:
        return {name: value for name, value in candidates.items() if dill.pickles (value)}
    except Exception:
        # Best-effort fallback when the check cannot be performed. Only `Exception` is
        # caught so that KeyboardInterrupt / SystemExit are never swallowed here.
        return candidates

### Usage example

In [5]:
#exports tests.config.test_manager_factory
def test_get_pickable_fields ():
    """The fields returned by `get_pickable_fields` survive a pickle round trip unchanged."""
    em = ExampleExperimentManager ()
    d = get_pickable_fields (em)
    os.makedirs ('test_get_pickable', exist_ok=True)
    try:
        # context managers make sure the file is flushed and closed before it is read back
        with open ('test_get_pickable/test.pk', 'wb') as f:
            pickle.dump (d, f)
        del em
        with open ('test_get_pickable/test.pk', 'rb') as f:
            d2 = pickle.load (f)

        # count the fields that compare equal after the round trip
        n = 0
        for k in sorted(d):
            n += (d[k]==d2[k])
        # NOTE(review): 25 is presumably the number of picklable fields of
        # ExampleExperimentManager; update it if that class gains or loses attributes.
        assert n==25, f'{n}'
    finally:
        # remove the scratch folder even when an assertion above fails
        shutil.rmtree ('test_get_pickable')
    
def test_get_pickable_fields_no_df_or_array ():
    """DataFrame and ndarray attributes are left out of the picklable fields."""
    manager = ExampleExperimentManager ()
    manager.df = pd.DataFrame ({'a':[1,2,3],'b':[4,5,6]})
    manager.vector = np.array ([1,2,3])
    fields = get_pickable_fields (manager)
    excluded_names = ('df', 'vector')
    assert not any (name in fields for name in excluded_names)

In [6]:
# Run both tests through the notebook's TestRunner, tagged 'dummy'.
tst.run (test_get_pickable_fields, tag='dummy')
tst.run (test_get_pickable_fields_no_df_or_array, tag='dummy')

running test_get_pickable_fields
running test_get_pickable_fields_no_df_or_array


## ManagerFactory

In [7]:
# export
class ManagerFactory (object):
    """Registers, persists and retrieves the experiment manager shared across modules.

    The active manager is kept in the module-level variable `experiment_manager`. Managers
    are persisted under `manager_path` in three sub-folders, each holding one file per
    registered name plus a `last.pk` copy of the most recently written one:

    - `whole/`: the entire manager object, dumped with cloudpickle.
    - `fields/`: dictionary of picklable manager attributes, dumped with joblib.
    - `info/`: dictionary with `source_path`, `import_module_string` and `class_name`.

    Parameters
    ----------
    allow_base_class : bool
        Stored on the instance; not consulted by any method of this class.
    manager_path : str or Path
        Root folder for the persisted managers; resolved to an absolute path.
    import_manager : bool
        If True, managers are recovered by importing their class from the recorded source
        file and restoring the stored fields; otherwise the whole pickled object is loaded.
    verbose : int
        Verbosity passed to `set_logger` when no logger is given.
    logger : logging.Logger or None
        Logger to use; when None, one is created with `set_logger`.
    name_logger_factory : str
        Name passed to `set_logger` when no logger is given.
    """
    def __init__ (self, allow_base_class=True, manager_path=dflt.manager_path, 
                  import_manager=False, verbose=dflt.verbose, logger=None, 
                  name_logger_factory=dflt.name_logger_factory):
        
        self.allow_base_class = allow_base_class
        self.manager_path = Path(manager_path).resolve()
        self.import_manager = import_manager
        
        self.verbose = verbose
        self.logger = logger
        self.name_logger_factory = name_logger_factory
        if self.logger is None:
            self.logger = set_logger (self.name_logger_factory, path_results=self.manager_path, 
                                      verbose=self.verbose)
        
    # **************************************************
    # get manager, load it / import it
    # **************************************************
    def get_experiment_manager (self):
        """Return the registered manager, loading or creating one if none is registered.

        When nothing is registered, the manager is imported / loaded from `manager_path`;
        if the required files are missing (FileNotFoundError), the base `ExperimentManager`
        is registered instead.
        """
        if experiment_manager is not None:
            em = experiment_manager
            self.logger.debug ('returning registered experiment manager')
        else:
            self.logger.debug ('experiment manager not registered yet, importing experiment manager')
            try:
                self.import_or_load_manager()
            except FileNotFoundError:
                self.logger.debug ('No experiment manager to import was found, setting base manager.')
                self.set_base_manager ()
            # the registry is populated now, so this call takes the first branch
            em = self.get_experiment_manager ()
            
        self.logger.debug (f'returning experiment manager {em}')
        return em
                        
    def import_or_load_manager (self):
        """Recover the last written manager from disk and register it.

        Uses `import_written_manager` when `self.import_manager` is True, and
        `load_manager` otherwise.
        """
        global experiment_manager
        if self.import_manager:
            em = self.import_written_manager ()
        else:
            em = self.load_manager ()
        experiment_manager = em
    
    def import_written_manager (self):
        """Instantiate the last written manager class from its source file.

        Reads `info/last.pk`, imports the recorded class from the recorded source file,
        builds an instance with no arguments and restores the stored fields on it.

        Raises
        ------
        FileNotFoundError
            If the info or fields file does not exist.
        ImportError
            If no module spec can be built for the recorded source path (for instance when
            the path is empty because the class had no source file when it was written).
        """
        info_path = self.manager_path / 'info'
        self.info = joblib.load (info_path / 'last.pk')
        
        spec = importlib.util.spec_from_file_location(self.info['import_module_string'], 
                                                      self.info['source_path'])
        # spec_from_file_location returns None when the path is not an importable file
        if spec is None or spec.loader is None:
            raise ImportError (f"cannot import manager class {self.info['class_name']!r}: "
                               f"no importable source file at {self.info['source_path']!r}")
        manager_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(manager_module)
        Manager = getattr (manager_module, self.info['class_name'])
        em = Manager ()
        
        self.load_pickle_and_set_em_fields (em)
        return em
        
    def load_manager (self):
        """Load and return the whole manager object stored in `whole/last.pk`.

        NOTE: unpickling executes code contained in the file, so `manager_path` must only
        point to trusted content.
        """
        whole_object_path = self.manager_path / 'whole'
        self.logger.debug (f'loading manager from {whole_object_path}')
        with open (whole_object_path / 'last.pk', 'rb') as f:
            em = cloudpickle.load (f)
        return em
    
    def load_pickle_and_set_em_fields (self, em, manager_path=None):
        """Set on `em` every field stored in `fields/last.pk`.

        `manager_path` defaults to `self.manager_path`; when given, it must support the `/`
        operator (e.g. a `Path`).
        """
        manager_path = manager_path if manager_path is not None else self.manager_path
        fields_path = manager_path / 'fields'
        dict_fields = joblib.load (fields_path / 'last.pk')
        self.logger.debug (f'loading pickled em fields from {fields_path}')
        for k in dict_fields:
            setattr (em, k, dict_fields[k])

    # ***********************************************************
    # register manager, persist manager
    # ***********************************************************
    def register_manager (self, experiment_manager_to_register):
        """Make `experiment_manager_to_register` the module-wide registered manager."""
        global experiment_manager
        experiment_manager = experiment_manager_to_register
                
    def write_manager (self, em):
        """Persist `em` (whole object, picklable fields and class info) under `manager_path`."""
        name_subclass = em.__class__.__name__
        import_module_string = em.__class__.__module__
        try:
            source_path = inspect.getfile(em.__class__)
        except TypeError:
            # inspect.getfile raises TypeError for classes without a source file
            source_path = ''
        self.info = {'source_path': source_path,
                     'import_module_string': import_module_string,
                     'class_name': name_subclass}
        # store em fields in pickle and cloud-pickle files
        self.pickle_object (em=em)
            
    def pickle_object (self, em=None, manager_path=None, store_info=True):
        """Write the manager to disk, both under its registered name and as `last.pk`.

        Parameters
        ----------
        em : object or None
            Manager to store; defaults to the one returned by `get_experiment_manager`.
        manager_path : str, Path or None
            Destination root folder; defaults to `self.manager_path`.
        store_info : bool
            If True, `self.info` (set by `write_manager` or `import_written_manager`) is
            also written to the `info` folder.
        """
        manager_path = manager_path if manager_path is not None else self.manager_path
        manager_path = Path(manager_path).resolve ()
        
        whole_object_path = manager_path / 'whole'
        fields_path = manager_path / 'fields'
        info_path = manager_path / 'info'
        for folder in (whole_object_path, fields_path, info_path):
            folder.mkdir (parents=True, exist_ok=True)
        
        em = em if em is not None else self.get_experiment_manager ()
        
        # fields pickle file
        dict_fields = self.em_pickable_fields (em=em)
        joblib.dump (dict_fields, fields_path / f'{em.registered_name}.pk')
        joblib.dump (dict_fields, fields_path / 'last.pk')
        
        # store the whole object; the fields listed in `avoid_saving_fields` are blanked
        # only while dumping, and restored on the live object even if the dump fails
        saved_values = {k: getattr (em, k) for k in em.avoid_saving_fields}
        try:
            for k in saved_values: 
                setattr (em, k, None)
            for file_name in (f'{em.registered_name}.pk', 'last.pk'):
                with open (whole_object_path / file_name, 'wb') as f:
                    cloudpickle.dump (em, f)
        finally:
            for k, value in saved_values.items (): 
                setattr (em, k, value)
        
        # info file
        if store_info:
            joblib.dump (self.info, info_path / f'{em.registered_name}.pk')
            joblib.dump (self.info, info_path / 'last.pk')

    def em_pickable_fields (self, em=None):
        """Return the picklable fields of `em`, minus those named in `em.non_pickable_fields`.

        `em` defaults to the manager returned by `get_experiment_manager`.
        """
        em = self.get_experiment_manager () if em is None else em
        pickable_fields = get_pickable_fields (em)
        pickable_fields = {k:pickable_fields[k] for k in pickable_fields 
                           if k not in em.non_pickable_fields}
        return pickable_fields
            
    # **********************************************************
    # change manager
    # **********************************************************
    def change_manager (self, name_manager):
        """Make the stored manager called `name_manager` the current one.

        The current manager is remembered in `self.previous_manager` so that `switch_back`
        can restore it.
        """
        self.previous_manager = self.get_experiment_manager ()
        self.overwrite_last_manager (name_manager)
        
        self.reset_manager()
        self.import_or_load_manager()
    
    def overwrite_last_manager (self, name_manager):
        """Copy the files stored for `name_manager` over the corresponding `last.pk` files."""
        for subfolder in ('whole', 'fields', 'info'):
            folder = self.manager_path / subfolder
            shutil.copy (folder / f'{name_manager}.pk', folder / 'last.pk')
                
    def switch_back (self):
        """Register and write again the manager remembered by the last `change_manager` call."""
        self.register_manager (self.previous_manager)
        self.write_manager (self.previous_manager)

    # **********************************************************
    #  list stored managers and print current one
    # **********************************************************
    def list_subclasses (self):
        """Print the names of the stored managers followed by the current one."""
        self.list_pickled_managers ()
        self.print_current_manager ()
        
    def list_pickled_managers (self):
        """Print the names of the managers found in the `fields` folder (excluding `last`)."""
        managers = glob.glob (f'{self.manager_path}/fields/*.pk')
        # `stem` strips only the final '.pk' suffix, so names containing '.pk' are kept whole
        managers = [Path(x).stem for x in managers]
        managers = [x for x in managers if x != 'last']
        print (f'managers: {managers}')
        
    def print_current_manager (self):
        """Print the class name and registered name of the current manager."""
        em = self.get_experiment_manager ()
        print (f'experiment manager registered: {em.__class__.__name__}')
        print (f'registered name: {em.registered_name}')
        
    # **********************************************************
    #  reset and delete managers
    # **********************************************************
    def reset_manager (self):
        """Clear the registry, so that the next retrieval loads or creates a manager."""
        self.register_manager (None)
        
    def set_base_manager (self):
        """Register a new instance of the base `ExperimentManager`."""
        from hpsearch.experiment_manager import ExperimentManager
        em = ExperimentManager()
        self.register_manager (em)
    
    def delete_and_reset_all (self):    
        """Delete everything stored under `manager_path` and register the base manager."""
        if self.manager_path.exists ():
            self.logger.debug (f'deleting {self.manager_path}')
            shutil.rmtree (str(self.manager_path))
            
        self.set_base_manager ()

### get_experiment_manager

The method `get_experiment_manager` initially returns an instance of the base class `ExperimentManager`, unless a subclass definition has been stored by calling the `write_manager` method.

In [8]:
#exports tests.config.test_manager_factory
def test_get_experiment_manager ():
    """After a full reset, the factory hands out the base ExperimentManager."""
    manager_factory = ManagerFactory ()
    manager_factory.delete_and_reset_all()
    retrieved = manager_factory.get_experiment_manager()
    assert type(retrieved).__name__ == 'ExperimentManager'

In [9]:
# Run the test through the notebook's TestRunner, tagged 'dummy'.
tst.run (test_get_experiment_manager, tag='dummy')

running test_get_experiment_manager


### write_manager

We can write the definition of a desired subclass of ExperimentManager with the method `write_manager`. After writing this definition, the object manager_factory will return an instance of the same subclass for any module that imports ManagerFactory. Let us check this with one example. We will use the subclass `ExampleExperimentManager` as an example, and check that the module file exists after calling the method `write_manager`.

After writing the subclass, the method `get_experiment_manager` returns an instance of this subclass. This allows command-line scripts to use functions of the subclass indicated by other modules. 

In [10]:
#exports tests.config.test_manager_factory
def _write_manager (import_manager):
    """Check that a written subclassed manager is the one handed out by the factory.

    `import_manager` is forwarded to the ManagerFactory constructor.
    """
    factory = ManagerFactory (import_manager=import_manager)
    factory.delete_and_reset_all()
    # nothing may be stored on disk before the manager is written
    assert not factory.manager_path.exists()
    base_manager = factory.get_experiment_manager()
    assert type(base_manager).__name__ == 'ExperimentManager'

    # register and write the example subclass; the storage folder must exist afterwards
    example_manager = ExampleExperimentManager()
    factory.register_manager (example_manager)
    factory.write_manager (example_manager)
    assert factory.manager_path.exists()

    # TODO: error in pytest
    retrieved = factory.get_experiment_manager()
    assert type(example_manager).__name__ == type(retrieved).__name__
    assert example_manager.get_default_parameters({}) == retrieved.get_default_parameters({})
    
    # Once the subclass has been written, `get_experiment_manager` returns an
    # instance of that subclass. This allows command-line scripts to use
    # functions of the subclass indicated by other modules.
    example_manager.register_and_store_subclassed_manager()
    retrieved = factory.get_experiment_manager()
    assert type(retrieved).__name__ == type(example_manager).__name__

def test_write_manager ():
    """Run the write-manager scenario with and without importing the manager."""
    for import_manager in (True, False):
        _write_manager (import_manager=import_manager)

In [11]:
# Run the test through the notebook's TestRunner, tagged 'dummy'.
tst.run (test_write_manager, tag='dummy')

running test_write_manager


### pickle_object

We can also write fields stored in the current experiment manager. When the command-line tool loads the manager, it loads these fields and stores them in the manager.

In [19]:
#exports tests.config.test_manager_factory
def test_pickle_object ():
    """Fields added to a manager are written to disk and recovered when it is reloaded."""
    from hpsearch.examples.complex_dummy_experiment_manager import ComplexDummyExperimentManager
    em = ComplexDummyExperimentManager (path_experiments='my_new_path/other_folder')
    em.my_new_field = [2, 1, 3]
    em.greeting_message = 'good morning!'

    factory = ManagerFactory (verbose=2)
    factory.delete_and_reset_all()
    factory.register_manager (em)
    factory.write_manager (em)
    assert sorted(os.listdir (factory.manager_path))==['fields', 'info', 'whole']
    # every sub-folder holds one file for the registered name plus the `last.pk` copy
    expected_files = ['ComplexDummyExperimentManager-other_folder.pk', 'last.pk']
    for subfolder in ('fields', 'info', 'whole'):
        assert sorted(os.listdir (factory.manager_path / subfolder))==expected_files

    # the registered manager is returned as is
    del em
    em = factory.get_experiment_manager ()
    assert em.my_new_field == [2, 1, 3]
    assert em.greeting_message == 'good morning!'

    # Clear the registry through the factory, so that the global of the module that defines
    # ManagerFactory is reset. Rebinding `experiment_manager` with a `global` statement here
    # only affects the module this test lives in, which is a different one once the test is
    # exported. The next retrieval must reload the manager from disk.
    factory.reset_manager ()
    em = factory.get_experiment_manager()

    #assert em.path_experiments=='my_new_path/other_folder'
    assert em.folder=='other_folder'
    assert em.my_new_field == [2, 1, 3]
    assert em.greeting_message == 'good morning!'
    
#exports tests.config.test_manager_factory
def test_does_not_pickle_unpickable ():
    """Fields the manager asks not to save are blanked on disk but kept on the live object.

    NOTE(review): this relies on DummyManagerAvoidSaving setting `my_new_field` and
    `greeting_message` itself and listing them in `avoid_saving_fields` - confirm against
    that class.
    """
    from hpsearch.examples.complex_dummy_experiment_manager import DummyManagerAvoidSaving    
    
    em = DummyManagerAvoidSaving (path_experiments='my_new_path/other_folder')
    
    factory = ManagerFactory (verbose=2)
    factory.delete_and_reset_all()
    factory.register_manager (em)
    factory.write_manager (em)
    assert sorted(os.listdir (factory.manager_path / 'fields'))==[
        'DummyManagerAvoidSaving-other_folder.pk', 'last.pk']
    # writing must leave the live object untouched
    assert em.my_new_field == [2, 1, 3]
    assert em.greeting_message == 'good morning!'
    
    # the registered manager is returned as is
    del em
    em = factory.get_experiment_manager ()
    assert em.my_new_field == [2, 1, 3]
    assert em.greeting_message == 'good morning!'

    # Clear the registry through the factory, so that the global of the module that defines
    # ManagerFactory is reset. Rebinding `experiment_manager` with a `global` statement here
    # only affects the module this test lives in, which is a different one once the test is
    # exported. The next retrieval must reload the manager from disk.
    factory.reset_manager ()
    del em
    em = factory.get_experiment_manager()

    #assert em.path_experiments=='my_new_path/other_folder'
    assert em.folder=='other_folder'
    assert em.my_new_field is None
    assert em.greeting_message is None

In [20]:
# Run the test through the notebook's TestRunner, tagged 'dummy'.
tst.run (test_pickle_object, tag='dummy')

deleting /home/jcidatascience/jaume/workspace/remote/hpsearch/em_obj
returning registered experiment manager
returning experiment manager <hpsearch.examples.complex_dummy_experiment_manager.ComplexDummyExperimentManager object at 0x7f421eafc190>
experiment manager not registered yet, importing experiment manager
loading manager from /home/jcidatascience/jaume/workspace/remote/hpsearch/em_obj/whole
returning registered experiment manager
returning experiment manager <hpsearch.examples.complex_dummy_experiment_manager.ComplexDummyExperimentManager object at 0x7f42357d3f10>
returning experiment manager <hpsearch.examples.complex_dummy_experiment_manager.ComplexDummyExperimentManager object at 0x7f42357d3f10>


running test_pickle_object


In [21]:
# Run the test through the notebook's TestRunner, tagged 'dummy'.
tst.run (test_does_not_pickle_unpickable, tag='dummy')

deleting /home/jcidatascience/jaume/workspace/remote/hpsearch/em_obj
returning registered experiment manager
returning experiment manager <__main__.test_does_not_pickle_unpickable.<locals>.MyNewManager object at 0x7f421eafcc10>
experiment manager not registered yet, importing experiment manager
loading manager from /home/jcidatascience/jaume/workspace/remote/hpsearch/em_obj/whole
returning registered experiment manager
returning experiment manager <__main__.test_does_not_pickle_unpickable.<locals>.MyNewManager object at 0x7f421eafc040>
returning experiment manager <__main__.test_does_not_pickle_unpickable.<locals>.MyNewManager object at 0x7f421eafc040>


running test_does_not_pickle_unpickable


### register_manager

The method `get_experiment_manager` returns a registered experiment_manager

In [None]:
#exports tests.config.test_manager_factory
def test_register_manager ():
    """A registered manager is the one returned by any factory, including re-imported ones."""
    from hpsearch.config.manager_factory import ManagerFactory
    factory = ManagerFactory (import_manager=True)
    factory.delete_and_reset_all()
    
    # registering the base class
    from hpsearch.experiment_manager import ExperimentManager
    base_manager = ExperimentManager()
    factory.register_manager(base_manager)
    retrieved = factory.get_experiment_manager()
    assert type(retrieved).__name__ == 'ExperimentManager'

    # registering a subclass replaces the previous registration
    example_manager = ExampleExperimentManager()
    factory.register_manager(example_manager)
    retrieved = factory.get_experiment_manager()
    assert type(retrieved).__name__ == 'ExampleExperimentManager'
    
    # A manager registered through one import of the factory is seen through another one.
    del ManagerFactory
    from hpsearch.config.manager_factory import ManagerFactory

    factory = ManagerFactory()
    current_manager = factory.get_experiment_manager()
    assert type(current_manager).__name__ == 'ExampleExperimentManager'

    default_parameters = current_manager.get_default_parameters({})
    
    # remove_defaults drops every parameter that holds its default value
    from hpsearch.utils.experiment_utils import remove_defaults

    assert remove_defaults(default_parameters) == {}

    non_default = remove_defaults({'my_first':50, 'my_second':10, 'my_third':30})
    assert non_default == {'my_first': 50, 'my_third': 30}
    
    # With the base class ExperimentManager registered, no parameter is removed
    plain_manager = ExperimentManager(allow_base_class=True)
    factory.register_manager(plain_manager)
    parameters = {'my_first':50, 'my_second':10, 'my_third':30}
    untouched = remove_defaults(dict(parameters))
    assert untouched == parameters

In [None]:
# Run the test through the notebook's TestRunner, tagged 'dummy'.
tst.run (test_register_manager, tag='dummy')

```python
import inspect

from hpsearch.examples.example_experiment_manager import ExampleExperimentManager

em = ExampleExperimentManager()

# file that defines the manager's class
source_path = inspect.getfile(em.__class__)
# getmodulename expects a file path; given a bare class name it returns None
inspect.getmodulename(source_path)
```