In [1]:
#hide
#default_exp config.manager_factory
from nbdev.showdoc import *
from block_types.utils.nbdev_utils import nbdev_setup, TestRunner

nbdev_setup ()
tst = TestRunner (targets=['dummy'])

# Manager factory

> Register a subclassed ExperimentManager to be used by other modules. 

In [2]:
#export
import inspect
import shutil
import hpsearch
import os
import logging
import cloudpickle
import joblib
import pickle
import dill
from pathlib import Path
import glob

from block_types.utils.utils import set_logger

import hpsearch.config.hp_defaults as dflt

experiment_manager = None

In [3]:
#for tests
import pytest
from hpsearch.examples.example_experiment_manager import ExampleExperimentManager

## get_pickable_fields

In [4]:
#export
def get_pickable_fields (obj):
    dict_fields = vars(obj)
    return {k:dict_fields[k] for k in dict_fields if dill.pickles (dict_fields[k])}

### Usage example

In [5]:
#exports tests.config.test_manager_factory
def test_get_pickable_fields ():
    em = ExampleExperimentManager ()
    d = get_pickable_fields (em)
    os.makedirs ('test_get_pickable', exist_ok=True)
    pickle.dump (d, open('test_get_pickable/test.pk','wb'))
    del em
    d2 = pickle.load (open('test_get_pickable/test.pk','rb'))

    n=0
    for k in sorted(d):
        n += (d[k]==d2[k])
    #print (f'{n}')
    assert n==24, f'{n}'
    shutil.rmtree ('test_get_pickable')

In [6]:
tst.run (test_get_pickable_fields, tag='dummy')

running test_get_pickable_fields


## ManagerFactory

In [7]:
# export
class ManagerFactory (object):
    def __init__ (self, allow_base_class=True, manager_path=dflt.manager_path, 
                  import_manager=False, verbose=dflt.verbose, logger=None, 
                  name_logger=dflt.name_logger):
        
        self.allow_base_class = allow_base_class
        self.obtain_paths()
        self.method = 1
        self.manager_path = Path(manager_path).resolve()
        self.import_manager = import_manager
        
        self.verbose = verbose
        self.logger = logger
        self.name_logger = name_logger
        if self.logger is None:
            self.logger = set_logger (self.name_logger, path_results=self.manager_path, 
                                      verbose=self.verbose)
        
    def register_manager (self, experiment_manager_to_register):
        global experiment_manager
        experiment_manager = experiment_manager_to_register
        
    def obtain_paths (self):
        destination_path_folder = os.path.dirname (hpsearch.__file__)
        destination_path_folder = f'{destination_path_folder}/app_config'
        destination_path_module = f'{destination_path_folder}/subclassed_manager.py'
        self.destination_path_folder = destination_path_folder
        self.destination_path_module = destination_path_module
        self.destination_path_import = f'{destination_path_folder}/subclassed_manager_import.py'
        self.class_two_module_file = f'{self.destination_path_folder}/class_two_module.pk'
        self.class_two_import_file = f'{self.destination_path_folder}/class_two_import.pk'
        self.class_two_base_file = f'{self.destination_path_folder}/class_two_base.pk'
        self.current_path = os.path.abspath(os.path.curdir)
        #print (f'current path: {self.current_path}')
        
    def determine_import_string (self, source_path, base_path, experiment_manager):
        if self.method==1:
            import_module_string = experiment_manager.__class__.__module__
        elif self.method==2:
            if source_path.startswith(base_path + '/'):
                split_list = source_path.split(base_path + '/')
                import_module_string = split_list[1]
                import_module_string = import_module_string.replace('/','.')
                import_module_string = import_module_string.replace('.py', '')
            else:
                self.logger.warning (f'current path {base_path} not found in source path {source_path}')
                import_module_string = 'hpsearch.app_config.subclassed_manager'
        self.import_module_string = import_module_string
        return import_module_string
        
    def write_manager (self, em):
        name_subclass = em.__class__.__name__
        try:
            source_path = inspect.getfile(em.__class__)
            self.obtain_paths()
            import_module_string = self.determine_import_string (source_path, self.current_path, em)
            self.write_manager_subclass (name_subclass, source_path, self.current_path, import_module_string)
            
            self.load_class_two_module ()
            self.class_two_module.update({name_subclass: source_path})
            self.class_two_import.update({name_subclass: import_module_string})
            self.class_two_base.update({name_subclass: self.current_path})
            pickle.dump (self.class_two_module, open(self.class_two_module_file, 'wb'))
            pickle.dump (self.class_two_import, open(self.class_two_import_file, 'wb'))
            pickle.dump (self.class_two_base, open(self.class_two_base_file, 'wb'))
            
            # store em fields in pickle and cloud-pickle files
            self.pickle_object (em=em)
        except Exception as e:
            self.logger.warning (f'write_manager failed with exception {e}')
            raise (e)
      
    def em_pickable_fields (self, em=None):
        em = self.get_experiment_manager () if em is None else em
        pickable_fields = get_pickable_fields (em)
        pickable_fields = {k:pickable_fields[k] for k in pickable_fields 
                           if k not in em.non_pickable_fields}
        return pickable_fields
    
    def write_manager_subclass (self, name_subclass, source_path, base_path=None, 
                                import_module_string=None):
        if base_path is None:
            base_path = self.current_path
        os.makedirs(self.destination_path_folder, exist_ok=True)
        
        # 1 copy subclass module's source file
        shutil.copy (source_path, self.destination_path_module)
        
        # 2 copy import statement
        f = open (self.destination_path_import, 'wt')
        f.write ('import sys\n')
        f.write (f'sys.path.append("{base_path}")\n')
        f.write (f'from {import_module_string} import {name_subclass} as Manager')
        f.close()
    
    def write_pickle_of_subclass (self, name_manager, manager_path=None, extension='.cpk'):
        manager_path = manager_path if manager_path is not None else self.manager_path
        shutil.copy (manager_path / f'{name_manager}{extension}', manager_path / f'last{extension}')
            
    def pickle_object (self, em=None, manager_path=None):
        manager_path = manager_path if manager_path is not None else self.manager_path
        
        self.manager_path.mkdir (parents=True, exist_ok=True)
        em = em if em is not None else self.get_experiment_manager ()
        dict_fields = self.em_pickable_fields (em=em)
        joblib.dump (dict_fields, manager_path / f'{em.registered_name}.pk')
        joblib.dump (dict_fields, manager_path / 'last.pk')
        
        # 4 store pickable and non-pickable fields
        cloudpickle.dump (em, open(manager_path / f'{em.registered_name}.cpk', 'wb'))
        cloudpickle.dump (em, open(manager_path / 'last.cpk', 'wb'))
    
    def load_class_two_module (self):
        if os.path.exists (self.class_two_module_file):
            self.class_two_module = pickle.load (open(self.class_two_module_file,'rb'))
        else:
            self.class_two_module = {}
            
        if os.path.exists (self.class_two_import_file):
            self.class_two_import = pickle.load (open(self.class_two_import_file,'rb'))
        else:
            self.class_two_import = {}
            self.logger.warning ('class2import not found, switching to original method')
            self.method = 2
            
        if os.path.exists (self.class_two_base_file):
            self.class_two_base = pickle.load (open(self.class_two_base_file,'rb'))
        else:
            self.class_two_base = {}
    
    def change_manager (self, name_manager):
        self.previous_manager = self.get_experiment_manager ()
        if self.import_manager:
            self.write_manager_to_import (name_manager)
            self.write_pickle_of_subclass (name_manager, extension='.pk')
        else:
            self.write_pickle_of_subclass (name_manager, extension='.cpk')
        self.reset_manager()
        self.import_or_load_manager()
        
    def write_manager_to_import (self, name_manager):
        name_subclass = name_manager.split ('-')[0]
        self.obtain_paths()
        self.load_class_two_module ()
        if name_subclass not in self.class_two_module:
            raise ValueError (f'{name_subclass} not in dictionary class_two_module={self.class_two_module}')
        if name_subclass in self.class_two_base:
            base_path = self.class_two_base[name_subclass]
        else:
            base_path = self.current_path
        self.write_manager_subclass (name_subclass, self.class_two_module[name_subclass], base_path,
                                    self.class_two_import[name_subclass])
        
    def switch_back (self):
        self.register_manager (self.previous_manager)
        self.write_manager (self.previous_manager)
        
    def print_current_manager (self):
        em = self.get_experiment_manager ()
        print (f'experiment manager registered: {em.__class__.__name__}')
        print (f'registered name: {em.registered_name}')
        
    def list_subclasses (self):
        if self.import_manager:
            self.list_pickled_managers (extension='.pk')
        else:
            self.list_pickled_managers (extension='.cpk')
        self.print_current_manager()
    
    def list_subclass_modules (self):
        self.load_class_two_module ()
        print (f'subclasses: {self.class_two_module.keys()}')
    
    def list_pickled_managers (self, extension='.cpk'):
        managers = glob.glob (f'{self.manager_path}/*{extension}')
        managers = [Path(x).name.split(extension)[0] for x in managers]
        managers = [x for x in managers if x != 'last']
        print (f'managers: {managers}')
        
    def load_pickle_and_set_em_fields (self, em, manager_path=None):
        manager_path = manager_path if manager_path is not None else self.manager_path
        dict_fields = joblib.load (manager_path / 'last.pk')
        self.logger.debug (f'loading pickled em fields from {manager_path}')
        for k in dict_fields:
            setattr (em, k, dict_fields[k])
            
    def load_manager (self, manager_path=None):
        manager_path = manager_path if manager_path is not None else self.manager_path
        self.logger.debug (f'loading manager from {manager_path}')
        em = cloudpickle.load (open(manager_path / f'last.cpk', 'rb'))
        return em
            
    def import_or_load_manager (self):
        if self.import_manager:
            em = self.import_written_manager ()
        else:
            em = self.load_manager ()
        global experiment_manager
        experiment_manager = em
    
    def import_written_manager (self):
        try:
            import hpsearch.app_config.subclassed_manager_import as subclass_module
            from importlib import reload
            reload (subclass_module)
            
            from hpsearch.app_config.subclassed_manager_import import Manager
            em = Manager()
            self.logger.debug ('returning subclassed manager')
        except ImportError:
            if not self.allow_base_class:
                raise ImportError (f'it was not possible to import subclassed manager, and allow_base_class=False')
            self.logger.debug ('importing base class ExperimentManager')
            from hpsearch.experiment_manager import ExperimentManager
            em = ExperimentManager()
        self.load_pickle_and_set_em_fields (em)
        return em
        
    def get_experiment_manager (self):
        if experiment_manager is not None:
            em = experiment_manager
            self.logger.debug ('returning registered experiment manager')
        else:
            self.logger.debug ('experiment manager not registered yet, importing experiment manager')
            self.import_or_load_manager()
            em = self.get_experiment_manager ()
            
        self.logger.debug (f'returning experiment manager {em}')
        return em
    
    def reset_manager (self):
        self.register_manager (None)
        
    def set_base_manager (self):
        from hpsearch.experiment_manager import ExperimentManager
        em = ExperimentManager()
        self.register_manager (em)
    
    def delete_and_reset_all (self):
        self.obtain_paths()
        self.logger.debug (self.destination_path_module)
        if os.path.exists(self.destination_path_module):
            self.logger.debug ('deleting')
            os.remove(self.destination_path_module)
        if os.path.exists(self.destination_path_import):
            self.logger.debug ('deleting')
            os.remove(self.destination_path_import)
            
        self.load_class_two_module ()
        if os.path.exists(self.class_two_module_file):
            self.logger.debug (f'deleting {self.class_two_module_file}')
            os.remove(self.class_two_module_file)
            
        if os.path.exists(self.class_two_import_file):
            self.logger.debug (f'deleting {self.class_two_import_file}')
            os.remove(self.class_two_import_file)
            
        if os.path.exists(self.class_two_base_file):
            self.logger.debug (f'deleting {self.class_two_base_file}')
            os.remove(self.class_two_base_file)
            
        if self.manager_path.exists ():
            self.logger.debug (f'deleting {self.manager_path}')
            shutil.rmtree (self.manager_path)
            
        self.set_base_manager ()

### get_experiment_manager

The method `get_experiment_manager` returns the base class ExperimentManager initially, unless a subclass definition has been stored by calling the `write_manager` method

In [8]:
#exports tests.config.test_manager_factory
def test_get_experiment_manager ():
    factory = ManagerFactory ()
    factory.delete_and_reset_all()
    em = factory.get_experiment_manager()
    assert em.__class__.__name__ == 'ExperimentManager'

In [9]:
tst.run (test_get_experiment_manager, tag='dummy')

class2import not found, switching to original method


running test_get_experiment_manager


### write_manager

We can write the definition of a desired subclass of ExperimentManager with the method `write_manager`. After writing this definition, the object manager_factory will return an instance of the same subclass for any module that imports ManagerFactory. Let us check this with one example. We will use the subclass `ExampleExperimentManager` as an example, and check that the module file exists after calling the method `write_manager`.

After writting the subclass, the method `get_experiment_manager` returns an instance of this subclass. This allows command-line scripts to use functions of the subclass indicated by other modules. 

In [17]:
import importlib.util
importlib.util.spec_from_file_location?

[0;31mSignature:[0m
[0mimportlib[0m[0;34m.[0m[0mutil[0m[0;34m.[0m[0mspec_from_file_location[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mname[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlocation[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mloader[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msubmodule_search_locations[0m[0;34m=[0m[0;34m<[0m[0mobject[0m [0mobject[0m [0mat[0m [0;36m0x7f42c76250c0[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Return a module spec based on a file location.

To indicate that the module is a package, set
submodule_search_locations to a list of directory paths.  An
empty list is sufficient, though its not otherwise useful to the
import system.

The loader must take a spec as its only __init__() arg.
[0;31mFile:[0m      ~/jaume/workspace/remote/hpsearch/<frozen imp

In [19]:
spec

In [23]:
ls hpsearch/app_config/

__init__.py  [0m[01;34m__pycache__[0m/


In [26]:
#exports tests.config.test_manager_factory
def test_write_manager ():
    factory = ManagerFactory ()
    factory.delete_and_reset_all()
    # we check that the file does not exist before writting it
    assert not os.path.exists(factory.destination_path_module)

    # we write new module and check that the file exists afterwards
    em = ExampleExperimentManager()
    factory.register_manager (em)
    factory.write_manager (em)
    assert os.path.exists(factory.destination_path_module)

    # we check that the written module corresponds to the class ExampleExperimentManager
    from hpsearch.app_config.subclassed_manager_import import Manager
    em2 = Manager()
    
    import importlib.util
    spec = importlib.util.spec_from_file_location("module.name", "hpsearch/app_config/subclassed_manager_import.py")
    manager_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(manager_module)
    Manager2 = getattr (manager_module, 'Manager')
    print ('2', Manager2)
    
    # TODO: error in pytest
    #assert Manager.__name__ == 'ExampleExperimentManager'
    #assert em2.__class__.__name__ == em.__class__.__name__
    #assert em.get_default_parameters({}) == em2.get_default_parameters({})
    
    # After writting the subclass, the method `get_experiment_manager` 
    # returns an instance of this subclass. This allows command-line 
    # scripts to use functions of the subclass indicated by other modules. 
    em.register_and_store_subclassed_manager()
    em2 = factory.get_experiment_manager()
    assert em2.__class__.__name__ == em.__class__.__name__

In [27]:
tst.run (test_write_manager, tag='dummy')

class2import not found, switching to original method


running test_write_manager
2 <class 'hpsearch.examples.example_experiment_manager.ExampleExperimentManager'>


### pickle_object

We can also write fields stored in the current experiment manager. When the command-line tool loads the manager, it loads these fields and stores them in the manager.

In [12]:
#exports tests.config.test_manager_factory
def test_pickle_object ():
    from hpsearch.examples.complex_dummy_experiment_manager import ComplexDummyExperimentManager
    em = ComplexDummyExperimentManager (path_experiments='my_new_path',
                                        root_folder='other_root')
    em.my_new_field = [2, 1, 3]
    em.greeting_message = 'good morning!'

    #from hpsearch.config.manager_factory import ManagerFactory
    factory = ManagerFactory (verbose=2)
    factory.delete_and_reset_all()
    factory.register_manager (em)
    factory.write_manager (em)
    assert sorted(os.listdir (factory.manager_path))==[
        'ComplexDummyExperimentManager-other_root.cpk', 
        'ComplexDummyExperimentManager-other_root.pk', 
        'last.cpk', 'last.pk']

    del em
    em = factory.get_experiment_manager ()
    assert em.my_new_field == [2, 1, 3]
    assert em.greeting_message == 'good morning!'

    global experiment_manager
    experiment_manager=None
    #del experiment_manager
    em = factory.get_experiment_manager()

    assert em.path_experiments=='my_new_path'
    assert em.root_folder=='other_root'
    assert em.my_new_field == [2, 1, 3]
    assert em.greeting_message == 'good morning!'
    

In [13]:
tst.run (test_pickle_object, tag='dummy')

/home/jcidatascience/jaume/workspace/remote/hpsearch/hpsearch/app_config/subclassed_manager.py
deleting
deleting
deleting /home/jcidatascience/jaume/workspace/remote/hpsearch/hpsearch/app_config/class_two_module.pk
deleting /home/jcidatascience/jaume/workspace/remote/hpsearch/hpsearch/app_config/class_two_import.pk
deleting /home/jcidatascience/jaume/workspace/remote/hpsearch/hpsearch/app_config/class_two_base.pk
deleting /home/jcidatascience/jaume/workspace/remote/hpsearch/em_obj
class2import not found, switching to original method


running test_pickle_object


### register_manager

The method `get_experiment_manager` returns a registered experiment_manager

In [14]:
#exports tests.config.test_manager_factory
def test_register_manager ():
    from hpsearch.config.manager_factory import ManagerFactory
    factory = ManagerFactory ()
    factory.delete_and_reset_all()
    
    from hpsearch.experiment_manager import ExperimentManager
    em = ExperimentManager()
    factory.register_manager(em)
    em2 = factory.get_experiment_manager()
    assert em2.__class__.__name__ == 'ExperimentManager'

    em = ExampleExperimentManager()
    factory.register_manager(em)
    em2 = factory.get_experiment_manager()
    assert em2.__class__.__name__ == 'ExampleExperimentManager'
    
    # We check that a module registered in one module is used in another one. 
    del ManagerFactory
    from hpsearch.config.manager_factory import ManagerFactory

    factory = ManagerFactory()
    em = factory.get_experiment_manager()
    assert em.__class__.__name__ == 'ExampleExperimentManager'

    defaults=em.get_default_parameters({})
    
    # We check that the defaults are removed properly by using remove_defaults
    from hpsearch.utils.experiment_utils import remove_defaults

    removed=remove_defaults(defaults)
    assert removed=={}

    removed=remove_defaults({'my_first':50, 'my_second':10, 'my_third':30})
    assert removed=={'my_first': 50, 'my_third': 30}
    
    # We check that no defaults are removed if the base class ExperimentManager 
    # is registered
    em3 = ExperimentManager(allow_base_class=True)
    factory.register_manager(em3)
    parameters = {'my_first':50, 'my_second':10, 'my_third':30}
    parameters_copy = parameters.copy()
    removed=remove_defaults(parameters_copy)
    assert removed==parameters

In [15]:
tst.run (test_register_manager, tag='dummy')

running test_register_manager


```python
import inspect

from hpsearch.examples.example_experiment_manager import ExampleExperimentManager

em = ExampleExperimentManager()

source_path = inspect.getfile(em.__class__)
inspect.getmodulename('ExampleExperimentManager')
```