In [1]:
#default_exp utils

# Utils

In [26]:
from shutil import rmtree
from pathlib import Path
from warnings import warn
import os
import gc

#import pickle library used
import pickle as pickle

In [27]:
#export
def remove_folder_or_file(path):
    '''
    Delete whatever is stored under `path`.

    A directory is removed recursively together with its content; any other
    existing entry is unlinked. A path that does not exist is ignored.

    Parameters
    ----------
    path : str or Path
        location of the file or directory to delete

    Returns
    -------
    None
    '''
    target = Path(path)

    # nothing on disk -> nothing to do
    if not target.exists():
        return

    if target.is_dir():
        rmtree(target)
        return

    target.unlink()
    return

## Cache Handler

In [124]:
#export 
class Cacher():
    '''
    Cacher class. Creates a link between an object and a pickle path.
    Enables pickling and unpickling.

    Typical life cycle: ``dump()`` writes the object to ``dirpath/filename`` and
    releases it from memory, ``load()`` reads it back into ``obj``,
    ``reset_state()`` releases it again and ``clean_cache()`` deletes the file.
    '''
    def __init__(self, obj, filename, dirpath = '_scikit_dag_cache', pickler = None, **serialization_kwargs):
        """ A Cacher provides quick functionality to serialize and deserialize objects.
        The wrapped object can be accessed through the Cacher().obj attribute

        Parameters
        ----------
        obj : serializable python obj

        filename : str
            name of the file to be saved under dirpath

        dirpath : str or Path (optional, default= '_scikit_dag_cache')
            Path of dir in which the cached serialized obj will be saved

        pickler : pickler obj (optional, default= None)
            pickler object or module containing .dump and .load methods.
            When None, the standard ``pickle`` module is used.

        **serialization_kwargs :
            Serialization kwargs, stored on ``self.serialization_kwargs``.
            NOTE(review): no method of this class currently forwards them to
            the pickler's dump/load calls.

        Returns
        -------
        Cache object

        """

        self.filename = filename
        self.pickler = pickle if pickler is None else pickler
        self.serialization_kwargs = serialization_kwargs

        # properties/ mangled attributes
        self.__obj = obj
        self.__path = None  # only set once the object has been dumped
        self.__dirpath = Path(dirpath)

    @property
    def dirpath(self,):
        '''Directory (as a Path) in which the cache file is written.'''
        return self.__dirpath

    @property
    def obj(self,):
        '''
        The wrapped object.

        Raises
        ------
        AttributeError
            if the object is not in memory (released by dump() or reset_state())
        '''
        try:
            return self.__obj
        except AttributeError:
            raise AttributeError('Cacher is not populated with serialized object. Run .load() prior to accessing obj') from None

    @property
    def path(self,):
        '''
        Path of the cache file written by dump().

        Emits a warning when the path is set but no file exists under it.

        Raises
        ------
        AttributeError
            if dump() has not been run yet (or the cache has been cleaned)
        '''
        if self.__path is None:
            raise AttributeError('path is None. Run dump() in order to set path')
        if not self.__path.exists():
            warn(f'No file found under path. In order to cache self.obj, run dump()')

        return self.__path

    def _is_loaded(self):
        '''True when the wrapped object is currently held in memory.'''
        try:
            self.__obj
        except AttributeError:
            return False
        return True

    def dump(self, override = False):
        '''
        Serializes the wrapped object to ``dirpath/filename`` and releases it
        from memory. The ``path`` property is only set after a successful dump.

        If the object is not in memory but was dumped before, it is first
        reloaded from the existing cache file, so that re-dumping never
        destroys the cached data.

        Parameters
        ----------
        override : bool (optional, default= False)
            whether an already existing file may be overwritten

        Returns
        -------
        self

        Raises
        ------
        FileExistsError
            if the target file exists and override is False
        AttributeError
            if there is neither an object in memory nor a cached file to reload
        '''
        path = self.dirpath / self.filename

        if path.exists():
            if not override:
                raise FileExistsError(f'A file already exists under {path.absolute()}. If you want to override, set override param to True')
            warn(f'File already exists under {path.absolute()} and will be overriden.')

        # The object must be in memory BEFORE the target is opened for writing:
        # opening truncates the file, which may be the only copy of the object.
        if not self._is_loaded():
            if self.__path is None:
                raise AttributeError('Cacher holds no object to dump and no cached file to reload it from.')
            self.load()

        # create folder if does not exist
        path.parent.mkdir(parents=True, exist_ok=True)

        # override if reached this point
        with open(path, 'wb+') as f:
            self.pickler.dump(self.__obj, f)

        # path is only set if object is dumped
        # reset self.__obj and clean memory
        del self.__obj
        gc.collect()
        self.__path = path

        return self

    def load(self,):
        '''
        Deserializes the cached file under ``path`` into ``obj``.

        Returns
        -------
        self

        Raises
        ------
        AttributeError
            (from the ``path`` property) if dump() has not been run
        TypeError
            re-raised with an additional hint when loading fails with a TypeError
        '''
        try:
            # NOTE: unpickling executes code embedded in the file; only load
            # cache files that come from a trusted source.
            with open(self.path, 'rb') as f:
                self.__obj = self.pickler.load(f)
        except TypeError as e:
            raise TypeError(str(e)+ '. Check if path is valid or not None. If so try running .dump() before trying to unpickle') from e

        return self

    def clean_cache(self,):
        '''
        deletes pickled file under path

        The object is loaded back into memory before the file is removed, and
        ``path`` is reset afterwards. Nothing happens when the file is missing.
        '''
        if self.path.exists():
            self.load()
            remove_folder_or_file(self.path)
            self.__path = None

        return

    def reset_state(self, assert_is_cached = True):
        '''
        resets self.__obj state (releases the wrapped object from memory)

        Parameters
        ----------
        assert_is_cached : bool (optional, default= True)
            when True, refuses to release the object unless a cache file exists

        Returns
        -------
        self

        Raises
        ------
        AssertionError
            if assert_is_cached is True and no file exists under ``path``
        '''
        if assert_is_cached:
            cached_path = self.path
            # raised explicitly so the check is not stripped under ``python -O``
            if not cached_path.exists():
                raise AssertionError(f'No file cached under self.path ({cached_path.absolute()})')

        # if state is already reset there is nothing to release
        if self._is_loaded():
            del self.__obj
            gc.collect()

        return self

# Export

In [2]:
#hide
# Run nbdev's notebook2script; the recorded output below lists the notebooks it converted.
from nbdev.export import notebook2script
notebook2script()

Converted d6tflow-sklearn.ipynb.
Converted dag.ipynb.
Converted node.ipynb.
Converted utils.ipynb.
