# Quality of Life

> Functions to improve quality of life. Duplicates a subset of those in `EnvDL`'s `core`.

In [12]:
#| default_exp qol

In [13]:
#| export


In [14]:
#| hide
from nbdev.showdoc import *

## Functions for working with files and directories, intended for caching results.

In [15]:
#| export
def read_txt(path, # Path of the text file to read
             **kwargs # Intended to allow for explicit 'encoding' to be passed into open the file
            ):
    """Read the text file at `path` and return its entire contents as a single string.

    The keyword arguments `encoding`, `errors` and `newline` are forwarded to
    `open`. Any other keyword argument is ignored, so existing callers that
    pass extra options keep working. Nothing is printed.
    """
    # Only forward the text-decoding options that `open` understands.
    open_options = {key: value for key, value in kwargs.items()
                    if key in ('encoding', 'errors', 'newline')}
    with open(path, 'r', **open_options) as f:
        return f.read()

In [16]:
#| export

def print_txt(path, # Path of the text file to print
              **kwargs # Forwarded unchanged to `read_txt`, e.g. an explicit 'encoding'
             ):
    "Print the contents of the text file at `path` (as returned by `read_txt`) to standard output."
    print(read_txt(path = path, **kwargs))

In [17]:
#| export

def read_json(json_path # Path of the JSON file to load
             ):
    "Load the JSON file at `json_path` and return the parsed object. Used for train/validation/test splits"
    import json
    # Pull the whole document into memory, then decode it in one step.
    with open(json_path, 'r') as json_file:
        raw_text = json_file.read()
    parsed = json.loads(raw_text)
    return parsed

In [18]:
#| export

def ensure_dir_path_exists(dir_path = '../ext_data' # Directory path to check
                          ):
    """Create the directory `dir_path`, including any missing parent directories, to store output.

    Does nothing when the directory already exists or when `dir_path` is empty
    (an empty path refers to the current directory). Raises `FileExistsError`
    if `dir_path` exists but is not a directory.
    """
    import os

    # `os.makedirs('')` raises, and an empty path needs no directory created.
    if not dir_path:
        return

    # `exist_ok=True` makes the call idempotent. Unlike a component-by-component
    # loop, this also creates the first component of a relative path.
    os.makedirs(dir_path, exist_ok=True)

In [19]:
#| export

"Retrieve a previously calculated result. Return None if it cannot be found."
def get_cached_result(
    save_path # Path of the pickled file to load
):
    """Load and return the object pickled at `save_path`.

    Returns None when nothing exists at `save_path`, so callers can use
    `None` as the signal that the result has not been cached yet.
    """
    import os
    import pickle as pkl # Standard-library pickle. The `pickle5` backport was considered
                         # because the Python 3.7 environment lacks pickle protocol 5.

    if not os.path.exists(save_path):
        return None

    # NOTE: unpickling can execute arbitrary code. Only load cache files that
    # come from a trusted source.
    with open(save_path, 'rb') as handle:
        return pkl.load(handle)

In [20]:
#| export

def put_cached_result(
    save_path, # Destination file path; components are separated by '/'
    save_obj # Object to pickle
):
    "Pickle `save_obj` to `save_path` after calling `ensure_dir_path_exists` on the path's parent directory."
    import pickle as pkl

    # Everything before the final '/' is the directory handed to the helper.
    path_parts = save_path.split('/')
    parent_dir = '/'.join(path_parts[:-1])
    ensure_dir_path_exists(dir_path = parent_dir)

    # Protocol 4 rather than 5: the container used with tf and torch runs
    # Python 3.7, and protocol 5 was only introduced in Python 3.8.
    with open(save_path, 'wb') as out_file:
        pkl.dump(save_obj, out_file, protocol = 4)

In [21]:
#| export

def remove_matching_files(
    cache_path, # Directory to query
    match_regex_list = [r'.*\.pt', r'yhats\.csv', r'loss_df\.csv'], # List of regexes to match (okay if two regexes match the same entry)
    dry_run = True # Print files to be deleted or delete them.
):
    """Helper function to clear out cache. Remove files from a folder if they match one of a given set of regexes.

    Ignores directories in directory. Useful for clearing out model artifacts.
    Each regex is applied with `re.match`, i.e. it is anchored at the start of
    the file name but not at the end. An empty (or None) `match_regex_list`
    matches nothing. With `dry_run=True` the matching names are only printed.
    `cache_path` may be given with or without a trailing path separator.
    """
    import os
    import re

    # An empty list must match nothing. Substituting [''] would do the
    # opposite, because the empty pattern matches every file name.
    patterns = list(match_regex_list) if match_regex_list else []

    # The set dedupes names matched by more than one regex. The isfile check
    # drops sub-directories. os.path.join does not need a trailing separator.
    files_to_remove = sorted({
        entry for entry in os.listdir(cache_path)
        if any(re.match(pattern, entry) for pattern in patterns)
        and os.path.isfile(os.path.join(cache_path, entry))
    })

    if not files_to_remove:
        print('No files found to remove.')
    elif dry_run:
        print('Command would remove:')
        print('\n'.join(files_to_remove))
    else:
        for file_name in files_to_remove:
            os.remove(os.path.join(cache_path, file_name))

# remove_matching_files(
#     cache_path,
#     match_regex_list = ['.*\.pt', 'yhats\.csv', 'loss_df\.csv'],
#     dry_run = False
# )

In [22]:
#| hide
import nbdev; nbdev.nbdev_export()