In [None]:
# default_exp core

# Core

> Core functions for MRL, mostly low level plumbing and parallel processing

In [None]:
#hide
from nbdev.showdoc import *
%load_ext autoreload
%autoreload 2

In [None]:
# export
from mrl.imports import *

## Miscellaneous Functions

Low level helper functions

In [None]:
# export
def is_container(x):
    "Return `True` only when `x` is exactly a `list` or an `np.ndarray` (used for parallel processing)"
    # Exact type comparison on purpose: subclasses, tuples, strings etc. are not containers here
    return type(x) in (list, np.ndarray)

def flatten_recursive(list_of_lists):
    "Recursively flatten arbitrarily nested lists into one flat list (only `list` items are expanded)"
    flattened = []
    for element in list_of_lists:
        if type(element) != list:
            # Leaf value (including tuples, strings, arrays): keep as-is
            flattened.append(element)
            continue
        # Nested list: splice in its flattened contents
        flattened.extend(flatten_recursive(element))

    return flattened

def flatten_list_of_lists(list_of_lists):
    "Flattens exactly one level of nesting: concatenates the items of every sublist (not recursive)"
    flat = []
    for sublist in list_of_lists:
        flat.extend(sublist)
    return flat

def deduplicate_list(l):
    "Deduplicates list `l`, keeping the first occurrence of each item in its original order"
    # dict keys are unique and keep insertion order, so the result is deterministic
    # (a plain `set` would return the items in arbitrary, run-dependent order).
    # Items must be hashable, exactly as with a set.
    return list(dict.fromkeys(l))

def chunk_list(input_list, chunksize):
    'Splits `input_list` into consecutive slices of length `chunksize`; the final slice may be shorter'
    chunks = []
    for start in range(0, len(input_list), chunksize):
        stop = start + chunksize
        chunks.append(input_list[start:stop])
    return chunks

def filter_passing(inputs, bools):
    'Keeps the items of `inputs` (list) whose matching entry in `bools` (list of bools) is truthy'
    assert len(inputs)==len(bools), '`inputs` and `bools` must have the same length'
    kept = []
    for idx in range(len(inputs)):
        if bools[idx]:
            kept.append(inputs[idx])
    return kept

In [None]:
# Flattening helpers
assert flatten_list_of_lists([[1],[2],[3]]) == [1,2,3]
assert flatten_recursive([[1],[2],[3, [4,5, [6,7,8]]]]) == [1,2,3,4,5,6,7,8]

# Only `list` and `np.ndarray` count as containers
assert is_container([1,2]) and is_container(np.array([1,2]))
assert not is_container((1,2)) and not is_container('abc')

# Compare sorted output so the check does not depend on the order of the deduplicated items
assert sorted(deduplicate_list([1,2,2,3,1])) == [1,2,3]

# Last chunk is ragged
assert chunk_list([1,2,3,4,5], 2) == [[1,2],[3,4],[5]]

assert filter_passing([1,2,3], [True,False,True]) == [1,3]

## Parallel Processing

MRL tries to build in automatic parallel processing at every level. This can make a huge difference when you're processing millions of molecules

`maybe_parallel` is a convenient wrapper for parallel processing. The given `func` is partially applied with `**kwargs` and then used to process `iterable`. If `iterable` is a `list` or `np.ndarray`, `func` is applied to each of its elements in parallel; otherwise `func` is called once on `iterable` itself.

`maybe_parallel` defaults to using all available CPUs for processing. To control CPU usage, either pass a specific number as the `cpus` argument, or set `ncpus` as an environment variable (the environment variable is only consulted when `cpus` is not given):

`os.environ['ncpus'] = '8'`

Passing `cpus=0` or setting `os.environ['ncpus'] = '0'` makes `maybe_parallel` fall back to serial processing.

In [None]:
# export
def maybe_parallel(func, iterable, cpus=None, **kwargs):
    """Apply `func` to `iterable`, in parallel when `iterable` is a container.

    `func` is first bound to `**kwargs` with `partial`. If `iterable` is a
    `list` or `np.ndarray` (see `is_container`), `func` is mapped over its
    elements and a list of results is returned in input order. Otherwise
    `func(iterable)` is called once and its result is returned directly.

    `cpus` sets the maximum number of worker processes. When `cpus` is None it
    is read from the `ncpus` environment variable if set, else from
    `os.cpu_count()`. The pool never has more workers than there are elements.
    A resulting worker count of 0 (e.g. `cpus=0` or an empty container) runs
    the map serially in the current process.
    """
    func = partial(func, **kwargs)

    # Not a container: treat the input as a single item
    if not is_container(iterable):
        return func(iterable)

    if cpus is None:
        if 'ncpus' in os.environ:
            cpus = int(os.environ['ncpus'])
        else:
            # os.cpu_count() returns None when the count cannot be determined;
            # fall back to serial processing instead of failing in min() below
            cpus = os.cpu_count() or 0

    # No point in starting more workers than there are items to process
    processes = min(cpus, len(iterable))

    if processes == 0:
        return [func(i) for i in iterable]

    with Pool(processes=processes) as p:
        return p.map(func, iterable)

In [None]:
def test_func(x):
    "Toy workload: sleep for one second, then return `x` unchanged"
    time.sleep(1)
    return x

# perf_counter is monotonic, so the measured intervals are unaffected by system clock adjustments
start = time.perf_counter()
serial_results = [test_func(i) for i in range(10)]
t1 = time.perf_counter()
parallel_results = maybe_parallel(test_func, list(range(10)))
t2 = time.perf_counter()

# The parallel path must return the same values, in the same order, as the serial loop
assert parallel_results == serial_results

print(f'Serial time: {t1-start:.2f}, Parallel time: {t2-t1:.2f}.\nResults will depend on the number of CPUs available')

Serial time: 10.03, Parallel time: 1.09.
Results will depend on the number of CPUs available


## Debugging Parallel Processing

Errors in parallel processing can be difficult to debug because the true error and stack trace are obscured by the parallel processing stack trace. If you have errors in parallel processing, first try setting `os.environ['ncpus'] = '0'` to disable Python multiprocessing. This should reveal the true error.

If everything works fine when multiprocessing is disabled, it is likely one of your functions is failing to pickle.

In [None]:
# hide
# Run nbdev's notebook-to-script export; the cell output lists each notebook it converted
from nbdev.export import notebook2script; notebook2script()

Converted 00_core.ipynb.
Converted 01_chem.ipynb.
Converted 02_template.filters.ipynb.
Converted 03_template.template.ipynb.
Converted 04_template.blocks.ipynb.
Converted index.ipynb.
