In [None]:
# default_exp core

# Core

> Core functions for MRL, mostly low level plumbing and parallel processing

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
# export
import pandas as pd
import numpy as np
import os
import re
import random
import matplotlib.pyplot as plt
from multiprocessing import Pool
import time
from functools import partial
import itertools

In [None]:
# export
def is_container(x):
    "True only when `x` is exactly a `list` or an `np.ndarray` (the types processed element-wise in parallel)"
    # Exact type comparison on purpose: subclasses, tuples, strings, etc. are not containers here.
    return type(x) in (list, np.ndarray)

`maybe_parallel` is a convenient wrapper for parallel processing. The given `func` is bound to `**kwargs` and applied to `iterable`. If `iterable` is a `list` or `np.ndarray`, `func` is applied to each of its elements in parallel; otherwise `func` is called once on `iterable` itself.

`maybe_parallel` defaults to using all available CPUs for processing. To control CPU usage, either pass in a specific number for the `cpus` argument, or set `ncpus` as an environment variable:

`os.environ['ncpus'] = '8'`

Passing `cpus=0` or setting `os.environ['ncpus'] = '0'` causes `maybe_parallel` to fall back to serial processing.

In [None]:
# export
def maybe_parallel(func, iterable, cpus=None, **kwargs):
    """Apply `func` to `iterable`, in parallel when `iterable` is a container.

    `func` is first bound to `**kwargs` with `functools.partial`. When
    `is_container(iterable)` is true, the bound function is mapped over the
    elements of `iterable`; otherwise it is called once on `iterable` itself.

    Args:
        func: callable taking one item as its first positional argument. When a
            process pool is used it is sent to worker processes, so it must be
            picklable.
        iterable: a `list` / `np.ndarray` of items, or a single item.
        cpus: requested number of worker processes. `None` reads the `ncpus`
            environment variable, falling back to `os.cpu_count()`. `0` forces
            serial processing.
        **kwargs: extra keyword arguments forwarded to every call of `func`.

    Returns:
        A list with one result per element (in input order) when `iterable`
        is a container, otherwise the single return value of `func`.

    Raises:
        ValueError: if the `ncpus` environment variable is set but is not an
            integer string.
    """
    func = partial(func, **kwargs)

    # Anything that is not a container is treated as one item.
    if not is_container(iterable):
        return func(iterable)

    if cpus is None:
        if 'ncpus' in os.environ:
            cpus = int(os.environ['ncpus'])
        else:
            # os.cpu_count() returns None when the count cannot be determined.
            cpus = os.cpu_count() or 1

    # Never start more workers than there are items to process.
    processes = min(cpus, len(iterable))

    # Zero workers (cpus == 0 or an empty container) means plain serial execution.
    if processes == 0:
        return [func(i) for i in iterable]

    # The context manager releases the worker processes even if `func` raises.
    with Pool(processes=processes) as pool:
        return pool.map(func, iterable)

In [None]:
def test_func(x):
    "toy workload: pause for one second, then give `x` back unchanged"
    time.sleep(1)
    return x

# Time ten sequential calls, then the same ten items through `maybe_parallel`.
start = time.time()
_ = list(map(test_func, range(10)))
t1 = time.time()
_ = maybe_parallel(test_func, [*range(10)])
t2 = time.time()

serial_time, parallel_time = t1 - start, t2 - t1
print(f'Serial time: {serial_time:.2f}, Parallel time: {parallel_time:.2f}.\nResults will depend on the number of CPUs available')

Serial time: 10.03, Parallel time: 1.04.
Results will depend on the number of CPUs available
