In [20]:
import requests
from joblib import Parallel, delayed
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot


In [21]:
# Run a single HTTP GET through joblib with two workers and verbose progress logging.
# The explicit timeout keeps the cell from hanging indefinitely when the server
# never answers; requests applies no timeout unless one is given.
Parallel(n_jobs=2, verbose=100)([delayed(requests.get)('http://google.com', timeout=10)])

[Parallel(n_jobs=2)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   1 tasks      | elapsed:    0.7s
[Parallel(n_jobs=2)]: Done   1 out of   1 | elapsed:    0.7s finished


[<Response [200]>]

In [2]:
import numpy as np

# Ten million uniform random samples form the input signal.
data = np.random.random(size=(10 ** 7,))
# Each window spans 500k samples.
window_size = 5 * 10 ** 5
# Window starts advance in 100k-sample strides, so consecutive windows overlap.
slices = [
    slice(lo, lo + window_size)
    for lo in range(0, data.size - window_size, 10 ** 5)
]

import time


def slow_mean(data, sl):
    """Return the mean of ``data[sl]`` after a short artificial delay.

    The 10 ms sleep stands in for a time consuming computation.
    """
    time.sleep(0.01)
    window = data[sl]
    return window.mean()

In [4]:
# Baseline: compute every window mean sequentially in the current process.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a difference of time.time() values can.
tic = time.perf_counter()
results = [slow_mean(data, sl) for sl in slices]
toc = time.perf_counter()
print('\nElapsed time computing the average of couple of slices {:.2f} s'.format(toc - tic))


Elapsed time computing the average of couple of slices 1.04 s


In [6]:
from joblib import Parallel, delayed

# Same computation as the sequential run, dispatched to two joblib workers.
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a difference of time.time() values can.
tic = time.perf_counter()
results = Parallel(n_jobs=2)(delayed(slow_mean)(data, sl) for sl in slices)
toc = time.perf_counter()
print('\nElapsed time computing the average of couple of slices {:.2f} s'.format(toc - tic))


Elapsed time computing the average of couple of slices 0.80 s


In [7]:
import os
from joblib import dump, load

folder = './joblib_memmap'
# exist_ok=True makes re-running the cell a no-op when the folder already exists,
# while still raising if the path is occupied by something that is not a directory.
os.makedirs(folder, exist_ok=True)

# Dump the input array to disk, then rebind `data` to the same content re-opened
# with mmap_mode='r' so the parallel run below operates on the memory-mapped file.
data_filename_memmap = os.path.join(folder, 'data_memmap')
dump(data, data_filename_memmap)
data = load(data_filename_memmap, mmap_mode='r')

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way a difference of time.time() values can.
tic = time.perf_counter()
results = Parallel(n_jobs=2)(delayed(slow_mean)(data, sl) for sl in slices)
toc = time.perf_counter()
print('\nElapsed time computing the average of couple of slices {:.2f} s\n'.format(toc - tic))


Elapsed time computing the average of couple of slices 0.69 s



In [8]:
def slow_mean_write_output(data, sl, output, idx):
    """Compute the mean of ``data[sl]`` and store it in ``output[idx]``.

    Sleeps 5 ms first to simulate a time consuming computation, prints the
    value together with the current process id, and returns nothing: the
    result is delivered only through the write into ``output``.
    """
    time.sleep(0.005)
    window_mean = data[sl].mean()
    worker_pid = os.getpid()
    print("[Worker %d] Mean for slice %d is %f" % (worker_pid, idx, window_mean))
    output[idx] = window_mean
# Writable memmap in the same folder, with one entry per slice and the input's dtype.
output_filename_memmap = os.path.join(folder, 'output_memmap')
output = np.memmap(
    output_filename_memmap,
    mode='w+',
    dtype=data.dtype,
    shape=len(slices),
)
# Re-open the dumped input as a read-only memmap.
data = load(data_filename_memmap, mmap_mode='r')

In [14]:
# Each task stores its mean in `output` at its own position instead of returning it.
Parallel(n_jobs=2)(
    delayed(slow_mean_write_output)(data, window, output, position)
    for position, window in enumerate(slices)
)
# Compare the values written into `output` with the previously computed `results`.
print("Equal: {}".format(np.array_equal(np.asarray(results), output)))

Equal: True
