In [1]:
from joblib import Memory
from numpy.linalg import linalg
import numpy as np


In [2]:
# Create a joblib Memory object named `mem`, pointing it at the ./cache directory
cachedir = './cache'
mem = Memory(cachedir)



In [3]:
# Register the function f with the cache mem
@mem.cache
def f(x):
    """Return ``x + 2`` and announce on stdout that the body really ran.

    Parameters
    ----------
    x : number
        Value to which 2 is added.

    Returns
    -------
    number
        ``x + 2``.
    """
    # Format the argument into the message. Passing it as a second argument
    # to print() would leave the '%s' placeholder unexpanded in the output.
    print('Running f({})'.format(x))
    return x + 2

In [4]:
# Call f for the first time
print(f(1))

3


In [5]:
# Call f a second time with the same argument; the value is expected to be in the cache
print(f(1))

3


In [6]:
# Another way to register a function with the cache: wrap it explicitly
# with mem.cache(...) instead of using the decorator syntax.

np.random.seed(123)

a = np.random.rand(2000, 3000)
a[0,0] = 10
# Use the public np.linalg.svd entry point. The numpy.linalg.linalg submodule
# is an internal module that NumPy 2.0 made private, and accessing functions
# through it emits a DeprecationWarning there.
svd = mem.cache(np.linalg.svd)
b = svd(a)

________________________________________________________________________________
[Memory] Calling numpy.linalg.svd...
svd(array([[10.      , ...,  0.445216],
       ...,
       [ 0.259933, ...,  0.635653]]))
_____________________________________________________________svd - 14.2s, 0.2min


In [7]:
# Same input as the previous call: the result is expected to be loaded from
# the cache instead of being recomputed.
c = svd(a)



In [18]:
# Fonte: https://joblib.readthedocs.io/en/latest/auto_examples/nested_parallel_memory.html#sphx-glr-auto-examples-nested-parallel-memory-py

# Dichiarazione funzioni

import time


def costly_compute(data, column):
    """Emulate a costly function by sleeping and returning a column.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array indexed as ``data[row, column]``.
    column : int
        Index along the second axis of the column to extract.

    Returns
    -------
    numpy.ndarray
        1-D array holding ``data[:, column]``.
    """
    time.sleep(2)  # stand-in for an expensive computation
    # Index the second axis explicitly: on a 2-D array ``data[column]``
    # would select a row, not a column.
    return data[:, column]


def data_processing_mean(data, column):
    """Return the mean of what ``costly_compute`` yields for ``column``."""
    extracted = costly_compute(data, column)
    return extracted.mean()


import numpy as np
# Seeded legacy generator so the sample data is identical on every run
rng = np.random.RandomState(seed=323)
# 10000 x 4 array of standard-normal samples
data = rng.standard_normal(size=(10000, 4))

In [19]:
# Baseline: process every column sequentially, calling the uncached function.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
results = [data_processing_mean(data, col) for col in range(data.shape[1])]
stop = time.perf_counter()

print('\nSequential processing')
print('Elapsed time for the entire processing: {:.2f} s'
      .format(stop - start))


Sequential processing
Elapsed time for the entire processing: 8.01 s


In [20]:
# Second Memory object, pointed at ./cachedir and created with verbose=0
location = './cachedir'
memory = Memory(location, verbose=0)
# Cached wrapper around costly_compute, used by the helpers defined below
costly_compute_cached = memory.cache(costly_compute)


def data_processing_mean_using_cache(data, column):
    """Return the mean of what ``costly_compute_cached`` yields for ``column``."""
    cached_values = costly_compute_cached(data, column)
    return cached_values.mean()

In [21]:
from joblib import Parallel, delayed

# First round: run the cached helper over every column with 4 parallel jobs.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
results = Parallel(n_jobs=4)(
    delayed(data_processing_mean_using_cache)(data, col)
    for col in range(data.shape[1]))
stop = time.perf_counter()

print('\nFirst round - caching the data')
print('Elapsed time for the entire processing: {:.2f} s'
      .format(stop - start))



First round - caching the data
Elapsed time for the entire processing: 2.02 s


In [22]:
# Second round: same calls as before, now with 2 parallel jobs.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
results = Parallel(n_jobs=2)(
    delayed(data_processing_mean_using_cache)(data, col)
    for col in range(data.shape[1]))
stop = time.perf_counter()

print('\nSecond round - reloading from the cache')
print('Elapsed time for the entire processing: {:.2f} s'
      .format(stop - start))



Second round - reloading from the cache
Elapsed time for the entire processing: 0.39 s


In [23]:
def data_processing_max_using_cache(data, column):
    """Return the maximum of what ``costly_compute_cached`` yields for ``column``."""
    cached_values = costly_compute_cached(data, column)
    return cached_values.max()


# Run the max helper over every column with 2 parallel jobs; it calls the
# same cached function with the same arguments as the mean helper.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
results = Parallel(n_jobs=2)(
    delayed(data_processing_max_using_cache)(data, col)
    for col in range(data.shape[1]))
stop = time.perf_counter()

print('\nReusing intermediate checkpoints')
print('Elapsed time for the entire processing: {:.2f} s'
      .format(stop - start))


Reusing intermediate checkpoints
Elapsed time for the entire processing: 0.02 s
