In [None]:
# General imports
import numpy as np
import copy
import time

# For joblib
from joblib import Memory
from joblib import Parallel, delayed

# For in-memory memoization functions
import collections
import functools
import hashlib
import pickle

In [None]:
# Directory handed to joblib.Memory as its cache location (here: the current working directory).
cachedir = './'
# Memory object whose .cache decorator is used by the cells below to memoize functions.
memory = Memory(cachedir, verbose=True)

### Use joblib.Memory to cache results of a simple function taking Python primitive arguments
We can also use an in-memory memoizer, as in the next cell below.

In [None]:
@memory.cache
def say(s, n):
    """Return `s` repeated on `n` lines, each line terminated by a newline.

    A trace line is printed whenever the body actually executes, so a call
    that prints no trace line was answered from the cache.
    """
    print("Running say('%s', %d)" % (s, n))
    lines = [s + '\n' for _ in range(n)]
    return ''.join(lines)

print(say('Hello world', 1))  # first call with these arguments (runs the body unless already cached on disk)
print(say('Hello world', 2))  # runs again: the arguments are not all the same as in the previous call
print(say('Hello world', 1))  # same arguments as the first call: don't re-run this
print(say('Polly wants a cracker', 2)) # first call with these arguments
print(say('Polly wants a cracker', 2)) # repeated arguments: don't re-run this

### Use in-memory memoization to cache results of function taking simple primitive arguments
Do the exact same thing as in the previous cell, just with in-memory persistence this time.

In [None]:
# Decorator for in-memory memoization using a dictionary.
# Source: https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
def memoize(obj):
    """Return a wrapper around `obj` that caches results in an in-memory dict.

    The cache key is a SHA-256 digest of the pickled ``(args, kwargs)``.
    Unlike ``str(args)``, pickling covers the full contents of every argument,
    so large numpy arrays (whose printed form is abbreviated with ``...``)
    holding different values no longer share a key. Mutable arguments such as
    lists are supported because only the digest needs to be hashable.

    Args:
        obj: The callable to memoize.

    Returns:
        A function with the same call signature as `obj`. The cache dict is
        also exposed as ``obj.cache`` when `obj` accepts new attributes.
    """
    cache = {}
    try:
        obj.cache = cache
    except (AttributeError, TypeError):
        # Some callables (builtins, numpy ufuncs such as np.add) reject new attributes.
        pass

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        try:
            # Sort keyword arguments so f(a=1, b=2) and f(b=2, a=1) share one entry.
            payload = pickle.dumps((args, sorted(kwargs.items())))
        except Exception:
            # Unpicklable arguments: fall back to the previous str()-based key.
            payload = ('str:' + str(args) + str(kwargs)).encode('utf-8', 'backslashreplace')
        key = hashlib.sha256(payload).hexdigest()
        if key not in cache:
            cache[key] = obj(*args, **kwargs)
        return cache[key]

    return memoizer

@memoize
def say(s, n):
    """Build a string made of `n` copies of `s`, one per line.

    Prints a trace line each time the body really runs; memoized calls skip
    the body and therefore print nothing.
    """
    print("Running say('%s', %d)" % (s, n))
    return ''.join(s + '\n' for _ in range(n))

print(say('Hello world', 1))  # first call with these arguments
print(say('Hello world', 2))  # runs again: the arguments are not all the same as in the previous call
print(say('Hello world', 1))  # same arguments as the first call: don't re-run this
print(say('Polly wants a cracker', 2)) # first call with these arguments
print(say('Polly wants a cracker', 2)) # repeated arguments: don't re-run this





### Use Memory to cache results from a function accepting a mutable argument (e.g. list)
This is one of the stated use cases of Memory over simple memoizers that hash input arguments. However, the in-memory memoizer we used above actually does serialize the input arguments by calling str() on them, thus making them hashable.

In [None]:
@memory.cache
def list_add(l, n):
    """Return a deep copy of `l` in which `n` has been added to every element.

    The caller's list is left untouched. A trace line is printed whenever the
    body runs, so cache hits can be told apart from real executions.
    """
    print("Running list_add('%s', %d)" % (str(l), n))
    shifted = copy.deepcopy(l)  # work on a copy so the original list is not modified
    for position in range(len(shifted)):
        shifted[position] += n
    return shifted

# Input list; it is modified in place further down to show how the cache reacts.
l = [1, 2, 3, 4, 5]

print(list_add(l, 1))  # First call
print('\n')

print(list_add(l, 1))  # Same list contents as before: don't re-run
print('\n')

l[0] = 11  # change the contents of the same list object
print(list_add(l, 1))  # Re-run, as the list has changed
print('\n')


### Memoize the function with list arguments using the in-memory memoizer.
This shows that we can do it in-memory too, but serializing the input arguments using str().

In [None]:
@memoize
def list_add(l, n):
    """Add `n` to each element of a deep copy of `l` and return that copy.

    `l` itself is never modified. The trace line only appears when the body
    executes, i.e. when the call was not served from the memoizer's cache.
    """
    print("Running list_add('%s', %d)" % (str(l), n))
    result = copy.deepcopy(l)  # keep the caller's list intact
    for idx in range(len(result)):
        result[idx] += n
    return result

# Input list; it is modified in place further down to show how the cache reacts.
l = [1, 2, 3, 4, 5]

print(list_add(l, 1))  # First call
print('\n')

print(list_add(l, 1))  # Same list contents as before: don't re-run
print('\n')

l[0] = 11  # change the contents of the same list object
print(list_add(l, 1))  # Re-run, as the list has changed
print('\n')

### Use Memory to cache results from a function accepting numpy array arguments.
* Handling numpy arrays is another one of the stated use cases of Memory over memoize, supposedly because it can handle insignificant changes?



In [None]:
@memory.cache
def print_array(array):
    """Print a trace line showing `array`, then return `array` unchanged."""
    rendered = np.array2string(array)
    print('Running print_array(%s)' % rendered)
    return array

# First time calling on this argument.
# NOTE: the array is built from Python ints only, so it has an integer dtype.
array = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
print(print_array(array))
print('\n')

# Don't re-run, as the argument is identical
print(print_array(array))
print('\n')

# Don't re-run. NOTE(review): 1 + 1e-200 evaluates to exactly 1.0 in double
# precision and the result is stored back into an integer array, so the array
# is presumably not changed at all here (rather than "changed insignificantly").
array[0, 0] += 1e-200
print(print_array(array))
print('\n')

# First time calling on this argument (an integer array)
print(print_array(np.array([1])))
print('\n')

# It DOES re-run this. NOTE(review): the value equals 1.0, but this array is
# float while the previous one was integer -- presumably the dtype change is
# what makes the cache treat it as a new argument; confirm against joblib's hashing.
print(print_array(np.array([1 + 1e-200])))
print('\n')

# But it DOESN'T re-run this: 1 + 1e-100 also evaluates to exactly 1.0, so the
# argument has the same value and dtype as in the previous call.
print(print_array(np.array([1 + 1e-100])))
print('\n')

### Memoize a costly helper function, which is called in other functions

In [None]:
@memory.cache
def costly_preprocess(x):
    """Stand-in for an expensive step: print a trace line, wait two seconds, return `x` as is."""
    trace = 'Running costly_preprocess(%s)' % str(x)
    print(trace)
    time.sleep(2)
    return x

def array_add(array, n):
    """Pass `array` through costly_preprocess, then return it with `n` added element-wise."""
    print('Running array_add({0}, {1})'.format(array, n))
    prepared = costly_preprocess(array)  # The inner function call is memoized
    return np.add(prepared, n)

def array_minus(array, n):
    """Pass `array` through costly_preprocess, then return it with `n` subtracted element-wise."""
    print('Running array_minus({0}, {1})'.format(array, n))
    prepared = costly_preprocess(array)  # The inner function call is memoized
    return np.subtract(prepared, n)



array = np.array([1, 2, 3, 4])

# Time three calls in a row on the same array:
#   1. array_add   -- first call
#   2. array_add   -- it re-runs the outer function, but not the inner memoized function
#   3. array_minus -- even with a different outer function, we benefit from the
#                     memoization of the inner function
for timed_call in (array_add, array_add, array_minus):
    start = time.time()
    print(timed_call(array, 1))
    stop = time.time()
    print('{0} seconds'.format(round(stop-start, 4)))
    print('\n')




### Use joblib.Parallel for embarrassingly parallel loops
The default usage is to rewrite the loop as a generator expression (like a list comprehension, but without the brackets) of `delayed(...)` calls, and pass it to `Parallel`.

In [None]:
def costly_preprocess(x):
    """Stand-in for an expensive step: announce the call, wait one second, return `x` unchanged."""
    announcement = "Running costly_preprocess({0})".format(x)
    print(announcement)
    time.sleep(1)
    return x

def array_add(array, n):
    """Run costly_preprocess on `array` and return the result with `n` added element-wise."""
    prepared = costly_preprocess(array)
    return np.add(prepared, n)


array = np.array([5, 6, 7, 8, 9])
many_arrays = [array]*10  # ten references to the same array object

# Serial processing: the ten array_add calls run one after another
start = time.time()
result1 = [array_add(array, 1) for array in many_arrays]
stop = time.time()
print('Serial: {0} seconds'.format(round(stop-start, 4)))
print('\n')

# Parallel processing: the same ten calls, wrapped in delayed() and handed to
# Parallel with n_jobs=2
start = time.time()
result2 = Parallel(n_jobs=2)(
            delayed(array_add)(array, 1) for array in many_arrays)
stop = time.time()
print('Parallel: {0} seconds'.format(round(stop-start, 4)))
print('\n')


### Memoize an existing numpy function
Note that the first call to the non-memoized np.add actually takes *less time* than the second call to the memoized version. It seems that there is some overhead to memoizing large input data such as big lists. This might be improved by pickling the inputs.

In [None]:
# 50 million elements, all equal to 2. np.full allocates the array directly
# instead of first building a 50-million-item Python list.
array = np.full(50000000, 2)
add_memory = memory.cache(np.add)  # get a memoized version of np.add

def add_indirect(array, n):
    """Plain, non-memoized wrapper: return np.add(array, n)."""
    total = np.add(array, n)
    return total

# First call to add_memory (the joblib-memoized np.add)
start = time.time()
array2 = add_memory(array, 1)
stop = time.time()
print('First run with memoized add: {0} s'.format(stop-start))

# Repeat the call to add_memory with the same args
start = time.time()
array2 = add_memory(array, 1)
stop = time.time()
print('Second run with memoized add: {0} s'.format(stop-start))

# First call to the non-memoized np.add (through the add_indirect wrapper)
# Note that this takes *less time* than even the second call to the memoized version
start = time.time()
array2 = add_indirect(array, 1)
stop = time.time()
print('First run with non-memoized add: {0} s'.format(stop-start))

### Try memoizing using in-memory memoization
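In contrast to the previous cell, here we see that the second run of the memoized function is much faster than non-memoized np.add. Caveat: the array is modified between the two memoized calls, so a fast second run means the memoizer reused a result that was computed for the old contents.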

In [None]:
# Decorator for in-memory memoization using a dictionary.
# Source: https://wiki.python.org/moin/PythonDecoratorLibrary#Memoize
def memoize(obj):
    """Return a wrapper around `obj` that caches results in an in-memory dict.

    The cache key is a SHA-256 digest of the pickled ``(args, kwargs)``.
    A key built with ``str(args)`` is lossy for large numpy arrays, because
    their printed form is abbreviated with ``...``: arrays that differ only
    in the elided part would share a key and a stale result would be
    returned. Pickling covers the full contents of every argument.

    Args:
        obj: The callable to memoize (plain functions and numpy ufuncs alike).

    Returns:
        A function with the same call signature as `obj`.
    """
    cache = {}

    @functools.wraps(obj)
    def memoizer(*args, **kwargs):
        try:
            # Sort keyword arguments so f(a=1, b=2) and f(b=2, a=1) share one entry.
            payload = pickle.dumps((args, sorted(kwargs.items())))
        except Exception:
            # Unpicklable arguments: fall back to the previous str()-based key.
            payload = ('str:' + str(args) + str(kwargs)).encode('utf-8', 'backslashreplace')
        key = hashlib.sha256(payload).hexdigest()
        if key not in cache:
            cache[key] = obj(*args, **kwargs)
        return cache[key]

    return memoizer

# 50 million ones, allocated directly rather than via a 50-million-item Python list.
array = np.full(50000000, 1)
add_memoize = memoize(np.add)

# First call to add_memoize
start = time.time()
array2 = add_memoize(array, 1)
stop = time.time()
print('First run with memoized add: {0} s'.format(stop-start))

# Change one element in place: the array no longer has the contents it had
# during the first call.
array[10] = -1

# Call add_memoize again. The argument values are NOT the same any more, so a
# cached result can only be reused if the memoizer's key ignores the change
# (compare array3[10] with array[10] + 1 to check for a stale result).
start = time.time()
array3 = add_memoize(array, 1)
stop = time.time()
print('Second run with memoized add: {0} s'.format(stop-start))

# First call to the non-memoized np.add
start = time.time()
array4 = np.add(array, 1)
stop = time.time()
print('First run with non-memoized add: {0} s'.format(stop-start))
    

### Use functools.partial to emulate currying

In [None]:
array = np.array([1, 2, 3, 4, 5])

add_array = functools.partial(np.add, array)  # np.add with the array argument already supplied

# Each call supplies only the remaining argument of np.add
print(add_array(1))
print(add_array(2))
print(add_array(3))
print('\n')

# A three argument function
def greeting(word1='Hello', word2='World', punct='!'):
    """Print `word1` and `word2` separated by a space, immediately followed by `punct`."""
    phrase = word1 + ' ' + word2
    print(phrase + punct)

# Test the basic functionality
greeting()
greeting('Good', 'afternoon', '!')
print('\n')

# Fix punct='?' up front; the two words can still be passed positionally or by keyword
ask = functools.partial(greeting, punct='?')
ask('Welcome', 'to the real world')
ask(word2='Moon')  # word1 keeps its default value

In [None]:
def g(a, b, c):
    """Print a + b + c."""
    print(a+b+c)

g(3,3,3)

# Bind a positionally (a=5) and c by keyword (c=5); only b is left to supply.
gp = functools.partial(g, 5, c=5)
gp(b=9)

# A bare expression in the middle of a cell displays nothing, so print it explicitly.
print(gp.keywords)  # keyword arguments stored in the partial

# Which mixes of positional and keyword arguments are valid call syntax:
g(a=5, b=5, c=5) # ok
g(5, b=5, c=5) # ok
# g(a=5, 5, c=5) # not ok -- SyntaxError: positional argument follows keyword argument