In [1]:
import numpy as np
from numba import jit, prange
from dask import delayed

In [2]:
def bootstrap(data, boots=1000):
    """
    Create bootstrap datasets that represent the distribution of the mean.
    Returns a numpy array containing the bootstrap datasets.

    Each of the `boots` replicates draws `data.shape[0]` observations from
    `data` uniformly at random with replacement and stores their mean, so
    every returned value lies between `data.min()` and `data.max()`.

    Keyword arguments:
    data -- 1-D numpy array of systems to bootstrap (must be non-empty)
    boots -- number of bootstrap replicates (default = 1000)

    Raises:
    ValueError -- if `data` has no elements
    """

    n = data.shape[0]
    if n == 0:
        raise ValueError("data must contain at least one element")

    to_return = np.empty(boots)

    # The explicit Python loops are intentional: this function is the
    # un-accelerated baseline that the numba variants are timed against.
    for b in range(boots):

        # Start every replicate from zero; a single accumulator shared by all
        # replicates would turn each stored value into a running total.
        total = 0.0

        for s in range(n):

            # randint's upper bound is exclusive, so each index 0..n-1 is
            # equally likely. Rounding a uniform float on [0, n-1] would give
            # the first and last index only half the weight of the others.
            total += data[np.random.randint(0, n)]

        to_return[b] = total / n

    return to_return

In [3]:
@jit(nopython=True)
def bootstrap_jit(data, boots=1000):
    """
    Create bootstrap datasets that represent the distribution of the mean.
    Returns a numpy array containing the bootstrap datasets.

    Numba nopython-compiled version. Each of the `boots` replicates draws
    `data.shape[0]` observations from `data` uniformly at random with
    replacement and stores their mean.

    Keyword arguments:
    data -- 1-D numpy array of systems to bootstrap (must be non-empty)
    boots -- number of bootstrap replicates (default = 1000)

    Raises:
    ValueError -- if `data` has no elements
    """

    n = data.shape[0]
    if n == 0:
        raise ValueError("data must contain at least one element")

    to_return = np.empty(boots)

    for b in range(boots):

        # Start every replicate from zero; a single accumulator shared by all
        # replicates would turn each stored value into a running total.
        total = 0.0

        for s in range(n):

            # randint's upper bound is exclusive, so each index 0..n-1 is
            # equally likely. Rounding a uniform float on [0, n-1] would give
            # the first and last index only half the weight of the others.
            total += data[np.random.randint(0, n)]

        to_return[b] = total / n

    return to_return

In [4]:
@jit(nopython=True, parallel=True)
def bootstrap_jit_parallel(data, boots=1000):
    """
    Create bootstrap datasets that represent the distribution of the mean.
    Returns a numpy array containing the bootstrap datasets.

    Numba nopython version with the replicate loop parallelised via `prange`.
    Each of the `boots` replicates draws `data.shape[0]` observations from
    `data` uniformly at random with replacement and stores their mean.

    Keyword arguments:
    data -- 1-D numpy array of systems to bootstrap (must be non-empty)
    boots -- number of bootstrap replicates (default = 1000)

    Raises:
    ValueError -- if `data` has no elements
    """

    n = data.shape[0]
    if n == 0:
        raise ValueError("data must contain at least one element")

    to_return = np.empty(boots)

    for b in prange(boots):

        # The accumulator must be created inside the prange body so that it is
        # private to the iteration. An accumulator defined outside the loop is
        # carried across iterations as a reduction, and its value while the
        # loop is still running is not a per-replicate sum.
        total = 0.0

        for s in range(n):

            # randint's upper bound is exclusive, so each index 0..n-1 is
            # equally likely. Rounding a uniform float on [0, n-1] would give
            # the first and last index only half the weight of the others.
            total += data[np.random.randint(0, n)]

        # Each iteration writes only its own slot, so iterations are independent.
        to_return[b] = total / n

    return to_return

In [5]:
# Small integer sample (np.arange(1, 6)) used as the input to every timing cell below.
sample = np.arange(1, 6)
sample

array([1, 2, 3, 4, 5])

In [6]:
%%time
# Baseline: the undecorated pure-Python implementation, 1,000,000 replicates.
x = bootstrap(sample, 1000000)

CPU times: user 6.54 s, sys: 3.17 ms, total: 6.55 s
Wall time: 6.55 s


In [7]:
%%time
# Same workload through the numba nopython version.
# NOTE(review): this is the first call to bootstrap_jit in the notebook, so the
# measured time presumably includes JIT compilation -- confirm by re-running.
x = bootstrap_jit(sample, 1000000)

CPU times: user 233 ms, sys: 8.01 ms, total: 241 ms
Wall time: 242 ms


In [8]:
%%time
# Same workload through the numba parallel (prange) version.
# NOTE(review): this is the first call to bootstrap_jit_parallel in the notebook,
# so the measured time presumably includes JIT compilation -- confirm by re-running.
x = bootstrap_jit_parallel(sample, 1000000)

CPU times: user 449 ms, sys: 0 ns, total: 449 ms
Wall time: 388 ms


### Notes: My guess is that parallel processing introduces some overhead, so its benefits are lost for small numbers of bootstraps

In [9]:
%%time
# Small workload (100 replicates) on the serial jitted function, which has
# already been called once above with the same argument types.
x = bootstrap_jit(sample, 100)

CPU times: user 19 µs, sys: 0 ns, total: 19 µs
Wall time: 21.2 µs


In [10]:
%%time
# Small workload (100 replicates) on the parallel jitted function, which has
# already been called once above with the same argument types.
x = bootstrap_jit_parallel(sample, 100)

CPU times: user 359 µs, sys: 4 µs, total: 363 µs
Wall time: 629 µs


## TO-DO: Parallel Processing with dask