In [3]:
import numpy as np
from numba import jit, prange
from dask import delayed, compute

In [1]:
def bootstrap(data, boots=1000):
    """
    Create bootstrap datasets that represent the distribution of the mean.

    Each bootstrap replicate draws ``data.shape[0]`` values from ``data``
    uniformly at random, with replacement, and stores their mean.
    Returns a numpy array of length ``boots`` holding the replicate means.

    Keyword arguments:
    data -- numpy array of values to bootstrap (expected to be 1-D, since
            each replicate mean is stored as a single scalar)
    boots -- number of bootstrap replicates (default = 1000)

    Raises ZeroDivisionError if ``data`` is empty and ``boots`` > 0.
    """

    n = data.shape[0]
    to_return = np.empty(boots)

    for b in range(boots):

        total = 0.0

        for _ in range(n):
            # randint samples the half-open range [0, n), so every index is
            # equally likely.  The previous round(uniform(0, n - 1)) gave the
            # first and last index only half the weight of the interior ones.
            total += data[np.random.randint(0, n)]

        to_return[b] = total / n

    return to_return

In [4]:
@jit(nopython=True)
def bootstrap_jit(data, boots=1000):
    """
    Create bootstrap datasets that represent the distribution of the mean.

    numba nopython-compiled variant.  Each bootstrap replicate draws
    ``data.shape[0]`` values from ``data`` uniformly at random, with
    replacement, and stores their mean.
    Returns a numpy array of length ``boots`` holding the replicate means.

    Keyword arguments:
    data -- numpy array of values to bootstrap (expected to be 1-D and
            non-empty)
    boots -- number of bootstrap replicates (default = 1000)
    """

    n = data.shape[0]
    to_return = np.empty(boots)

    for b in range(boots):

        total = 0.0

        for _ in range(n):
            # randint samples the half-open range [0, n), so every index is
            # equally likely.  The previous round(uniform(0, n - 1)) gave the
            # first and last index only half the weight of the interior ones.
            total += data[np.random.randint(0, n)]

        to_return[b] = total / n

    return to_return

In [5]:
@jit(nopython=True, parallel=True)
def bootstrap_jit_parallel(data, boots=1000):
    """
    Create bootstrap datasets that represent the distribution of the mean.

    numba nopython-compiled variant whose outer loop over replicates runs
    under ``prange``.  Each bootstrap replicate draws ``data.shape[0]``
    values from ``data`` uniformly at random, with replacement, and stores
    their mean.
    Returns a numpy array of length ``boots`` holding the replicate means.

    Keyword arguments:
    data -- numpy array of values to bootstrap (expected to be 1-D and
            non-empty)
    boots -- number of bootstrap replicates (default = 1000)
    """

    n = data.shape[0]
    to_return = np.empty(boots)

    # Each iteration writes only to its own slot to_return[b].
    for b in prange(boots):

        total = 0.0

        for _ in range(n):
            # randint samples the half-open range [0, n), so every index is
            # equally likely.  The previous round(uniform(0, n - 1)) gave the
            # first and last index only half the weight of the interior ones.
            total += data[np.random.randint(0, n)]

        to_return[b] = total / n

    return to_return

In [6]:
# Toy dataset (the integers 1 through 5) shared by the timing cells.
sample = np.arange(5) + 1
sample

array([1, 2, 3, 4, 5])

In [7]:
%%time
# Pure-Python baseline: one million bootstrap replicates of `sample`.
x = bootstrap(sample, 1000000)

CPU times: user 7.14 s, sys: 4.51 ms, total: 7.14 s
Wall time: 7.14 s


In [8]:
%%timeit
# Same one-million-replicate workload through the nopython-compiled version.
x = bootstrap_jit(sample, 1000000)

57.2 ms ± 1.21 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [9]:
%%timeit
# Same one-million-replicate workload through the parallel (prange) version.
x = bootstrap_jit_parallel(sample, 1000000)

24.6 ms ± 793 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


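### Notes: My guess is that parallel processing introduces some overhead, so its benefits are lost for small numbers of bootstraps (with 100 bootstraps the parallel version is slower than the plain JIT version, whereas with 1,000,000 it is faster).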

In [10]:
%%timeit
# Small workload (100 replicates) with the serial nopython-compiled version.
x = bootstrap_jit(sample, 100)

6.22 µs ± 39.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [11]:
%%timeit
# Small workload (100 replicates) with the parallel (prange) version.
x = bootstrap_jit_parallel(sample, 100)

22.3 µs ± 1.02 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


## TO-DO: Parallel Processing with dask