In [16]:
import numpy as np

NUM_WEIGHTS = 10  # number of weights, and also the number of indices drawn below

gen = np.random.default_rng(0)  # fixed seed so the printed indices are reproducible
# A single vectorized draw replaces the Python-level loop of scalar gen.random() calls;
# it reads the same values from the generator's stream, so the results are unchanged.
weights = gen.random(NUM_WEIGHTS)

weights /= np.sum(weights)  # normalize so the weights sum to 1

# Multinomial resampling, following the weighted branch of numpy.random.Generator.choice:
# draw n independent samples from the uniform distribution on [0, 1) and binary-search the
# non-decreasing CDF for the index at which each sample would be inserted to maintain order.
#
# Time complexity: O(n log n), where n is the number of samples (here n = len(weights)),
# because one O(log n) binary search is run per sample.
#
# https://github.com/numpy/numpy/blob/64fc516a0fce06169a1e0fea55c7cd2dc57cd296/numpy/random/_generator.pyx#L3084

cdf = weights.cumsum()  # cumulative sum of the weights, stored in an np.array
cdf /= cdf[-1]  # re-normalization, as done inside numpy's choice; pins the last entry to 1
uniform_samples = gen.random(weights.shape)  # n independent draws from U[0, 1)
# side='right' returns the first index whose CDF value is strictly greater than the sample.
idx = cdf.searchsorted(uniform_samples, side='right')

print(f"indices drawn from numpy choice: {idx}")

# '''The systematic resampling algorithm uses a slightly different approach, assuming that we are drawing n samples where n=len(weights),
# numpy cannot assume this as it supports drawing fewer samples than len(weights). 

# We draw only 1 random sample u ~ U(0, 1/n) and place the n sample points at u + (1/n)*j for j = 0, ..., n-1. This has multiple advantages: firstly, if the weights are
# uniform the indices will not change; secondly, the time complexity is only O(n) as no search step is required

# Note also that the indices are resampled in sorted order; although for our case this doesn't matter, it's worth noting 
# '''

# indices = np.zeros(len(weights)) #initialize array to hold the indices
# cdf = np.cumsum(weights) #create cdf
# u = gen.uniform(0,1/len(weights)) #random number between 0 and 1/n, only drawn once vs the n draws of multinomial resampling
# i=0
# for j in range(0,len(weights)): 
#     r = (u + 1/len(weights) * j)
#     while r > cdf[i]: 
#         i += 1
#     indices[j] = i

# print(f"indices drawn from systematic resampling: {indices}")


# '''The systematic resampling in the log domain is functionally equivalent to doing so in the linear domain, but requires a special helper function, 
# the jacobian logarithm, i.e. log(exp(x1) + exp(x2) + ... + exp(xn))'''


# log_cdf = jacob(weights)
        
#         i = 0
#         indices = np.zeros(ctx.particle_count)
#         u = ctx.rng.uniform(0,1/ctx.particle_count)
#         for j in range(0,ctx.particle_count): 
#             r = np.log(u + 1/ctx.particle_count * j)
#             while r > log_cdf[i]: 
#                 i += 1
#             indices[j] = i



indices drawn from numpy choice: [8 0 9 0 7 4 9 6 4 5]
indices drawn from systematic resampling: [0. 0. 4. 4. 5. 6. 7. 7. 8. 9.]


### <center>Numpy Choice Algorithm</center> ###



Numpy's choice algorithm is defined in the source file "_generator.pyx", and is a method of the Generator class returned when calling default_rng(). Note that a Generator can be instantiated manually, and wraps a numpy BitGenerator, which is a PRNG, e.g. PCG64, MT19937, etc. 

The default PRNG is PCG64, which is significantly faster than MT19937 and passes more statistical tests, so there is no reason to deviate in our use case. 

$ CDF = \begin{bmatrix}
           w_{0} \\
           w_{0}+w_{1} \\
           \vdots \\
           w_{0}+...+w_{n-1}
         \end{bmatrix} $


where $\sum_{i=0}^{n-1}w_{i} = 1 $

and the final element of the CDF is 1.

The algorithm that numpy uses is referred to in the literature as multinomial resampling, where samples are drawn independently and uniformly on the interval
$ u_i \in [0,1) $ for $i$ between $0$ and $n-1$.

so $ U =  \begin{bmatrix}
           u_{0} \\
           u_1 \\
           \vdots \\
           u_{n-1}
         \end{bmatrix} $

We then take all the samples and evaluate at which index in $CDF$ they could be inserted while maintaining order; 
this is the function cdf.searchsorted(uniform_samples), a binary search implementation which returns a vector of 
indices. This resampling algorithm will have a time complexity of $O(n \log n)$, as the $O(\log n)$ binary search must be run for every sample.



In [None]:
import numpy as np

NUM_WEIGHTS = 10  # number of weights, and also the number of indices drawn below

gen = np.random.default_rng(0)  # fixed seed so the printed indices are reproducible
# A single vectorized draw replaces the Python-level loop of scalar gen.random() calls;
# it reads the same values from the generator's stream, so the results are unchanged.
weights = gen.random(NUM_WEIGHTS)

weights /= np.sum(weights)  # normalize so the weights sum to 1

# Multinomial resampling, following the weighted branch of numpy.random.Generator.choice:
# draw n independent samples from the uniform distribution on [0, 1) and binary-search the
# non-decreasing CDF for the index at which each sample would be inserted to maintain order.
#
# Time complexity: O(n log n), where n is the number of samples (here n = len(weights)),
# because one O(log n) binary search is run per sample.
#
# https://github.com/numpy/numpy/blob/64fc516a0fce06169a1e0fea55c7cd2dc57cd296/numpy/random/_generator.pyx#L3084

cdf = weights.cumsum()  # cumulative sum of the weights, stored in an np.array
cdf /= cdf[-1]  # re-normalization, as done inside numpy's choice; pins the last entry to 1
uniform_samples = gen.random(weights.shape)  # n independent draws from U[0, 1)
# side='right' returns the first index whose CDF value is strictly greater than the sample.
idx = cdf.searchsorted(uniform_samples, side='right')

print(f"indices drawn from numpy choice: {idx}")

### <center> The Systematic Resampling Algorithm</center> ###