# Bucket Sort

Bucket sort assumes that the input is drawn from a uniform distribution and has an average-case running time of
$O(n)$. Like counting sort, bucket sort is fast because it assumes something about the input. Whereas counting sort assumes that the input consists of integers in a small range, bucket sort assumes that the input is generated by a random process that distributes elements uniformly and independently over the interval $[0,1)$.

Bucket sort divides the interval $[0,1)$ into equal-sized subintervals, or buckets, and then distributes the $n$
input numbers into the buckets. Since the inputs are uniformly and independently distributed over $[0,1)$, 
we do not expect many numbers to fall into each bucket. To produce the output, we simply sort the numbers in each
bucket and then go through the buckets in order, listing the elements in each.

<img src = "BucketSort.png" width=300>

In [3]:
# bucket sort with input numbers between 0 and 1

def insertionSort(A):
    for i in range(1, len(A)):
        up = A[i]
        j = i - 1
        while j >= 0 and A[j] > up: 
            A[j + 1] = A[j]
            j -= 1
        A[j + 1] = up     
    return A     
              
def bucketSort(A, slot_num=10): # slot_num slots, each slot's size is 1/slot_num
    
    bucket = []
    for i in range(slot_num):
        bucket.append([]) # create slot_num buckets: [[], [], ..., []]
    
    # Put array elements in different buckets 
    for x in A:
        bucket_indx = int(slot_num * x) # locate its bucket
        bucket[bucket_indx].append(x)
      
    # Sort individual buckets 
    #print("Distribution of numbers in buckets")
    for i in range(slot_num):
        bucket[i] = insertionSort(bucket[i])
        #print(bucket[i])
    
    bucket_size = [len(bucket[i]) for i in range(slot_num)]
    
    # concatenate the result
    k = 0
    for i in range(slot_num):
        for j in range(len(bucket[i])): # note that range(0) contains nothing
            A[k] = bucket[i][j]
            k += 1
    return A

In [4]:
# Bucket sort for an input array with numbers greater than 1

def bucketSort1(A, NumOfBuckets=10):
    max_A = max(A)
    min_A = min(A)
  
    # range (for buckets)
    range_A = (max_A - min_A) / NumOfBuckets
  
    bucket = []
  
    # create empty buckets
    for i in range(NumOfBuckets):
        bucket.append([])
  
    # scatter the array elements into the correct bucket
    for i in range(len(A)):
        diff = (A[i] - min_A) / range_A - int((A[i] - min_A) / range_A)
  
        # append the boundary elements to the lower array
        if (diff == 0 and A[i] != min_A): # boundary
            bucket[int((A[i] - min_A) / range_A) - 1].append(A[i])
        else:
            bucket[int((A[i] - min_A) / range_A)].append(A[i])
    
    #print("Distribution of numbers in buckets")
    #for i in range(NumOfBuckets):
    #    print(bucket[i])
  
    # Sort each bucket individually
    for i in range(len(bucket)):
        if len(bucket[i]) != 0:
            bucket[i].sort()
            # Gather sorted elements 
    
    bucket_size = [len(bucket[i]) for i in range(NumOfBuckets)]
    
    # to the original array
    k = 0
    for list in bucket:
        if list:
            for x in list:
                A[k] = x
                k += 1

    return A, bucket_size

In [5]:
import random
from time import time, time_ns

In [25]:
A = [round(random.uniform(0,50),2) for _ in range(100)]
bucketsort_timer, bucketsort1_timer = 0,0

for i in range(100):

    # for bucketsort()
    start = time_ns()
    bucketSort([round(x/(max(A)+1), 2) for x in A])
    bucketsort_timer += time_ns() - start

    # for bucketsort1()
    start1 = time_ns()
    bucketSort1(A)
    bucketsort1_timer += time_ns() - start1

print(bucketsort_timer)
print(bucketsort1_timer)


22236500
4086900


17564600
6727000
