In [1]:
import numpy as np

# Selection Sort

In [2]:
def selection_sort(L):
    """Sort ``L`` in place in ascending order using selection sort.

    For every position, the first occurrence of the smallest remaining
    element is located and swapped into that position.

    Args:
        L: Mutable sequence whose elements support ``<``.

    Returns:
        None. ``L`` is reordered in place.
    """
    size = len(L)
    # range(size - 1) is empty for sequences shorter than two elements,
    # so those are left untouched.
    for start in range(size - 1):
        # min() keeps the first minimal index, matching a strict '<' scan.
        best = min(range(start, size), key=L.__getitem__)
        if best == start:
            continue
        L[best], L[start] = L[start], L[best]

In [3]:
# Sort ten random integers drawn from [-100, 100) in place, then display the list.
L = list(np.random.randint(-100, 100, 10))
selection_sort(L)
L

[-100, -51, -20, 29, 43, 47, 56, 59, 83, 95]

Time complexity is __$O(n^2)$__ assuming comparisons can be done in constant time ($O(1)$). It does not use any extra memory.

# Bubble Sort

In [4]:
def bubble_sort(L):
    """Sort ``L`` in place in ascending order using bubble sort.

    Args:
        L: Mutable sequence whose elements support ``>``.

    Returns:
        None. ``L`` is reordered in place.
    """
    size = len(L)
    # After each pass the largest value of the unsorted prefix has moved to
    # index ``last``, so the next pass can stop one position earlier.
    # The range is empty for sequences shorter than two elements.
    for last in range(size - 1, 0, -1):
        for pos in range(last):
            nxt = pos + 1
            if L[pos] > L[nxt]:
                L[pos], L[nxt] = L[nxt], L[pos]

In [5]:
# Sort ten random integers drawn from [-100, 100) in place, then display the list.
L = list(np.random.randint(-100, 100, 10))
bubble_sort(L)
L

[-46, -16, -14, -5, -3, 20, 65, 79, 80, 81]

Time complexity is __$O(n^2)$__ assuming comparisons can be done in constant time ($O(1)$). It does not use any extra memory.

In [6]:
def improved_bubble_sort(L):
    """Sort ``L`` in place in ascending order using bubble sort with early exit.

    Identical to plain bubble sort except that it stops as soon as a full
    pass performs no swap, which means the sequence is already sorted.

    Args:
        L: Mutable sequence whose elements support ``>``.

    Returns:
        None. ``L`` is reordered in place.
    """
    size = len(L)
    for last in range(size - 1, 0, -1):
        swapped = False
        for pos in range(last):
            if L[pos] > L[pos + 1]:
                L[pos], L[pos + 1] = L[pos + 1], L[pos]
                swapped = True
        if not swapped:
            # A pass without swaps: nothing left to do.
            return

In [7]:
# Sort ten random integers drawn from [-100, 100) in place, then display the list.
L = list(np.random.randint(-100, 100, 10))
improved_bubble_sort(L)
L

[-90, -31, 0, 5, 8, 26, 57, 60, 73, 76]

Time complexity is __$O(n)$__ in the best case, when the collection is already sorted; however, the average and worst cases are __$O(n^2)$__.

# Insertion Sort

In [8]:
 def insertion_sort(L):
     """Sort ``L`` in place in ascending order using insertion sort.

     Args:
         L: Mutable sequence whose elements support ``<``.

     Returns:
         None. ``L`` is reordered in place.
     """
     # range(1, len(L)) is empty for sequences shorter than two elements.
     for idx in range(1, len(L)):
         current = L[idx]
         hole = idx
         # Shift every larger element of the sorted prefix one slot to the
         # right until the insertion point for ``current`` is found.
         while hole and current < L[hole - 1]:
             L[hole] = L[hole - 1]
             hole -= 1
         L[hole] = current

In [9]:
# Sort ten random integers drawn from [-100, 100) in place, then display the list.
L = list(np.random.randint(-100, 100, 10))
insertion_sort(L)
L

[-96, -60, -54, 1, 8, 11, 19, 45, 78, 86]

Time complexity is __$O(n)$__ in the best case, when the collection is already sorted; however, the average and worst cases are __$O(n^2)$__.

# Merge Sort

In [10]:
def merge(left, right, L):
    """Merge two ascending sequences into the front of ``L``.

    The first ``len(left) + len(right)`` positions of ``L`` are overwritten.
    On ties the element from ``left`` is taken first.

    Args:
        left: Ascending sequence.
        right: Ascending sequence.
        L: Mutable destination sequence.

    Returns:
        None. ``L`` is written in place.
    """
    n_left, n_right = len(left), len(right)
    a = b = out = 0
    # Take the smaller head element while both inputs still have elements.
    while a < n_left and b < n_right:
        if left[a] <= right[b]:
            L[out] = left[a]
            a += 1
        else:
            L[out] = right[b]
            b += 1
        out += 1
    # At most one of the two inputs still has elements; copy them over.
    for a in range(a, n_left):
        L[out] = left[a]
        out += 1
    for b in range(b, n_right):
        L[out] = right[b]
        out += 1

def merge_sort(L):
    """Sort ``L`` in place in ascending order using top-down merge sort.

    The two halves are sliced out of ``L``, sorted recursively and merged
    back into ``L`` with ``merge``.

    Args:
        L: Mutable sequence supporting slicing.

    Returns:
        None. ``L`` is overwritten with the sorted elements.
    """
    if len(L) <= 1:
        return
    # Divide
    half = len(L) // 2
    lower, upper = L[:half], L[half:]
    # Conquer
    for part in (lower, upper):
        merge_sort(part)
    # Combine
    merge(lower, upper, L)

In [11]:
# Sort ten random integers drawn from [-100, 100) in place, then display the list.
L = list(np.random.randint(-100, 100, 10))
merge_sort(L)
L

[-78, -16, 1, 39, 47, 48, 49, 69, 77, 94]

Time complexity is __$O(n \log n)$__ in the worst case. Also, this is not an in-place sorting algorithm; it takes __$O(n)$__ extra memory for the sub-lists.

# Quick Sort 

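Time complexity is __$O(n^2)$__ in the worst case. However, we get an expected running time of __$O(n \log n)$__ if we use a randomized pivot. It is very widely used in practice because it is efficient with high probability. The worst case occurs when the pivots repeatedly split the array very unevenly, e.g. an already sorted array with a fixed first- or last-element pivot, or an array of all duplicates when every element equal to the pivot is placed on the same side.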

In [12]:
def quick_sort(L):
    """Return a new list with the elements of ``L`` in ascending order.

    Randomized quick sort with a three-way partition. Elements equal to the
    pivot are collected in their own group and never recursed on, so input
    with many duplicates no longer degenerates into one recursion level per
    element.

    The pivot is picked by random index, so the returned list contains the
    original elements themselves rather than NumPy scalars.

    Args:
        L: Iterable of mutually comparable elements. It is not modified.

    Returns:
        A new sorted list, also for inputs with fewer than two elements.
    """
    items = list(L)
    if len(items) < 2:
        return items
    pivot = items[np.random.randint(len(items))]
    smaller = [e for e in items if e < pivot]
    equal = [e for e in items if e == pivot]
    larger = [e for e in items if e > pivot]
    return quick_sort(smaller) + equal + quick_sort(larger)

In [13]:
# quick_sort returns the sorted result as a list, so the call itself is the displayed value.
L = list(np.random.randint(-100, 100, 10))
quick_sort(L)

[-91, -70, -40, -17, 9, 25, 38, 57, 64, 72]

# Counting Sort

In [14]:
def count_sort(L, R):
    """Sort ``L`` in place with counting sort; keys are integers in 0..R inclusive.

    The sort is stable: equal keys keep their relative order.

    Args:
        L: Mutable sequence of integers, each satisfying ``0 <= e <= R``.
        R: Largest key value that may appear in ``L``.

    Returns:
        None. ``L`` is overwritten with the sorted keys.

    Raises:
        ValueError: If an element of ``L`` lies outside ``0..R``.
    """
    # Validate up front: a negative key would otherwise wrap around through
    # negative indexing and silently corrupt the result.
    for e in L:
        if not 0 <= e <= R:
            raise ValueError(
                "key {!r} is outside the range 0..{}".format(e, R))

    # Frequencies are stored one slot to the right (at e + 1), so the largest
    # key R needs index R + 1, i.e. R + 2 slots in total.
    count = [0] * (R + 2)
    for e in L:
        count[e + 1] += 1

    # Prefix sums: count[i] becomes the number of elements smaller than i,
    # which is the first output position for key i.
    for i in range(R + 1):
        count[i + 1] += count[i]

    # Distribute the data into the correct order.
    tmp = [0] * len(L)
    for e in L:
        tmp[count[e]] = e
        count[e] += 1

    # Copy the output back.
    L[:] = tmp

In [15]:
# Sort a small list of non-negative integers in place with 10 as the key upper bound, then display it.
L = [2, 5, 3, 0, 2, 3, 0, 3]
count_sort(L, 10)
L

[0, 0, 2, 2, 3, 3, 3, 5]

The running time is $O(n + k)$, which is linear as long as $k = O(n)$. Space complexity is $O(n + k)$. It works well with a small range of integers.

# LSD Radix Sort

In [16]:
def lsd(L, w):
    """Sort equal-length strings in place with LSD radix sort.

    Runs one stable key-indexed counting pass per character position,
    from position ``w - 1`` down to position 0.

    Args:
        L: Mutable list of strings; each must have at least ``w`` characters
            whose code points are below 256.
        w: Number of leading character positions to sort on.

    Returns:
        None. ``L`` is reordered in place.
    """
    radix = 256  # number of distinct character codes supported
    total = len(L)

    for pos in reversed(range(w)):
        # Frequency counts, stored one slot to the right of the code.
        offsets = [0] * (radix + 1)
        for word in L:
            offsets[ord(word[pos]) + 1] += 1

        # Prefix sums turn the counts into starting positions.
        for code in range(1, radix + 1):
            offsets[code] += offsets[code - 1]

        # Distribute the keys; taking them in order keeps the pass stable.
        placed = [0] * total
        for word in L:
            code = ord(word[pos])
            placed[offsets[code]] = word
            offsets[code] += 1

        # Copy the sorted keys back.
        L[:] = placed

In [17]:
# Sort fixed-width digit strings in place, using the length of the first string as the key width.
L = ['123', '321', '000', '111']
lsd(L, len(L[0]))
L

['000', '111', '123', '321']

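LSD radix sort running time is $O(d(N + R))$, where $d$ is the key length (`w` in the code), $N$ is the number of keys and $R$ is the alphabet size.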

# Bucket Sort

In [18]:
def bucket_sort(L, k):
    """Return a new list with the integers of ``L`` in ascending order.

    One bucket is created per possible key ``0..k``; every element is
    appended to the bucket at its own index and the buckets are then
    concatenated in index order.

    Args:
        L: Iterable of integers used directly as bucket indices.
        k: Largest bucket index; ``k + 1`` buckets are allocated.

    Returns:
        A new list; ``L`` is not modified.
    """
    buckets = [[] for _ in range(k + 1)]
    for value in L:
        buckets[value].append(value)
    # Flatten the buckets in index order.
    return [value for group in buckets for value in group]

In [19]:
# bucket_sort returns a new sorted list, so the call itself is the displayed value.
L = [2, 5, 3, 0, 2, 3, 0, 3]
bucket_sort(L, 5)

[0, 0, 2, 2, 3, 3, 3, 5]

The running time is $O(n + k)$, which is linear as long as $k = O(n)$. Space complexity is $O(n + k)$. It works well with a small range of integers.

# Heap Sort

In [20]:
class HeapSort:
    """In-place heap sort built on a binary min-heap.

    Because the current minimum is repeatedly moved to the end of the
    shrinking heap, ``sort`` leaves the list in DESCENDING order.
    """

    @staticmethod
    def sort(arr):
        """Sort ``arr`` in place into descending order.

        Args:
            arr: Mutable sequence whose elements support ``<``.

        Returns:
            None.
        """
        HeapSort._heapify(arr)
        # ``end`` is both the slot receiving the current minimum and the
        # size of the heap that remains afterwards.
        for end in range(len(arr) - 1, 0, -1):
            HeapSort._swap(arr, 0, end)
            HeapSort._downheap(arr, 0, end)

    @staticmethod
    def _swap(arr, i, j):
        """Exchange the elements at indices ``i`` and ``j``."""
        arr[i], arr[j] = arr[j], arr[i]

    @staticmethod
    def _heapify(arr):
        """Rearrange ``arr`` into a min-heap by sifting down every node
        from index ``(len(arr) - 1) // 2`` back to the root."""
        size = len(arr)
        for node in reversed(range((size - 1) // 2 + 1)):
            HeapSort._downheap(arr, node, size)

    @staticmethod
    def _downheap(arr, i, j):
        """Sift the element at index ``i`` down within the heap ``arr[:j]``."""
        node = i
        while True:
            left = 2 * node + 1
            if left >= j:
                return  # no children inside the heap
            # Pick the smaller child; the left one wins ties.
            child = left
            right = left + 1
            if right < j and arr[right] < arr[left]:
                child = right
            if not arr[child] < arr[node]:
                return  # heap property already holds here
            HeapSort._swap(arr, node, child)
            node = child

In [21]:
# HeapSort.sort leaves the list in descending order, so the reversed
# (ascending) copy is displayed alongside it.
L = list(np.random.randint(-100, 100, 10))
HeapSort.sort(L)
L, L[::-1]

([51, 5, -7, -12, -25, -32, -32, -45, -73, -91],
 [-91, -73, -45, -32, -32, -25, -12, -7, 5, 51])

The running time is $O(n \log n)$.

# Randomized Quick Select

In [22]:
def quick_select(S, k):
    """Return the ``k``-th smallest element of ``S`` (1-based) by randomized quick select.

    Args:
        S: Non-empty sequence of mutually comparable elements. Not modified.
        k: Rank of the wanted element, ``1 <= k <= len(S)``.

    Returns:
        The element that would be at index ``k - 1`` if ``S`` were sorted.

    Raises:
        ValueError: If ``k`` is outside ``1..len(S)`` (this includes any
            call with an empty ``S``).
    """
    n = len(S)
    # Reject bad ranks explicitly; otherwise a single-element input would
    # return S[0] for any k.
    if not 1 <= k <= n:
        raise ValueError(
            "k must be between 1 and len(S) = {}, got {!r}".format(n, k))
    if n == 1:
        return S[0]

    # Pick the pivot by index so the stored element itself is returned.
    pivot = S[np.random.randint(n)]
    L = [e for e in S if e < pivot]
    E = [e for e in S if e == pivot]
    G = [e for e in S if e > pivot]

    if k <= len(L):
        return quick_select(L, k)
    if k <= len(L) + len(E):
        return pivot
    # The answer is in G; adjust the rank for the elements left behind.
    return quick_select(G, k - len(L) - len(E))

In [23]:
# Smoke test: run quick_select on 1000 random inputs with a random valid
# rank and report any call that raises.
for _ in range(1000):
    k = np.random.randint(1, 11)
    sample = np.random.randint(-100, 100, 10)
    try:
        quick_select(sample, k)
    except Exception:
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt and SystemExit still stop the loop.
        print('failed')

It has expected run time of $O(n)$; however, the worst case running time is $O(n^2)$.

# Sorting Summary

<img src="sorting_summary.png">