## Timsort vs other sorting algorithms (comparison complexity)

In [815]:
import random
import numpy as np
import math

random.seed(42)

### Insertion sort

In [816]:
def insertion_sort(A):
    comparisons = 0
    for i in range(1, len(A)):
        key = A[i]
        j = i - 1
        comparisons += 1
        while j >= 0 and A[j] > key:
            comparisons += 1
            A[j + 1] = A[j]
            j -= 1
        A[j + 1] = key
    return A, comparisons

### Selection sort

In [817]:
def selection_sort(A):
    comparisons = 0
    n = len(A)
    for i in range(n - 1):
        min_index = i
        for j in range(i + 1, n):
            comparisons += 1
            if A[j] < A[min_index]:
                min_index = j
        A[i], A[min_index] = A[min_index], A[i]
    return A, comparisons

### Merge sort

In [818]:
def merge_sort(A):
    if len(A) <= 1:
        return A, 0
    
    mid = len(A) // 2
    left_half = A[:mid]
    right_half = A[mid:]
    
    left_half, left_comp = merge_sort(left_half)
    right_half, right_comp = merge_sort(right_half)
    
    merged, merge_comp = merge(left_half, right_half)
    
    total_comp = merge_comp + left_comp + right_comp
    
    return merged, total_comp

def merge(left, right):
    merged = []
    left_index, right_index = 0, 0
    comparisons = 0
    
    while left_index < len(left) and right_index < len(right):
        comparisons += 1
        if left[left_index] < right[right_index]:
            merged.append(left[left_index])
            left_index += 1
        else:
            merged.append(right[right_index])
            right_index += 1
    
    while left_index < len(left):
        merged.append(left[left_index])
        left_index += 1
    while right_index < len(right):
        merged.append(right[right_index])
        right_index += 1
    
    return merged, comparisons

### merge stackoverflow

In [819]:
def mergesorter(A):
    if len(A) <= 1:
        return 0
    mid = len(A) // 2
    left = A[:mid]
    right = A[mid:]

    # Recursive call on each half
    comparecount = mergesorter(left) + mergesorter(right)

    # Two iterators for traversing the two halves
    i = 0
    j = 0
    
    # Iterator for the main list
    k = 0
    
    while i < len(left) and j < len(right):
        comparecount += 1
        if left[i] <= right[j]:
            # The value from the left half has been used
            A[k] = left[i]
            # Move the iterator forward
            i += 1
        else:
            A[k] = right[j]
            j += 1
        # Move to the next slot
        k += 1

    # For all the remaining values
    while i < len(left):
        comparecount += 1
        A[k] = left[i]
        i += 1
        k += 1

    while j < len(right):
        comparecount += 1
        A[k]=right[j]
        j += 1
        k += 1

    return comparecount

print(merge_sort([2,5,4,3,8,9,1,10]))
print(mergesorter([2,5,4,3,8,9,1,10]))
insertion_sort([2,5,4,3,8,9,1,10])

([1, 2, 3, 4, 5, 8, 9, 10], 15)
24


([1, 2, 3, 4, 5, 8, 9, 10], 16)

### Heap sort

In [820]:
def heapify(A, n, i, comparisons):
    largest = i
    l = 2 * i + 1
    r = 2 * i + 2

    if l < n and A[l] > A[largest]:
        largest = l
        comparisons[0] += 1

    if r < n and A[r] > A[largest]:
        largest = r
        comparisons[0] += 1

    if largest != i:
        A[i], A[largest] = A[largest], A[i]
        heapify(A, n, largest, comparisons)

def heap_sort(A):
    comparisons = [0]
    n = len(A)

    for i in range(n // 2 - 1, -1, -1):
        heapify(A, n, i, comparisons)

    for i in range(n - 1, 0, -1):
        A[i], A[0] = A[0], A[i]
        heapify(A, i, 0, comparisons)

    return A, comparisons[0]

### Quicksort

In [821]:
def quicksort(A):
    comparisons = [0]
    quickSorter(A, 0, len(A) - 1, comparisons)
    return A, comparisons[0]

def partition(A, low, high, comparisons):
    pivot = A[high]
    i = low - 1
    for j in range(low, high):
        comparisons[0] += 1
        if A[j] <= pivot:
            i += 1
            A[i], A[j] = A[j], A[i]
    A[i + 1], A[high] = A[high], A[i + 1]
    return i + 1

def quickSorter(A, low, high, comparisons):
    if low < high:
        pi = partition(A, low, high, comparisons)
        quickSorter(A, low, pi - 1, comparisons)
        quickSorter(A, pi + 1, high, comparisons)

### Timsort

In [822]:
MINIMUM = 32

def find_minrun(n): 
    r = 0
    while n >= MINIMUM: 
        r |= n & 1
        n >>= 1
    return n + r 

def tim_insertion_sort(array, left, right): 
    global comparisons
    for i in range(left + 1, right + 1):
        key = array[i]
        j = i - 1
        comparisons += 1
        while j >= left and key < array[j]:
            array[j + 1] = array[j]
            j -= 1
            comparisons += 1
        array[j + 1] = key
    return array
              
def tim_merge(array, l, m, r): 
    global comparisons
    array_length1 = m - l + 1
    array_length2 = r - m 
    left = []
    right = []
    for i in range(array_length1): 
        left.append(array[l + i]) 
    for i in range(array_length2): 
        right.append(array[m + 1 + i]) 
  
    i = 0
    j = 0
    k = l
   
    while j < array_length2 and i < array_length1: 
        if left[i] <= right[j]: 
            array[k] = left[i] 
            i += 1
        else: 
            array[k] = right[j] 
            j += 1
        k += 1
        comparisons += 1
  
    while i < array_length1: 
        array[k] = left[i] 
        k += 1
        i += 1
        comparisons += 1
  
    while j < array_length2: 
        array[k] = right[j] 
        k += 1
        j += 1
        comparisons += 1
  
def timsort(array): 
    n = len(array) 
    minrun = find_minrun(n) 
  
    for start in range(0, n, minrun): 
        end = min(start + minrun - 1, n - 1) 
        tim_insertion_sort(array, start, end) 
   
    size = minrun 
    while size < n: 
        for left in range(0, n, 2 * size): 
            mid = min(n - 1, left + size - 1) 
            right = min((left + 2 * size - 1), (n - 1)) 
            tim_merge(array, left, mid, right) 
        size = 2 * size

    return array, comparisons

### Introsort

In [823]:
def introsort(arr):

    def partition(arr, low, high):
        pivot = arr[high]
        i = low - 1
        for j in range(low, high):
            comparisons[0] += 1
            if arr[j] <= pivot:
                i += 1
                arr[i], arr[j] = arr[j], arr[i]
        arr[i + 1], arr[high] = arr[high], arr[i + 1]
        return i + 1

    def insertion_sort(arr, low, high):
        for i in range(low + 1, high + 1):
            key = arr[i]
            j = i - 1
            comparisons[0] += 1
            while j >= low and arr[j] > key:
                comparisons[0] += 1
                arr[j + 1] = arr[j]
                j -= 1
            arr[j + 1] = key

    def heap_sort(arr):
        def heapify(arr, n, i):
            largest = i
            l = 2 * i + 1
            r = 2 * i + 2

            if l < n and arr[i] < arr[l]:
                largest = l

            if r < n and arr[largest] < arr[r]:
                largest = r

            if largest != i:
                arr[i], arr[largest] = arr[largest], arr[i]
                heapify(arr, n, largest)

        n = len(arr)

        for i in range(n // 2 - 1, -1, -1):
            heapify(arr, n, i)

        for i in range(n - 1, 0, -1):
            arr[i], arr[0] = arr[0], arr[i]
            heapify(arr, i, 0)

    def introsort_util(arr, low, high, depth_limit):
        size = high - low + 1

        if size < 16:
            insertion_sort(arr, low, high)
            return

        if depth_limit == 0:
            heap_sort(arr)
            return

        pivot = partition(arr, low, high)

        introsort_util(arr, low, pivot - 1, depth_limit - 1)
        introsort_util(arr, pivot + 1, high, depth_limit - 1)

    comparisons = [0]
    introsort_util(arr, 0, len(arr) - 1, 2 * math.log(len(arr)))
    return arr, comparisons[0]

## Benchmarking

In [827]:
iter = 100
size = 1000

def measure_comparisons(sort_function):
    total_comparisons = 0
    for _ in range(iter):
        A = np.random.random((size))
        global comparisons
        comparisons = 0
        _, comp = sort_function(A)
        total_comparisons += comp

    return total_comparisons / iter

selection_sort_comparisons = measure_comparisons(selection_sort)
print("Average comparisons for selection sort:", selection_sort_comparisons)

insertion_sort_comparisons = measure_comparisons(insertion_sort)
print("Average comparisons for insertion sort:", insertion_sort_comparisons)

merge_sort_comparisons = measure_comparisons(merge_sort)
print("Average comparisons for merge sort:", merge_sort_comparisons)

mergesorter_comparisons = mergesorter(np.random.random((size)))
print("Average comparisons for merge sorter:", mergesorter_comparisons)

heap_sort_comparisons = measure_comparisons(heap_sort)
print("Average comparisons for heap sort:", heap_sort_comparisons)

quicksort_comparisons = measure_comparisons(quicksort)
print("Average comparisons for quicksort:", quicksort_comparisons)

timsort_comparisons = measure_comparisons(timsort)
print("Average comparisons for timsort:", timsort_comparisons)

introsort_comparisons = measure_comparisons(introsort)
print("Average comparisons for introsort:", introsort_comparisons)

Average comparisons for selection sort: 499500.0
Average comparisons for insertion sort: 250028.73
Average comparisons for merge sort: 8707.59
Average comparisons for merge sorter: 9976
Average comparisons for heap sort: 11684.18
Average comparisons for quicksort: 11111.56
Average comparisons for timsort: 13662.47
Average comparisons for introsort: 11659.94
