In [3]:
from random import randint
from timeit import repeat

$O(n \log_2 n)$ is the best possible worst-case runtime that a comparison-based sorting algorithm can achieve.

Resources: https://realpython.com/sorting-algorithms-python/

https://stackabuse.com/big-o-notation-and-algorithm-analysis-with-python-examples/

In [20]:
def run_sorting_algorithm(algorithm, array):
    """Time a sorting function with `timeit` and print its best measurement.

    Parameters
    ----------
    algorithm : str
        Name of the sorting function. Any name other than the built-in
        "sorted" is imported from `__main__` by the timing setup code.
    array : list
        Data to sort. Its string form is embedded in the timed statement,
        so the list object passed in is never handed to the algorithm
        (building the list from that text is part of the measured time).

    Returns
    -------
    None. The result is printed: the smallest of three measurements, where
    each measurement is the total time in seconds of ten executions.
    """
    # The built-in needs no import; everything else lives in `__main__`.
    if algorithm == "sorted":
        setup_code = ""
    else:
        setup_code = f"from __main__ import {algorithm}"

    # Source text of the call that `timeit` will execute.
    stmt = f"{algorithm}({array})"

    # Three measurements, each running the statement ten times.
    measurements = repeat(stmt=stmt, setup=setup_code, repeat=3, number=10)
    fastest = min(measurements)

    print(f"Algorithm: {algorithm}. Minimum execution time: {fastest}")

In [5]:
# Benchmark input: ARRAY_LENGTH random integers, each between 0 and 1000
# inclusive (so a list this long necessarily contains many duplicates).
ARRAY_LENGTH = 10_000
array = [randint(0, 1000) for _ in range(ARRAY_LENGTH)]

In [40]:
# View the first ten values as a list, a tuple and a set: `sorted()` accepts
# each of them and returns a new list every time.
numbers = array[:10]
numbers_tuple = tuple(numbers)
numbers_set = set(numbers)
print("sorted numbers:", sorted(numbers))
print("sorted numbers tuple:", sorted(numbers_tuple))
print("sorted numbers_set:", sorted(numbers_set))

# Time the built-in on the full benchmark array.
run_sorting_algorithm("sorted", array)

sorted numbers: [137, 217, 327, 381, 570, 642, 690, 704, 855, 866]
sorted numbers tuple: [137, 217, 327, 381, 570, 642, 690, 704, 855, 866]
sorted numbers_set: [137, 217, 327, 381, 570, 642, 690, 704, 855, 866]
setup code: 
Algorithm: sorted. Minimum execution time: 0.021266875322908163


Lists have a built-in `.sort()` method, which works like `sorted()` but is not available on tuples, sets, or strings; it sorts the list in place and returns `None`.

In [42]:
# Strings have no `.sort()` method, so the call raises AttributeError.
# Catch exactly that error and display it: with a bare `except: pass` the
# demonstration printed nothing and any other failure would be hidden too.
try:
    'abcdhgf'.sort()
except AttributeError as error:
    print("string doesn't support .sort():", error)

# `list.sort()` reorders the list in place and returns None.
values_to_sort = array[0:10]
print("before sorting applied:", values_to_sort)
sorted_value = values_to_sort.sort()
print("values already sorted in place:", values_to_sort, " & nothing being returned:", sorted_value)

before sorting applied: [704, 866, 217, 327, 137, 690, 855, 642, 570, 381]
values already sorted: [137, 217, 327, 381, 570, 642, 690, 704, 855, 866]  & nothing being returned: None


In [44]:
# Comparing an int with a str raises TypeError, so a plain `sorted()` fails
# on this list; converting every item with `key=int` makes them comparable
# while leaving the original values in the result.
mixed_numbers = [1, "2", "3", "4"]
try:
    print("sorted mixed number type:", sorted(mixed_numbers))
except TypeError:
    # Catch only the expected comparison error, not every exception.
    print("with key trick for mixed type", sorted(mixed_numbers, key=int))

with key trick for mixed type [1, '2', '3', '4']


#### Bubble Sort
Sorts the array in ascending order. Each pass “bubbles” the largest remaining element to the end of the array, so each iteration takes fewer steps than the previous one. The average- and worst-case complexity is $O(n^2)$; the best case is $O(n)$ when the array is already sorted.

In [9]:
def bubble_sort(array):
    """Sort `array` in ascending order, in place, using bubble sort.

    Parameters
    ----------
    array : list
        Mutable sequence of mutually comparable items.

    Returns
    -------
    The same list object, now sorted.
    """
    # Index of the last element that may still be out of place; everything
    # after it has already bubbled into its final position.
    unsorted_end = len(array) - 1

    while unsorted_end > 0:
        swapped = False

        # Walk the unsorted prefix, swapping adjacent items that are in
        # the wrong order.
        for idx in range(unsorted_end):
            nxt = idx + 1
            if array[idx] > array[nxt]:
                array[idx], array[nxt] = array[nxt], array[idx]
                swapped = True

        # A pass without any swap means the list is sorted: stop early.
        if not swapped:
            break

        unsorted_end -= 1

    return array

In [21]:
# Time bubble sort, passing the algorithm's name and the benchmark array.
run_sorting_algorithm("bubble_sort", array)

setup code: from __main__ import bubble_sort
Algorithm: bubble_sort. Minimum execution time: 88.46810892596841


#### Insertion Sort
Builds the sorted list one element at a time by comparing each item with the already-sorted items before it and inserting it into its correct position. The average-case runtime complexity is $O(n^2)$. On small inputs, however, insertion sort is often faster than more complex sorting algorithms.

In [22]:
def insertion_sort(array):
    """Sort `array` in ascending order, in place, using insertion sort.

    Parameters
    ----------
    array : list
        Mutable sequence of mutually comparable items.

    Returns
    -------
    The same list object, now sorted.
    """
    for pos in range(1, len(array)):
        # Item to insert into the already-sorted prefix array[:pos].
        current = array[pos]

        # `slot` is where `current` would land; move it towards the front
        # while the item just before it is larger, shifting that item one
        # position to the right to make room.
        slot = pos
        while slot > 0 and array[slot - 1] > current:
            array[slot] = array[slot - 1]
            slot -= 1

        array[slot] = current

    return array

In [23]:
# Time insertion sort on the benchmark array.
run_sorting_algorithm("insertion_sort", array)

setup code: from __main__ import insertion_sort
Algorithm: insertion_sort. Minimum execution time: 38.96390027180314


#### Merge Sort
The implementation of the merge sort algorithm needs two different pieces:

1. A function that recursively splits the input in half. Since the array is halved until a single element remains, there are $\log_2 n$ levels of halving. Since `merge()` does $O(n)$ work in total at each level, the overall runtime is $O(n \log_2 n)$.
2. A function that merges both halves, producing a sorted array, with linear runtime, O(n).
3. Note: merge sort creates copies of the array when calling itself recursively, so it uses much more memory than bubble sort and insertion sort, both of which sort the list in place.

In [25]:
def merge_sort(array):
    """Return the items of `array` in ascending order using merge sort.

    Parameters
    ----------
    array : list
        Sequence of mutually comparable items. It is not modified.

    Returns
    -------
    `array` itself when it has fewer than two items; otherwise the list
    produced by `merge()` from the two recursively sorted halves.
    """
    size = len(array)
    if size < 2:
        # Nothing to sort.
        return array

    half = size // 2

    # Sort each half (slicing copies it), then combine the two results.
    sorted_left = merge_sort(array[:half])
    sorted_right = merge_sort(array[half:])
    return merge(left=sorted_left, right=sorted_right)

In [26]:
def merge(left, right):
    """Merge two ascending sequences into one ascending list.

    Parameters
    ----------
    left, right : list
        Sequences already sorted in ascending order.

    Returns
    -------
    The other argument unchanged (same object) when one of them is empty;
    otherwise a new list with every item from both. When two items compare
    equal, the one from `left` is placed first.
    """
    if not left:
        return right
    if not right:
        return left

    merged = []
    i = j = 0
    n_left, n_right = len(left), len(right)

    # Repeatedly take the smaller head until one side runs out.
    while i < n_left and j < n_right:
        if left[i] <= right[j]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1

    # At most one of these slices is non-empty.
    merged += left[i:]
    merged += right[j:]
    return merged

In [29]:
# Time merge sort on the benchmark array.
run_sorting_algorithm("merge_sort", array)

setup code: from __main__ import merge_sort
Algorithm: merge_sort. Minimum execution time: 0.5131068797782063


#### Quick Sort
Quicksort first selects a pivot element and partitions the list around the pivot, putting every smaller element into a `low` array and every larger element into a `high` array.

Quicksort’s efficiency often depends on the pivot selection. In the best case, the algorithm consistently picks the median element as the pivot, so each subproblem is half the size of the previous one, leading to at most $\log_2 n$ levels. In the worst case, the algorithm always picks the smallest or the largest element, and the unequal partitions lead to $n-1$ levels of recursion and an $O(n^2)$ runtime.

In [32]:
def quicksort(array):
    """Return the items of `array` in ascending order using quicksort.

    Parameters
    ----------
    array : list
        Sequence of mutually comparable items. It is not modified.

    Returns
    -------
    `array` itself when it has fewer than two items; otherwise a new list
    containing every item of the input, duplicates included, in ascending
    order.
    """
    # Fewer than two elements: nothing to sort.
    if len(array) < 2:
        return array

    low, same, high = [], [], []

    # Select the `pivot` element randomly.
    pivot = array[randint(0, len(array) - 1)]

    # Partition around the pivot. Items that are neither smaller nor larger
    # go to `same`; without that list every duplicate of the pivot would be
    # dropped and the result would be shorter than the input.
    for item in array:
        if item < pivot:
            low.append(item)
        elif item > pivot:
            high.append(item)
        else:
            same.append(item)

    # `same` always holds at least the pivot, so both recursive calls get
    # strictly smaller inputs.
    return quicksort(low) + same + quicksort(high)

In [34]:
# Time quicksort on the benchmark array.
run_sorting_algorithm("quicksort", array)

setup code: from __main__ import quicksort
Algorithm: quicksort. Minimum execution time: 0.13136613368988037


#### Timsort
Timsort is a hybrid sorting algorithm that combines insertion sort and merge sort.

In [36]:
def insertion_sort(array, left=0, right=None):
    """Insertion-sort the slice array[left..right] (both inclusive) in place.

    Parameters
    ----------
    array : list
        Mutable sequence of mutually comparable items.
    left : int
        Index of the first element of the range to sort. Defaults to 0.
    right : int or None
        Index of the last element of the range to sort. `None` means the
        last index of `array`.

    Returns
    -------
    The same list object; items outside the range are left untouched.
    """
    last = len(array) - 1 if right is None else right

    for pos in range(left + 1, last + 1):
        # Item to insert into the already-sorted part array[left:pos].
        current = array[pos]

        # `slot` is where `current` would land; move it towards `left`
        # while the item just before it is larger, shifting that item one
        # position to the right to make room.
        slot = pos
        while slot > left and array[slot - 1] > current:
            array[slot] = array[slot - 1]
            slot -= 1

        array[slot] = current

    return array

In [37]:
def timsort(array):
    """Sort `array` in place with a simplified Timsort and return it.

    Fixed-size runs are first sorted with `insertion_sort`, then neighbouring
    runs are combined with `merge`, doubling the run width on every round.

    Parameters
    ----------
    array : list
        Mutable sequence of mutually comparable items.

    Returns
    -------
    The same list object, now sorted.
    """
    # Length of the runs handed to insertion sort.
    min_run = 32
    length = len(array)

    # Phase 1: sort each consecutive run of up to `min_run` items in place.
    for run_start in range(0, length, min_run):
        run_end = min(run_start + min_run, length) - 1
        insertion_sort(array, run_start, run_end)

    # Phase 2: merge neighbouring sorted runs until a single run covers the
    # whole array.
    width = min_run
    while width < length:
        for lo in range(0, length, 2 * width):
            # array[lo:mid] and array[mid:hi] are the two runs to combine;
            # the second one is shorter or empty near the end of the array.
            mid = lo + width
            hi = min(lo + 2 * width, length)

            merged = merge(left=array[lo:mid], right=array[mid:hi])

            # Write the combined run back over the two original runs.
            array[lo:lo + len(merged)] = merged

        width *= 2

    return array

In [38]:
# Time the simplified Timsort on the benchmark array.
run_sorting_algorithm("timsort", array)

setup code: from __main__ import timsort
Algorithm: timsort. Minimum execution time: 0.46197556192055345
