# Imports

In [45]:
import time
import numpy as np
import functools
import math

# Timer Decorator

In [2]:
def timer(func):
    @functools.wraps(func)
    def wrapper_timer(*args, **kwargs):
        start = time.perf_counter()
        value = func(*args, **kwargs)
        end = time.perf_counter()
        elapsed = end - start
        print(f"Time taken to run the function: {elapsed} seconds")
        return value
    return wrapper_timer

# Test Cases

In [3]:
array_test_0 = np.array(np.random.randint(10, size = 10))
array_correct_0 = np.array(sorted(array_test_0)) # to compare against our implemented algorithms to make sure they work

array_test_1 = np.array(np.random.randint(100, size = 100))
array_correct_1 = np.array(sorted(array_test_1))

array_test_2 = np.array(np.random.randint(1000, size = 1000))
array_correct_2 = np.array(sorted(array_test_2))

array_test_3 = np.array(np.random.randint(10000, size = 10000))
array_correct_3 = np.array(sorted(array_test_3))

array_test_4 = np.array(np.random.randint(100000, size = 100000))
array_correct_4 = np.array(sorted(array_test_4))

array_test_5 = np.array(np.random.randint(1000000, size = 1000000))
array_correct_5 = np.array(sorted(array_test_5))

array_test_6 = np.array(np.random.randint(1000000, size = 10000000))
array_correct_6 = np.array(sorted(array_test_6))

# Sorting Algorithms

## Insertion Sort

In [18]:
# @timer
def insertion_sort(array):
    comparisons = 0
    temp = np.copy(array)
    for step in range(1, len(temp)):
        key = temp[step]
        j = step - 1
        
        # Compare key with each element on the left of it until an element smaller than it is found
        # For descending order, change key<array[j] to key>array[j].        
        while j >= 0 and key < temp[j]:
            temp[j + 1] = temp[j]
            j = j - 1
            comparisons += 1 # for counting key comparisons
        
        # Place key at after the element just smaller than it.
        temp[j + 1] = key
    
    return temp, comparisons

## Original MergeSort

In [33]:
# @timer
def mergesort(array):
    comparisons = 0
    # recursive step
    if len(array) == 1:
        return array, comparisons
    elif len(array) > 1: 
        mid = len(array)//2

        left = array[:mid]
        right = array[mid:]

        left, left_comparisons = mergesort(left)
        right, right_comparisons = mergesort(right)

    to_return, new_comparisons = merge(left, right)
    comparisons += new_comparisons
    comparisons += left_comparisons
    comparisons += right_comparisons

    return to_return, comparisons

In [6]:
def merge(left, right):
    to_return = []
    i = j = 0
    left_limit = len(left)
    right_limit = len(right)
    comparisons = 0

    while i != left_limit and j != right_limit:
        if left[0] < right[0]:
            to_return.append(left[0])
            left = np.delete(left, 0)
            i += 1
        else:
            to_return.append(right[0])
            right = np.delete(right, 0)
            j += 1
        comparisons += 1

    while i != left_limit and j == right_limit:
        to_return.append(left[0])
        left = np.delete(left, 0)
        i += 1

    while i == left_limit and j != right_limit:
        to_return.append(right[0])
        right = np.delete(right, 0)
        j += 1
    
    to_return = np.array(to_return)
    return to_return, comparisons

## Hybrid Sort

In [36]:
# @timer
def hybrid_sort(array, s):
    comparisons = 0
    if len(array) <= s:
        to_return, comparisons = insertion_sort(array)
        return to_return, comparisons
    elif len(array) > s:
        mid = len(array)//2

        left = array[:mid]
        right = array[mid:]

        left, left_comparisons = hybrid_sort(left, s)
        right, right_comparisons = hybrid_sort(right, s)

    to_return, new_comparisons = merge(left, right)
    comparisons += new_comparisons
    comparisons += right_comparisons
    comparisons += left_comparisons

    return to_return, comparisons

# Counting Key Comparisons

In [58]:
best_s_dictionary = {}

In [60]:
array_size = len(array_test_0)
optimal = math.inf
best_s = 1

print(f"Testing Key Comparisons for array size of {array_size}")
for S in range(1, 10):
    # print(f"S is at value of {S}")
    sorted_array, comparisons = hybrid_sort(array_test_0, S)
    if (np.array_equal(sorted_array, array_correct_0)):
        if (comparisons <= optimal):
            best_s = S
            optimal = comparisons
        # print(f"Number of comparisons done: {comparisons}")
        # print("----------------------------------")

print(f"Best S for array of size {array_size} is {best_s} with a total of {optimal} comparisons")
best_s_dictionary[array_size] = best_s

Testing Key Comparisons for array size of 10
Best S for array of size 10 is 4 with a total of 20 comparisons


In [61]:
for i in range(1, 6):
    array_size = len(eval(f"array_test_{i}"))
    optimal = math.inf
    best_s = 1
    
    print(f"Testing Key Comparisons for array size of {array_size}")
    for S in range(1, 10):
        # print(f"S is at value of {S}")
        sorted_array, comparisons = hybrid_sort(eval(f"array_test_{i}"), S)
        if (np.array_equal(sorted_array, eval(f"array_correct_{i}"))):
            if (comparisons <= optimal):
                best_s = S
                optimal = comparisons
            # print(f"Number of comparisons done: {comparisons}")
            # print("----------------------------------")
    
    print(f"Best S for array of size {array_size} is {best_s} with a total of {optimal} comparisons")
    best_s_dictionary[array_size] = best_s

Testing Key Comparisons for array size of 100
Best S for array of size 100 is 6 with a total of 502 comparisons
Testing Key Comparisons for array size of 1000
Best S for array of size 1000 is 6 with a total of 8295 comparisons
Testing Key Comparisons for array size of 10000
Best S for array of size 10000 is 8 with a total of 116231 comparisons
Testing Key Comparisons for array size of 100000
Best S for array of size 100000 is 5 with a total of 1497300 comparisons
Testing Key Comparisons for array size of 1000000
Best S for array of size 1000000 is 6 with a total of 18261032 comparisons


In [62]:
print(best_s_dictionary)

{100: 6, 10: 4, 1000: 6, 10000: 8, 100000: 5, 1000000: 6}


# Testing Hybrid

In [None]:
timer = time.perf_counter()
test, temp = hybrid_sort(array_test_1, 3)
print(f"Time to sort array of size {len(array_test_1)} is {time.perf_counter() - timer} seconds")
if np.array_equal(test, array_correct_6):
    print(f"Hybrid sorted correctly")

In [63]:
for i in range(6):
    array_size = len(eval(f"array_test_{i}"))
    timer = time.perf_counter()
    test, temp = hybrid_sort(eval(f"array_test_{i}"), best_s_dictionary[array_size])
    print(f"Time to sort array of size {array_size} is {time.perf_counter() - timer} seconds")
    if np.array_equal(test, eval(f"array_correct_{i}")):
        print(f"Hybrid sorted correctly")

Time to sort array of size 10 is 0.0006050000001778244 seconds
Hybrid sorted correctly
Time to sort array of size 100 is 0.003243500000280619 seconds
Hybrid sorted correctly
Time to sort array of size 1000 is 0.0459445999995296 seconds
Hybrid sorted correctly
Time to sort array of size 10000 is 0.6008443000000625 seconds
Hybrid sorted correctly
Time to sort array of size 100000 is 8.756572100000085 seconds
Hybrid sorted correctly


# Testing insertion sort

In [None]:
for i in range(5):
    array_size = len(eval(f"array_test_{i}"))
    timer = time.perf_counter()
    test, temp = insertion_sort(eval(f"array_test_{i}"))
    print(f"Time to sort array of size {array_size} is {time.perf_counter() - timer} seconds")
    if np.array_equal(test, eval(f"array_correct_{i}")):
        print(f"Hybrid sorted correctly")

# Testing merge sort

In [None]:
timer = time.perf_counter()
test, temp = mergesort(array_test_0)
print(f"Time to sort array of size {len(array_test_0)} is {time.perf_counter() - timer} seconds")
if np.array_equal(test, array_correct_0):
    print(f"Merge sorted correctly")

In [22]:
for i in range(6):
    array_size = len(eval(f"array_test_{i}"))
    timer = time.perf_counter()
    test, temp = mergesort(eval(f"array_test_{i}"))
    print(f"Time to sort array of size {array_size} is {time.perf_counter() - timer} seconds")
    if np.array_equal(test, eval(f"array_correct_{i}")):
        print(f"Merge sorted correctly")

Time to sort array of size 10 is 0.0007376000000931526 seconds
Merge sorted correctly
Time to sort array of size 100 is 0.006099599999970451 seconds
Merge sorted correctly
Time to sort array of size 1000 is 0.05398609999997461 seconds
Merge sorted correctly
Time to sort array of size 10000 is 0.7487736000000496 seconds
Merge sorted correctly
Time to sort array of size 100000 is 9.96908429999985 seconds
Merge sorted correctly
Time to sort array of size 1000000 is 309.4118951999999 seconds
Merge sorted correctly


# Direct Comparisons

In [69]:
runtime_dictionary = {}
for i in range(6):
    time_dict = {}
    array_size = len(eval(f"array_test_{i}"))

    # merge sort
    start = time.perf_counter()
    test, temp = mergesort(eval(f"array_test_{i}"))
    end = time.perf_counter()
    merge_time = end - start
    time_dict["merge"] = merge_time
    print(f"Time to mergesort array of size {array_size} is {merge_time:6f} seconds")

    # hybrid sort
    start = time.perf_counter()
    test, temp = hybrid_sort(eval(f"array_test_{i}"), best_s_dictionary[array_size])
    end = time.perf_counter()
    hybrid_time = end - start
    time_dict["hybrid"] = hybrid_time
    print(f"Time to hybrid sort array of size {array_size} is {hybrid_time:6f} seconds")

    runtime_dictionary[array_size] = time_dict
    print("---------------------------------------------------------------------")

Time to mergesort array of size 10 is 0.000477 seconds
Time to hybrid sort array of size 10 is 0.000358 seconds
---------------------------------------------------------------------
Time to mergesort array of size 100 is 0.004833 seconds
Time to hybrid sort array of size 100 is 0.002567 seconds
---------------------------------------------------------------------
Time to mergesort array of size 1000 is 0.054537 seconds
Time to hybrid sort array of size 1000 is 0.043209 seconds
---------------------------------------------------------------------
Time to mergesort array of size 10000 is 0.752548 seconds
Time to hybrid sort array of size 10000 is 0.627456 seconds
---------------------------------------------------------------------
Time to mergesort array of size 100000 is 9.663834 seconds
Time to hybrid sort array of size 100000 is 8.646311 seconds
---------------------------------------------------------------------
Time to mergesort array of size 1000000 is 305.208492 seconds
Time to 

In [70]:
print(runtime_dictionary)

{10: {'merge': 0.00047670000003563473, 'hybrid': 0.00035819999993691454}, 100: {'merge': 0.004833099999814294, 'hybrid': 0.0025673999998616637}, 1000: {'merge': 0.05453690000013012, 'hybrid': 0.04320929999994405}, 10000: {'merge': 0.7525475000002189, 'hybrid': 0.6274556999997003}, 100000: {'merge': 9.6638339000001, 'hybrid': 8.646310600000106}, 1000000: {'merge': 305.20849180000005, 'hybrid': 288.80708949999917}}
