In [1]:
import time
import numpy as np 
import random

# Insertion Sort

In [2]:
def insertion_sort(array):

    for step in range(1, len(array)):
        key = array[step]
        j = step - 1
        
        # Compare the element with the ones before it
        # If it is smaller than the ones before it, then move the larger ones to current position 
        while j >= 0 and key < array[j]:
            array[j + 1] = array[j]
            j = j - 1
        
        # Once an element smaller than it is found 
        # Place key at after it
        array[j + 1] = key
    
    return array 


# Merge Sort

In [3]:
def mergesort(array):

    # If the array only got 1 element 
    if (len(array) > 1):
        mid = len(array) // 2 
        left = array[:mid].copy()
        right = array[mid:].copy()

        mergesort(left)
        # print(left, right)
        mergesort(right)
        
        merge(array, left, right)
    
    return array
    

def merge(array, left, right):
    

    i = j = k = 0
    
    # while length of both right and left are not 0 z
    while(i < len(left) and j < len(right)):
        # compare first element of the 2 halves 
        if(left[i] < right[j]):
            array[k] = left[i]                
            i += 1
            k += 1
        else:
            array[k] = right[j]
            j += 1
            k += 1 

    # If there are still remaining elements in either half 
    while (i < len(left)):
        array[k] = left[i]
        i += 1
        k += 1

    while (j < len(right)):
        array[k] = right[j]
        k += 1
        j += 1
    
    return array

# Hybrid Algorithm

In [33]:
def hybrid_sort(array, S):
    if (len(array) <= S):
        insertion_sort(array)
    else:
        mid = len(array) // 2 
        left = array[:mid].copy()
        right = array[mid:].copy()

        mergesort(left)
        mergesort(right)

        merge(array, left, right)

    return array 

# Testing

Creating Random Arrays

In [76]:
array_test_0 = np.array(np.random.randint(10, size = 10))
array_correct_0 = np.array(sorted(array_test_0)) # to compare against our implemented algorithms to make sure they work

array_test_1 = np.array(np.random.randint(10, size = 100))
array_correct_1 = np.array(sorted(array_test_1))

array_test_2 = np.array(np.random.randint(10, size = 1000))
array_correct_2 = np.array(sorted(array_test_2))

array_test_3 = np.array(np.random.randint(10, size = 10000))
array_correct_3 = np.array(sorted(array_test_3))

array_test_4 = np.array(np.random.randint(10, size = 100000))
array_correct_4 = np.array(sorted(array_test_4))

array_test_5 = np.array(np.random.randint(10, size = 1000000))
array_correct_5 = np.array(sorted(array_test_5))

array_test_6 = np.array(np.random.randint(10, size = 10000000))
array_correct_6 = np.array(sorted(array_test_6))

### Testing Hybrid Sort VS Merge Sort 
- For array sizes 10, 100, 1000, 10000

In [82]:
for i in range(5):
    timer = time.perf_counter()
    test = hybrid_sort(np.copy(eval(f"array_test_{i}")), 7)
    array_size = len(eval(f"array_test_{i}"))
    print(f"Time to sort array of size {array_size} is {time.perf_counter() - timer} seconds")
    if np.array_equal(test, eval(f"array_correct_{i}")):
        print(f"Hybrid sorted correctly")
        
    timer = time.perf_counter()
    test = mergesort(np.copy(eval(f"array_test_{i}")))
    array_size = len(eval(f"array_test_{i}"))
    print(f"Time to sort array of size {array_size} is {time.perf_counter() - timer} seconds")
    if np.array_equal(test, eval(f"array_correct_{i}")):
        print(f"Merge sorted correctly \n")

Time to sort array of size 10 is 0.0001823999991756864 seconds
Hybrid sorted correctly
Time to sort array of size 10 is 0.0002447000006213784 seconds
Merge sorted correctly 

Time to sort array of size 100 is 0.0020479000013438053 seconds
Hybrid sorted correctly
Time to sort array of size 100 is 0.0019839999949908815 seconds
Merge sorted correctly 

Time to sort array of size 1000 is 0.01601739999750862 seconds
Hybrid sorted correctly
Time to sort array of size 1000 is 0.010624900001857895 seconds
Merge sorted correctly 

Time to sort array of size 10000 is 0.12168699999892851 seconds
Hybrid sorted correctly
Time to sort array of size 10000 is 0.11350189999939175 seconds
Merge sorted correctly 

Time to sort array of size 100000 is 1.2347642999957316 seconds
Hybrid sorted correctly
Time to sort array of size 100000 is 1.291285200000857 seconds
Merge sorted correctly 



### Testing for an array size of 1 million

In [84]:
    timer = time.perf_counter()
    test = hybrid_sort(np.copy(array_test_6), 7)
    array_size = len(array_test_6)
    print(f"Time to sort array of size {array_size} is {time.perf_counter() - timer} seconds")
    if np.array_equal(test, array_correct_6):
        print(f"Hybrid sorted correctly")
        
    timer = time.perf_counter()
    test = mergesort(np.copy(array_test_6))
    array_size = len(array_test_6)
    print(f"Time to sort array of size {array_size} is {time.perf_counter() - timer} seconds")
    if np.array_equal(test, array_correct_6):
        print(f"Merge sorted correctly \n")

Time to sort array of size 10000000 is 175.6329906999963 seconds
Hybrid sorted correctly
Time to sort array of size 10000000 is 176.59346269999514 seconds
Merge sorted correctly 



### Best, Worst and Avg Case (Taking array size of 1000)
- Best Case: Already sorted 
- Worst Case: Reverse sorted 
- Avg Case: Randomly generated array

In [94]:
# Sample Test Cases (Using Array size of 1000)

# Best Case --> Array already sorted 
avg_case = np.array(np.random.randint(10, size = 1000))
best_case = np.array(sorted(avg_case))
worst_case = np.flip(best_case)


print("==== TESTING BEST CASE ====")

timer = time.perf_counter()
test = mergesort(np.copy(best_case))
print(f"Time to sort array of size {len(best_case)} is {time.perf_counter() - timer} seconds")
if np.array_equal(test, best_case):
    print(f"Merge sorted correctly \n")


timer = time.perf_counter()
test = hybrid_sort(np.copy(best_case), 7)
print(f"Time to sort array of size {len(best_case)} is {time.perf_counter() - timer} seconds")
if np.array_equal(test, best_case):
    print(f"Hybrid sorted correctly \n")


print("==== TESTING WORST CASE ====")


timer = time.perf_counter()
test = mergesort(np.copy(worst_case))
print(f"Time to sort array of size {len(best_case)} is {time.perf_counter() - timer} seconds")
if np.array_equal(test, best_case):
    print(f"Merge sorted correctly \n")


timer = time.perf_counter()
test = hybrid_sort(np.copy(worst_case), 7)
print(f"Time to sort array of size {len(best_case)} is {time.perf_counter() - timer} seconds")
if np.array_equal(test, best_case):
    print(f"Hybrid sorted correctly \n")


print("==== TESTING AVERAGE CASE ====")


timer = time.perf_counter()
test = mergesort(np.copy(avg_case))
print(f"Time to sort array of size {len(best_case)} is {time.perf_counter() - timer} seconds")
if np.array_equal(test, best_case):
    print(f"Merge sorted correctly \n")


timer = time.perf_counter()
test = hybrid_sort(np.copy(avg_case), 7)
print(f"Time to sort array of size {len(best_case)} is {time.perf_counter() - timer} seconds")
if np.array_equal(test, best_case):
    print(f"Hybrid sorted correctly \n")

==== TESTING BEST CASE ====
Time to sort array of size 1000 is 0.010883800001465715 seconds
Merge sorted correctly 

Time to sort array of size 1000 is 0.010043199996289331 seconds
Hybrid sorted correctly 

==== TESTING WORST CASE ====
Time to sort array of size 1000 is 0.008283199997094925 seconds
Merge sorted correctly 

Time to sort array of size 1000 is 0.008177000003342982 seconds
Hybrid sorted correctly 

==== TESTING AVERAGE CASE ====
Time to sort array of size 1000 is 0.007704199997533578 seconds
Merge sorted correctly 

Time to sort array of size 1000 is 0.009088699996937066 seconds
Hybrid sorted correctly 

