```text
Algorithm: hybrid_sort(A, S)

Input:
    A → array of elements
    S → threshold size for switching to insertion sort

Procedure:
    if length(A) <= S then
        insertion_sort(A)
        return A
    else
        if length(A) <= 1 then
            return A
        mid = length(A) // 2
        B1 = hybrid_sort(A[0:mid], S)
        B2 = hybrid_sort(A[mid:length(A)], S)
        return merge(B1, B2)


In [1]:
import random
import time

In [2]:
class KeyComparisons:
    """Running tally of key comparisons performed by the sorting routines.

    Every comparison helper bumps ``key_comparisons`` by one *before* it
    evaluates the comparison, then returns the result of the corresponding
    Python operator applied to ``a`` and ``b``.
    """

    def __init__(self):
        # Public counter; callers read it directly as well as via the getter.
        self.key_comparisons = 0

    def _count_one(self) -> None:
        """Record a single key comparison."""
        self.key_comparisons = self.key_comparisons + 1

    def isALargerThanB(self, a, b) -> bool:
        """Count one comparison and return ``a > b``."""
        self._count_one()
        return a > b

    def isALessThanB(self, a, b) -> bool:
        """Count one comparison and return ``a < b``."""
        self._count_one()
        return a < b

    def isALessThanOrEqualB(self, a, b) -> bool:
        """Count one comparison and return ``a <= b``."""
        self._count_one()
        return a <= b

    def isAEqualB(self, a, b) -> bool:
        """Count one comparison and return ``a == b``."""
        self._count_one()
        return a == b

    def resetKeyComparisons(self):
        """Set the tally back to zero."""
        self.key_comparisons = 0

    def returnKeyComparisons(self):
        """Return the number of comparisons counted since the last reset."""
        return self.key_comparisons

In [3]:
#swap elements i, j in the array A
def swap(A, i, j):
    """Exchange the elements at positions ``i`` and ``j`` of ``A`` in place."""
    # Read both values first (j, then i) so neither is lost when writing back.
    value_for_i = A[j]
    value_for_j = A[i]
    A[i] = value_for_i
    A[j] = value_for_j

In [4]:
#insertion sort
def insertion_sort(A, comparisons_obj):
    """Sort ``A`` in place in ascending order using insertion sort.

    Parameters:
        A: mutable sequence to sort; it is modified in place.
        comparisons_obj: object whose ``isALessThanB(a, b)`` method counts the
            comparison and reports whether ``a < b``.

    Returns:
        The same ``A`` object, now sorted.
    """
    # Index 0 on its own is trivially sorted, so insertion begins at index 1.
    for start in range(1, len(A)):
        pos = start
        # Walk the element leftwards while it is strictly smaller than its
        # left neighbour; stop at the first neighbour that is not larger.
        while pos > 0 and comparisons_obj.isALessThanB(A[pos], A[pos - 1]):
            swap(A, pos, pos - 1)
            pos -= 1
    return A

In [5]:
def merge(list1, list2, comparisons_obj):
    """Merge two sorted lists into one new sorted list, counting comparisons.

    The inputs are never modified. The merge is stable: when the two heads
    compare equal, the element from ``list1`` is emitted first.

    Two index cursors walk the inputs instead of repeatedly slicing off the
    head (``l = l[1:]``), which copied the remaining list on every step and
    made a single merge quadratic in the input length.

    Parameters:
        list1: first sorted list.
        list2: second sorted list.
        comparisons_obj: object whose ``isALessThanOrEqualB(a, b)`` method
            counts the comparison and reports whether ``a <= b``.

    Returns:
        A new list containing every element of ``list1`` and ``list2`` in
        ascending order.
    """
    # Nothing to compare when either side is empty.
    if not list1 or not list2:
        return list1 + list2

    sorted_list = []
    i, j = 0, 0
    len1, len2 = len(list1), len(list2)

    while i < len1 and j < len2:
        head1, head2 = list1[i], list2[j]
        if comparisons_obj.isALessThanOrEqualB(head1, head2):
            sorted_list.append(head1)
            i += 1
        else:
            sorted_list.append(head2)
            j += 1

    # At most one of these tails is non-empty once a list has run out.
    sorted_list.extend(list1[i:])
    sorted_list.extend(list2[j:])
    return sorted_list

In [6]:
def merge_sort(A, comparisons_obj):
    """Sort ``A`` in ascending order with top-down merge sort.

    Parameters:
        A: list to sort.
        comparisons_obj: comparison counter passed through to ``merge``.

    Returns:
        ``A`` itself when it has at most one element, otherwise the list
        produced by merging the two recursively sorted halves.
    """
    n = len(A)
    if n < 2:
        # Zero or one element: already sorted, hand the input straight back.
        return A

    half = n // 2
    left_sorted = merge_sort(A[:half], comparisons_obj)
    right_sorted = merge_sort(A[half:], comparisons_obj)
    return merge(left_sorted, right_sorted, comparisons_obj)

In [7]:
def hybrid_sort(A, S, comparisons_obj):
    """Sort ``A`` with merge sort, switching to insertion sort on small pieces.

    Parameters:
        A: list to sort; it is not modified.
        S: threshold size. Sub-arrays of length ``<= S`` are sorted with
            ``insertion_sort``; longer ones are split in half and merged.
        comparisons_obj: comparison counter passed to ``insertion_sort`` and
            ``merge``.

    Returns:
        A new sorted list.
    """
    # Base case from the algorithm description: without it, a threshold
    # S <= 0 splits a one-element array into [] and itself forever.
    if len(A) <= 1:
        return A[:]

    if len(A) <= S:  # threshold check: small enough for insertion sort
        # Sort a copy so the caller's list is left untouched.
        return insertion_sort(A[:], comparisons_obj)

    mid = len(A) // 2
    B1 = hybrid_sort(A[:mid], S, comparisons_obj)
    B2 = hybrid_sort(A[mid:], S, comparisons_obj)
    return merge(B1, B2, comparisons_obj)

In [8]:
#sample unsorted array (contains duplicate values) used to sanity-check hybrid_sort
array1 = [44,97,96,45,66,87,45,32,8,5,34,76,90,45,32,1,6,8,5,76,74,87,95]
comparisons_obj = KeyComparisons()   #fresh counter so the tally starts at 0

print(hybrid_sort(array1,3, comparisons_obj)) #threshold S=3: pieces of length <= 3 are handed to insertion sort
print("Total comparisons:", comparisons_obj.key_comparisons)

[1, 5, 5, 6, 8, 8, 32, 32, 34, 44, 45, 45, 45, 66, 74, 76, 76, 87, 87, 90, 95, 96, 97]
Total comparisons: 75


In [9]:
#same values as array1, sorted with plain merge_sort so the comparison counts can be compared
array2 = [44,97,96,45,66,87,45,32,8,5,34,76,90,45,32,1,6,8,5,76,74,87,95]
comparisons_obj = KeyComparisons()   #rebinds the name to a new counter, so the tally starts at 0 again

print(merge_sort(array2, comparisons_obj))
print("Total comparisons:", comparisons_obj.key_comparisons)

[1, 5, 5, 6, 8, 8, 32, 32, 34, 44, 45, 45, 45, 66, 74, 76, 76, 87, 87, 90, 95, 96, 97]
Total comparisons: 77


In [10]:
#generates the array sizes from 1000 to 10 million systematically 
def generate_input_sizes():
    """Return the benchmark array sizes, ascending and without duplicates.

    Each of the four scales (1 000, 10 000, 100 000, 1 000 000) is multiplied
    by 1 through 10, giving sizes from 1 000 up to 10 000 000.
    """
    scales = (1000, 10000, 100000, 1000000)
    # A set drops the values produced by two scales (e.g. 10 * 1000 == 1 * 10000).
    distinct_sizes = {
        multiplier * scale
        for multiplier in range(1, 11)
        for scale in scales
    }
    return sorted(distinct_sizes)

In [11]:
#testing: print every input size produced by generate_input_sizes
print(generate_input_sizes())

[1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000, 100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000, 900000, 1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000]


In [12]:
#will generate random arrays with the values in the range [1, size]
def generate_random_array(size):
    """Return a list of ``size`` random integers, each drawn from ``[1, size]``.

    Values come from the module-level ``random`` generator, one
    ``random.randint(1, size)`` call per element, so duplicates are possible.
    """
    draw = random.randint
    upper = size
    return [draw(1, upper) for _ in range(upper)]

In [13]:
#testing: print a small random array of 15 values
print(generate_random_array(15))

[9, 7, 5, 1, 6, 1, 3, 7, 11, 5, 2, 2, 5, 9, 2]


In [14]:
def generate_all_datasets(seed=22):
    """Build one random array for every benchmark input size.

    Parameters:
        seed: value passed to ``random.seed`` before any array is generated,
            so the same seed reproduces the same arrays.

    Returns:
        A tuple ``(input_data, input_data_sizes)`` where ``input_data[k]`` is
        the array generated for ``input_data_sizes[k]``.
    """
    random.seed(seed)

    sizes = generate_input_sizes()
    # Arrays are generated in the order of `sizes`, one after another.
    arrays = [generate_random_array(n) for n in sizes]

    return arrays, sizes

In [15]:
#to verify that the datasets generated are correctly generated
def verify_datasets(input_data, input_data_sizes):
    """Print one summary line (size, minimum, maximum) per dataset.

    Parameters:
        input_data: sequence of arrays.
        input_data_sizes: sequence of sizes, paired positionally with
            ``input_data``; the reported size is taken from here.
    """
    # zip pairs each size with the array at the same position.
    for expected_size, dataset in zip(input_data_sizes, input_data):
        smallest = min(dataset)
        largest = max(dataset)
        summary = f"Array Size: {expected_size}, Min: {smallest}, Max: {largest}"
        print(summary)

In [16]:
def main():
    """Generate every dataset, print its summary, and return the results.

    Returns:
        The ``(input_data, input_data_sizes)`` tuple from
        ``generate_all_datasets``.
    """
    datasets = generate_all_datasets()
    # Unpacks to verify_datasets(input_data, input_data_sizes).
    verify_datasets(*datasets)
    return datasets

In [18]:
#generate and verify the datasets only when this code runs as the main module
if __name__ == "__main__":
    input_data, input_data_sizes = main() #kept as globals so later cells can use them

Array Size: 1000, Min: 1, Max: 1000
Array Size: 2000, Min: 1, Max: 2000
Array Size: 3000, Min: 3, Max: 3000
Array Size: 4000, Min: 1, Max: 4000
Array Size: 5000, Min: 1, Max: 5000
Array Size: 6000, Min: 2, Max: 6000
Array Size: 7000, Min: 1, Max: 6998
Array Size: 8000, Min: 1, Max: 8000
Array Size: 9000, Min: 1, Max: 9000
Array Size: 10000, Min: 1, Max: 9999
Array Size: 20000, Min: 1, Max: 20000
Array Size: 30000, Min: 2, Max: 30000
Array Size: 40000, Min: 1, Max: 40000
Array Size: 50000, Min: 1, Max: 50000
Array Size: 60000, Min: 2, Max: 59999
Array Size: 70000, Min: 1, Max: 70000
Array Size: 80000, Min: 3, Max: 80000
Array Size: 90000, Min: 1, Max: 90000
Array Size: 100000, Min: 1, Max: 99997
Array Size: 200000, Min: 1, Max: 200000
Array Size: 300000, Min: 1, Max: 300000
Array Size: 400000, Min: 1, Max: 400000
Array Size: 500000, Min: 2, Max: 500000
Array Size: 600000, Min: 1, Max: 600000
Array Size: 700000, Min: 1, Max: 700000
Array Size: 800000, Min: 1, Max: 799999
Array Size: 9000

In [27]:
#testing: print the first generated dataset (the array for the smallest input size)
print(input_data[0])

[982, 930, 144, 249, 25, 628, 458, 189, 719, 124, 758, 668, 900, 354, 812, 911, 82, 238, 276, 983, 51, 328, 1000, 616, 184, 955, 566, 702, 751, 947, 441, 726, 905, 50, 591, 903, 22, 604, 271, 319, 432, 195, 185, 871, 115, 597, 544, 961, 747, 793, 985, 974, 580, 63, 716, 335, 628, 343, 265, 189, 980, 403, 320, 923, 800, 892, 978, 980, 667, 528, 146, 287, 846, 278, 684, 709, 190, 443, 51, 351, 545, 923, 34, 430, 948, 258, 519, 292, 530, 904, 420, 407, 677, 947, 939, 190, 315, 577, 383, 407, 597, 26, 305, 590, 610, 575, 557, 540, 444, 776, 690, 921, 541, 438, 91, 93, 455, 365, 101, 443, 800, 413, 60, 952, 862, 976, 202, 525, 245, 411, 7, 99, 470, 917, 199, 176, 31, 560, 386, 358, 184, 337, 932, 366, 645, 196, 455, 38, 65, 549, 138, 816, 798, 663, 757, 207, 672, 711, 104, 54, 200, 421, 125, 848, 152, 944, 602, 472, 45, 867, 123, 511, 725, 537, 139, 599, 292, 701, 762, 879, 485, 488, 745, 68, 659, 732, 363, 978, 803, 114, 840, 828, 624, 266, 966, 123, 19, 192, 513, 359, 650, 66, 349, 100, 1