In [4]:
import numpy as np
import random
import heapq
import timeit
import time

In [5]:
##Insertion Sort

def insertion_sort(array):
    for j in range(1, len(array)): 
        key = array[j] 
        i = j - 1 
        
        while i >= 0 and array[i] > key: 
            array[i + 1] = array[i] 
            i -= 1 
            
        array[i + 1] = key
        
    return array

Basic merge-sort code taken from https://www.geeksforgeeks.org/merge-sort/

Collaborated on the following functions with Anna Pauxberger.

In [6]:
## 3-way Merge Sort

def three_way_merge_sort(array):
    if len(array) >= 3:
        start = 0
        end = len(array) // 3
    
        left = three_merge_sort(array[:end])
        middle = three_merge_sort(array[end:end * 2])
        right = three_merge_sort(array[end * 2:])
    
        array = [x for x in heapq.merge(left, middle, right)]
    
    elif len(array) == 2:
        if array[0] > array[1]:
            array[0], array[1] = array[1], array[0]
            
    else:
        return array

    return array

The k-way merge function uses heapq.merge() on the different sublists that are generated in the loop to merge them back in order once they are sorted individually. I used heapq instead of a manually written merge function, since it can easily be expanded to work for any value of k (and writing a k-way merge function proved to be more difficult than I wanted the scope of my optional answer to be).


Similarly, I used the heapq.merge() function for my three-way sort, since I could not resist the beauty of such a simple solution (rather than manually comparing all three stacks against each other each time).

In [8]:
## k-way Merge Sort

def k_way_merge_sort(array, k):
    # sublist_end?
    if len(array) > 3*k:
        end = len(array)//k
    
        # for cases where len(a)//k would round to 0
        if end == 0:
            end = 1

            list_of_sublists = []

            for i in range(k):
                sublist = k_way_merge_sort(array[end*(i):end*(i+1)], k)
                list_of_sublists.append(sublist)

            array = [x for x in heapq.merge(*list_of_sublists)]

    else:
        array.sort()
#         array = insertion_sort(array)

    return array

In [9]:
## Data collection
sample_time = np.zeros([10,5])


def data_collection(array):
    col = 0
    row = 0

    #iterating over a range of k values and the corresponding
    #sample sizes to record the times it takes to sort them
    for k_exp in range(1, 11):
        k = 2 ** k_exp

        for s_exp in range(2, 7):
            sample = 10 ** s_exp
            sample_array = random.sample(range(1, sample*10), sample)
        
            #timing function to call in timeit.timeit
            def merge_timing():
                k_way_merge_sort(sample_array[:],k)


            array[row][col] = timeit.timeit(merge_timing, number=1)
            col += 1 
        
        col = 0
        row += 1
            
    return array

print (data_collection(sample_time))

[[  1.09672546e-05   4.60147858e-05   7.35998154e-04   4.54115868e-03
    6.24229908e-02]
 [  1.28746033e-05   3.09944153e-05   4.58955765e-04   6.51597977e-03
    7.12130070e-02]
 [  1.19209290e-05   3.98159027e-05   4.70876694e-04   5.84506989e-03
    6.83619976e-02]
 [  1.21593475e-05   2.00271606e-05   5.82218170e-04   6.65497780e-03
    7.96041489e-02]
 [  1.50203705e-05   4.69684601e-05   8.56876373e-04   5.62500954e-03
    6.59849644e-02]
 [  2.69412994e-05   2.00271606e-05   5.08069992e-04   6.30497932e-03
    6.63280487e-02]
 [  2.59876251e-05   2.09808350e-05   6.24179840e-04   6.72602654e-03
    6.74571991e-02]
 [  2.69412994e-05   2.59876251e-05   5.01871109e-04   5.63406944e-03
    6.38780594e-02]
 [  2.50339508e-05   2.21967697e-04   4.42028046e-04   6.07991219e-03
    7.48710632e-02]
 [  3.00407410e-05   2.43902206e-04   4.73976135e-04   6.25205040e-03
    6.96790218e-02]]
