In [25]:
import random
from statistics import mean
import pandas as pd
from collections import defaultdict
from matplotlib import pyplot as plt
import numpy as np

import MiniProject1
import bubble_sort
import heap_sort
import insertion_sort
import merge_sort
import quick_sort
import selection_sort
import trie

In [2]:
lst = ['selection_sort', 'insertion_sort', 'heap_sort', 'merge_sort', 'trie', 'bubble_sort', 'quick_sort']
alphabet = sorted((list(set(''.join(lst)))))

print('alphabet'.ljust(15),alphabet)
print('original'.ljust(15),lst)
print('reverse'.ljust(15),MiniProject1.reverse(lst))
print('bubble_sort'.ljust(15),bubble_sort.bubble_sort(lst))
print('heap_sort'.ljust(15),heap_sort.heap_sort(lst))
print('insertion_sort'.ljust(15),insertion_sort.insertion_sort(lst))
print('merge_sort'.ljust(15),merge_sort.merge_sort(lst))
print('quick_sort'.ljust(15),quick_sort.quick_sort(lst))
print('3way_quick_sort'.ljust(15),quick_sort.quick_sort_3way(lst))
print('selection_sort'.ljust(15),selection_sort.selection_sort(lst))
print('list_trie_sort'.ljust(15),trie.list_trie_sort(lst,alphabet))
print('dict_trie_sort'.ljust(15),trie.dict_trie_sort(lst,alphabet))
print('bubble_sort'.ljust(15),bubble_sort.bubble_sort(lst,reverse=True))
print('heap_sort'.ljust(15),heap_sort.heap_sort(lst,reverse=True))
print('insertion_sort'.ljust(15),insertion_sort.insertion_sort(lst,reverse=True))
print('merge_sort'.ljust(15),merge_sort.merge_sort(lst,reverse=True))
print('quick_sort'.ljust(15),quick_sort.quick_sort(lst,reverse=True))
print('3way_quick_sort'.ljust(15),quick_sort.quick_sort_3way(lst,reverse=True))
print('selection_sort'.ljust(15),selection_sort.selection_sort(lst,reverse=True))
print('list_trie_sort'.ljust(15),trie.list_trie_sort(lst,alphabet,reverse=True))
print('dict_trie_sort'.ljust(15),trie.dict_trie_sort(lst,alphabet,reverse=True))

alphabet        ['_', 'a', 'b', 'c', 'e', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u']
original        ['selection_sort', 'insertion_sort', 'heap_sort', 'merge_sort', 'trie', 'bubble_sort', 'quick_sort']
reverse         ['quick_sort', 'bubble_sort', 'trie', 'merge_sort', 'heap_sort', 'insertion_sort', 'selection_sort']
bubble_sort     ['bubble_sort', 'heap_sort', 'insertion_sort', 'merge_sort', 'quick_sort', 'selection_sort', 'trie']
heap_sort       ['bubble_sort', 'heap_sort', 'insertion_sort', 'merge_sort', 'quick_sort', 'selection_sort', 'trie']
insertion_sort  ['bubble_sort', 'heap_sort', 'insertion_sort', 'merge_sort', 'quick_sort', 'selection_sort', 'trie']
merge_sort      ['bubble_sort', 'heap_sort', 'insertion_sort', 'merge_sort', 'quick_sort', 'selection_sort', 'trie']
quick_sort      ['bubble_sort', 'heap_sort', 'insertion_sort', 'merge_sort', 'quick_sort', 'selection_sort', 'trie']
3way_quick_sort ['bubble_sort', 'heap_sort', 'insertion_sort', 'merge

In [3]:
text = MiniProject1.read_file('shakespeare-complete-works.txt')
words = MiniProject1.sanitize_string(text)
alphabet = sorted((list(set(''.join(words)))))

# Selection Sort

In [4]:
selection_sort_runtimes = []
for _ in range(10):
    arr = random.sample(words,k=30_000)
    time_elapsed = MiniProject1.time_function(selection_sort.selection_sort,arr)
    selection_sort_runtimes.append(time_elapsed)
    print(f'selection_sort with {len(arr)} items took {time_elapsed} seconds')

selection_sort with 30000 items took 145.27603673934937 seconds
selection_sort with 30000 items took 137.50987029075623 seconds
selection_sort with 30000 items took 137.36583757400513 seconds
selection_sort with 30000 items took 135.98756551742554 seconds
selection_sort with 30000 items took 133.30690741539001 seconds
selection_sort with 30000 items took 134.24925231933594 seconds
selection_sort with 30000 items took 134.33917999267578 seconds
selection_sort with 30000 items took 137.66110587120056 seconds
selection_sort with 30000 items took 134.19113516807556 seconds
selection_sort with 30000 items took 134.54934310913086 seconds


# Insertion Sort

In [5]:
insertion_sort_runtimes = []
for _ in range(10):
    arr = random.sample(words,k=30_000)
    time_elapsed = MiniProject1.time_function(insertion_sort.insertion_sort,arr)
    insertion_sort_runtimes.append(time_elapsed)
    print(f'insertion_sort with {len(arr)} items took {time_elapsed} seconds')

insertion_sort with 30000 items took 101.52496337890625 seconds
insertion_sort with 30000 items took 101.31191110610962 seconds
insertion_sort with 30000 items took 102.01908302307129 seconds
insertion_sort with 30000 items took 110.55416584014893 seconds
insertion_sort with 30000 items took 100.3598530292511 seconds
insertion_sort with 30000 items took 101.30678081512451 seconds
insertion_sort with 30000 items took 100.79165315628052 seconds
insertion_sort with 30000 items took 100.96283173561096 seconds
insertion_sort with 30000 items took 102.92517328262329 seconds
insertion_sort with 30000 items took 107.78833293914795 seconds


# Heap Sort

In [6]:
heap_sort_runtimes = []
for _ in range(10):
    time_elapsed = MiniProject1.time_function(heap_sort.heap_sort,words)
    heap_sort_runtimes.append(time_elapsed)
    print(f'heap_sort with {len(words)} items took {time_elapsed} seconds')

heap_sort with 908223 items took 24.338152647018433 seconds
heap_sort with 908223 items took 21.419453620910645 seconds
heap_sort with 908223 items took 23.47134232521057 seconds
heap_sort with 908223 items took 21.334431886672974 seconds
heap_sort with 908223 items took 22.535728693008423 seconds
heap_sort with 908223 items took 21.787537813186646 seconds
heap_sort with 908223 items took 21.244409799575806 seconds
heap_sort with 908223 items took 21.139382123947144 seconds
heap_sort with 908223 items took 21.311426639556885 seconds
heap_sort with 908223 items took 22.57873558998108 seconds


# Merge Sort

In [7]:
merge_sort_runtimes = []
for _ in range(10):
    time_elapsed = MiniProject1.time_function(merge_sort.merge_sort,words)
    merge_sort_runtimes.append(time_elapsed)
    print(f'merge_sort with {len(words)} items took {time_elapsed} seconds')

merge_sort with 908223 items took 14.797749757766724 seconds
merge_sort with 908223 items took 13.36539340019226 seconds
merge_sort with 908223 items took 13.0583176612854 seconds
merge_sort with 908223 items took 13.016315221786499 seconds
merge_sort with 908223 items took 12.954294204711914 seconds
merge_sort with 908223 items took 12.888281345367432 seconds
merge_sort with 908223 items took 13.207359313964844 seconds
merge_sort with 908223 items took 12.87727427482605 seconds
merge_sort with 908223 items took 13.013316631317139 seconds
merge_sort with 908223 items took 12.940288543701172 seconds


# Trie Sort with list implementation

In [71]:
list_trie_sort_runtimes = []
for _ in range(10):
    time_elapsed = MiniProject1.time_function(trie.list_trie_sort,words,alphabet=alphabet)
    list_trie_sort_runtimes.append(time_elapsed)
    print(f'trie_sort with {len(words)} items took {time_elapsed} seconds')

trie_sort with 908223 items took 12.65021824836731 seconds


KeyboardInterrupt: 

# Trie Sort with dict implementation

In [9]:
dict_trie_sort_runtimes = []
for _ in range(10):
    time_elapsed = MiniProject1.time_function(trie.dict_trie_sort,words,alphabet=alphabet)
    dict_trie_sort_runtimes.append(time_elapsed)
    print(f'trie_sort with {len(words)} items took {time_elapsed} seconds')

trie_sort with 908223 items took 4.329096555709839 seconds
trie_sort with 908223 items took 3.9800119400024414 seconds
trie_sort with 908223 items took 3.9119911193847656 seconds
trie_sort with 908223 items took 3.9169974327087402 seconds
trie_sort with 908223 items took 3.6669349670410156 seconds
trie_sort with 908223 items took 3.7559592723846436 seconds
trie_sort with 908223 items took 4.536127805709839 seconds
trie_sort with 908223 items took 3.8939883708953857 seconds
trie_sort with 908223 items took 3.856976270675659 seconds
trie_sort with 908223 items took 3.788961410522461 seconds


# Bubble Sort

In [10]:
bubble_sort_runtimes = []
for _ in range(10):
    arr = random.sample(words,k=30_000)
    time_elapsed = MiniProject1.time_function(bubble_sort.bubble_sort,arr)
    bubble_sort_runtimes.append(time_elapsed)
    print(f'bubble_sort with {len(arr)} items took {time_elapsed} seconds')

bubble_sort with 30000 items took 231.4428973197937 seconds
bubble_sort with 30000 items took 232.32106137275696 seconds
bubble_sort with 30000 items took 229.86359882354736 seconds
bubble_sort with 30000 items took 235.95406937599182 seconds
bubble_sort with 30000 items took 228.6401653289795 seconds
bubble_sort with 30000 items took 227.95213055610657 seconds
bubble_sort with 30000 items took 226.34274411201477 seconds
bubble_sort with 30000 items took 228.08618712425232 seconds
bubble_sort with 30000 items took 227.1368260383606 seconds
bubble_sort with 30000 items took 228.90939736366272 seconds


# Quick Sort


In [11]:
quick_sort_runtimes = []
for _ in range(10):
    # may run into
    # maximum recursion depth exceeded while calling a Python object
    # for bigger datasets
    arr = random.sample(words,k=90_000)
    time_elapsed = MiniProject1.time_function(quick_sort.quick_sort,arr)
    quick_sort_runtimes.append(time_elapsed)
    print(f'quick_sort with {len(arr)} items took {time_elapsed} seconds')

quick_sort with 90000 items took 8.401270151138306 seconds
quick_sort with 90000 items took 8.052036046981812 seconds
quick_sort with 90000 items took 7.882044792175293 seconds
quick_sort with 90000 items took 7.939014196395874 seconds
quick_sort with 90000 items took 7.751973390579224 seconds
quick_sort with 90000 items took 7.983029842376709 seconds
quick_sort with 90000 items took 7.792982578277588 seconds
quick_sort with 90000 items took 8.19208025932312 seconds
quick_sort with 90000 items took 9.033417463302612 seconds
quick_sort with 90000 items took 7.910141468048096 seconds


# 3way Quick Sort

In [12]:
quick_sort_3way_runtimes = []
for _ in range(10):
    time_elapsed = MiniProject1.time_function(quick_sort.quick_sort_3way,words)
    quick_sort_3way_runtimes.append(time_elapsed)
    print(f'quick_sort with {len(words)} items took {time_elapsed} seconds')

quick_sort with 908223 items took 5.0542824268341064 seconds
quick_sort with 908223 items took 5.084294080734253 seconds
quick_sort with 908223 items took 5.042286157608032 seconds
quick_sort with 908223 items took 5.041273355484009 seconds
quick_sort with 908223 items took 5.0412797927856445 seconds
quick_sort with 908223 items took 4.690188407897949 seconds
quick_sort with 908223 items took 4.698192596435547 seconds
quick_sort with 908223 items took 4.89024543762207 seconds
quick_sort with 908223 items took 4.80422043800354 seconds
quick_sort with 908223 items took 5.058284759521484 seconds


In [13]:
print(f'bubble_sort     mean runtime with {str(30_000).rjust(7)} items was {mean(bubble_sort_runtimes)} seconds')
print(f'selection_sort  mean runtime with {str(30_000).rjust(7)} items was {mean(selection_sort_runtimes)} seconds')
print(f'insertion_sort  mean runtime with {str(30_000).rjust(7)} items was {mean(insertion_sort_runtimes)} seconds')
print('-')
print(f'quick_sort      mean runtime with {str(90_000).rjust(7)} items was {mean(quick_sort_runtimes)} seconds')
print('-')
print(f'heap_sort       mean runtime with {str(len(words)).rjust(7)} items was {mean(heap_sort_runtimes)} seconds')
print(f'merge_sort      mean runtime with {str(len(words)).rjust(7)} items was {mean(merge_sort_runtimes)} seconds')
print(f'list_trie_sort  mean runtime with {str(len(words)).rjust(7)} items was {mean(list_trie_sort_runtimes)} seconds')
print(f'quick_sort_3way mean runtime with {str(len(words)).rjust(7)} items was {mean(quick_sort_3way_runtimes)} seconds')
print(f'dict_trie_sort  mean runtime with {str(len(words)).rjust(7)} items was {mean(dict_trie_sort_runtimes)} seconds')

bubble_sort     mean runtime with   30000 items was 229.66490774154664 seconds
selection_sort  mean runtime with   30000 items was 136.4436233997345 seconds
insertion_sort  mean runtime with   30000 items was 102.95447483062745 seconds
-
quick_sort      mean runtime with   90000 items was 8.093799018859864 seconds
-
heap_sort       mean runtime with  908223 items was 22.11606011390686 seconds
merge_sort      mean runtime with  908223 items was 13.211859035491944 seconds
list_trie_sort  mean runtime with  908223 items was 12.10770583152771 seconds
quick_sort_3way mean runtime with  908223 items was 4.940454745292664 seconds
dict_trie_sort  mean runtime with  908223 items was 3.963704514503479 seconds


![loglog-plot](sorting-runtimes.png)


In [73]:
from scipy.optimize import curve_fit
n2 = lambda n: n * n
nlogn = lambda n: n*np.log(n)

functions = ['dict_trie_sort','quick_sort_3way','list_trie_sort','merge_sort','heap_sort',
             'quick_sort','insertion_sort','selection_sort','bubble_sort']
n2 = ['insertion_sort','selection_sort','bubble_sort']
df = pd.read_csv('sorting-runtimes.csv')

times_dict = {func: defaultdict(list) for func in functions}

for idx, row in df.iterrows():
    if row['Algorithm'] not in functions:
        continue
    times_dict[row['Algorithm']][row['Data size']].append(row['Runtime'])

xys_dict = {func: MiniProject1.get_values(times_dict[func]) for func in functions}

algs = []
algs_reverse = []

for alg, xys in xys_dict.items():
    if 'reversed_' in alg:
        algs_reverse.append(alg)
    else:
        algs.append(alg)

for alg in algs:
    x,y = xys_dict[alg]
    if alg in n2:
#         coef = np.polyfit(x,y,2)
#         fit = np.poly1d(coef)
#         n = len(words)
        popt, pcov = curve_fit(n2, x, y, bounds=(0,1_000_000))
        print(popt,pcov)
    else:
#         coef = np.polyfit(np.log(x),y,1)
#         fit = np.poly1d(coef)
#         n = np.log(len(words))
        popt, pcov = curve_fit(nlogn,x,y, bounds=(0,1_000_000))
        print(popt,pcov)
    
    secs = fit(n)
    hours = secs / 60 / 60
    mins = hours % 1 * 60
    hours = int(hours)
    secs = mins % 1 * 60
    mins = int(mins)
    millis = int(secs % 1 * 1000)
    secs = int(secs)
    print(alg.ljust(15),str(hours).rjust(2)+':'+str(mins).rjust(2)+':'+str(secs).rjust(2)+'.'+str(millis))

ValueError: Unable to determine number of fit parameters.