In [23]:
# Code for sort algorythims in sort_collection is copied from:
# https://stackabuse.com/sorting-algorithms-in-python
# and
# https://www.programiz.com/dsa/radix-sort

In [24]:
import numpy as np
from datetime import datetime
import pandas as pd
from timeit import timeit
import datetime, time
import yaml

In [25]:
import sort_collection as sc
import test_collection as tc

In [26]:
runs    = 10000
min     = 0
max     = 1000
size    = 1000
reset_output_folder = False

In [27]:
algorithm_stmts = [
    ('selection_sort', 'sc.selection_sort(bucket)'),
    ('insertion_sort', 'sc.insertion_sort(bucket)'),
    ('heap_sort', 'sc.heap_sort(bucket)'),
    ('merge_sort', 'sc.merge_sort(bucket)'),
    ('quick_sort', 'sc.quick_sort(bucket)'),
    ('radix_sort', 'sc.radix_sort(bucket)'),
    ('crystal_sort', 'tc.crystal_sort(bucket)'),
    ('push_sort', 'tc.push_sort(bucket)'),
]

In [28]:
results = {}

In [29]:
def get_rand_set(min: int, max: int, size: int):
    set = np.random.randint(min, max, size=size)
    set = set.tolist()
    return set

buckets = []

for run in range(runs):
    bucket = get_rand_set(min, max, size)
    # bucket = [1] * 1000
    buckets.append(bucket)

In [30]:
time_field_name = 'time (us)'
results = {}

In [31]:
for algorithm_name, algorithm_stmt in algorithm_stmts:
    results[algorithm_name] = {}
    results[algorithm_name][time_field_name] = 0
    run_measurements = 0.0
    for bucket in buckets:
        measurement = timeit(
            algorithm_stmt,
            setup="""import sort_collection as sc
import test_collection as tc""",
            globals={"bucket": bucket},
            number=1)
        run_measurements += measurement
    
    results[algorithm_name][time_field_name] = run_measurements / runs

In [32]:
import os

def prep_output_folder(folder_name: str, reset_output_data: bool):
    '''
    Prepare folder for output csv files
    '''
    if not os.path.exists(folder_name):
        os.mkdir(folder_name)
    else:
        if reset_output_data:
            for root, directories, files in os.walk(folder_name):
                for file in files:
                    file_path = root + '/' + file
                    os.remove(file_path)

In [33]:
df_by_name = pd.DataFrame().from_dict(results, orient='index')
df_by_value = df_by_name.sort_values(by='time (us)', axis=0)
print('by name')
print(df_by_name)
print('by value')
print(df_by_value)
prep_output_folder('results', reset_output_folder)
file_name_by_name  = f'results/results_{runs:02}_runs__size_{size}__max_{max}_by_name.csv'
file_name_by_value = f'results/results_{runs:02}_runs__size_{size}__max_{max}_by_value.csv'
df_by_name.to_csv(file_name_by_name)
df_by_value.to_csv(file_name_by_value)

by name
                time (us)
crystal_sort     0.032430
heap_sort        0.003825
insertion_sort   0.000150
merge_sort       0.002135
push_sort        0.013249
quick_sort       0.001082
radix_sort       0.001227
selection_sort   0.027256
by value
                time (us)
insertion_sort   0.000150
quick_sort       0.001082
radix_sort       0.001227
merge_sort       0.002135
heap_sort        0.003825
push_sort        0.013249
selection_sort   0.027256
crystal_sort     0.032430
