In [3]:
# Code for sort algorythims in sort_collection is copied from:
# https://stackabuse.com/sorting-algorithms-in-python
# and
# https://www.programiz.com/dsa/radix-sort

In [4]:
import numpy as np
import pandas as pd
from timeit import timeit

In [5]:
runs    = 100
min     = 0
max     = 100
size    = 1000
reset_output_folder = False

In [6]:
from toolbox import lists as ls

algorithm_stmts = ls.algorithm_stmts

In [7]:
def get_rand_set(min: int, max: int, size: int):
    set = np.random.randint(min, max, size=size)
    set = set.tolist()
    return set

buckets = []

for run in range(runs):
    bucket = get_rand_set(min, max, size)
    # bucket = [1] * 1000
    buckets.append(bucket)

In [8]:
time_field_name = 'time (us)'
results = {}

In [9]:
for algorithm_name, algorithm_stmt in algorithm_stmts:
    results[algorithm_name] = {}
    results[algorithm_name][time_field_name] = 0
    run_measurements = 0.0
    for bucket in buckets:
        measurement = timeit(
            algorithm_stmt,
            setup="""from algorithms import sort_collection as sc
from algorithms import test_collection as tc""",
            globals={"bucket": bucket},
            number=1)
        run_measurements += measurement
    
    results[algorithm_name][time_field_name] = run_measurements / runs

In [10]:
import os

def prep_output_folder(folder_name: str, reset_output_data: bool):
    '''
    Prepare folder for output csv files
    '''
    if not os.path.exists(folder_name):
        os.mkdir(folder_name)
    else:
        if reset_output_data:
            for root, directories, files in os.walk(folder_name):
                for file in files:
                    file_path = root + '/' + file
                    os.remove(file_path)

In [11]:
df_by_name = pd.DataFrame().from_dict(results, orient='index')
df_by_value = df_by_name.sort_values(by='time (us)', axis=0)
print('by name')
print(df_by_name)
print('by value')
print(df_by_value)
prep_output_folder('results', reset_output_folder)
file_name_by_name  = f'results/results_{runs:02}_runs__size_{size}__max_{max}_by_name.csv'
file_name_by_value = f'results/results_{runs:02}_runs__size_{size}__max_{max}_by_value.csv'
df_by_name.to_csv(file_name_by_name)
df_by_value.to_csv(file_name_by_value)

by name
                time (us)
crystal_sort     0.007702
heap_sort        0.005666
insertion_sort   0.000254
merge_sort       0.003174
push_sort        0.021386
quick_sort       0.001536
radix_sort       0.001021
selection_sort   0.037362
by value
                time (us)
insertion_sort   0.000254
radix_sort       0.001021
quick_sort       0.001536
merge_sort       0.003174
heap_sort        0.005666
crystal_sort     0.007702
push_sort        0.021386
selection_sort   0.037362
