In [1]:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from itertools import product
from pandas import DataFrame
from pathlib import Path
import pickle

from sorts import run_sorts, Sorts

In [2]:
# String counts swept by the benchmark grid, smallest to largest.
num_strings = [
    10, 25, 50, 75,
    100, 250, 500, 750,
    1000, 2500, 5000, 7500,
    10000, 12000, 14000, 16000, 18000,
    20000, 22000, 24000, 25000,
]
# String lengths swept by the benchmark grid.
string_lengths = [25, 35, 45]

In [3]:
# NOTE(review): legacy benchmark loop, kept commented out. As written it cannot
# run in this notebook: `Direction`, `input_sizes` and `vect_sizes` are not
# imported or defined in any visible cell. The averaging cell further down
# still iterates `results` in the nested shape built here
# (algorithm -> direction -> (num_vects, vect_size) -> list of samples).
# iters = 1

# results = dict()

# for algorithm in Sorts:
#     results[algorithm.name] = dict()
#     for direction in Direction:
#         results[algorithm.name][direction.name] = dict()
#         for num_vects, vect_size in product(input_sizes, vect_sizes):
#             results[algorithm.name][direction.name][(num_vects, vect_size)] = [run_sorts(num_vects, vect_size, algorithm, direction)['sort'] for _ in range(iters)]

In [4]:
# Number of repetitions of every (count, length, algorithm) combination.
iters = 10

# Full benchmark grid as a flat list of 4-tuples
# (num_strings, string_length, algorithm, iteration). The nesting order below
# matches itertools.product: the right-most loop varies fastest.
parameters = [
    (count, length, algorithm, iteration)
    for count in num_strings
    for length in string_lengths
    for algorithm in Sorts
    for iteration in range(iters)
]
print(len(parameters))

# with open('parameters.p', 'wb') as fout:
#     pickle.dump(parameters, fout)

1260


In [5]:
def get_chunk(size: int, parameters: list):
    """Lazily yield consecutive slices of ``parameters``.

    Args:
        size: Maximum number of items per slice; used as the step between
            slice start offsets.
        parameters: Sequence to split. It is sliced, not copied up front.

    Yields:
        ``parameters[k:k + size]`` for k = 0, size, 2 * size, ...; the last
        slice may be shorter than ``size``.
    """
    total = len(parameters)
    offsets = range(0, total, size)
    yield from (parameters[lo:lo + size] for lo in offsets)

In [6]:
chunk_size = 100

chunker = get_chunk(chunk_size, parameters)

# Work through the parameter grid one chunk at a time. Each finished chunk is
# pickled to its own file, and a chunk whose file already exists is skipped,
# so an interrupted run can be restarted without repeating completed chunks.
for n, chunk in enumerate(chunker):
    file = Path(f'results_chunk_{n}_{chunk_size}.p')
    if not file.exists():
        # NOTE(review): the worker count is hard-coded to 100 regardless of
        # the machine's core count. concurrent.futures documents a limit of 61
        # workers on Windows, and oversubscribing cores may inflate the very
        # timings being measured -- confirm this is intended.
        # NOTE(review): every 4-tuple is splatted into run_sorts, so the
        # iteration index arrives as its 4th positional argument, while the
        # ad-hoc calls elsewhere in this notebook pass only three -- confirm
        # run_sorts expects it.
        with ProcessPoolExecutor(max_workers=100) as executor:
            futures = [executor.submit(run_sorts, *args) for args in chunk]

        # Leaving the `with` block has already waited for every submitted run.
        results = [future.result() for future in futures]

        # One flat record per run: its grid coordinates plus the three entries
        # of the run's 'sort' timing tuple.
        rows = [
            {
                'num_strings': count,
                'string_length': length,
                'algorithm': algorithm.name,
                'iteration': iteration,
                'real': result['sort'][0],
                'user': result['sort'][1],
                'sys':  result['sort'][2],
            }
            for (count, length, algorithm, iteration), result in zip(chunk, results)
        ]

        df = DataFrame(rows)

        print(f'writing file {file}')
        with file.open('wb') as fout:
            pickle.dump(df, fout)

writing file results_chunk_1_100.p
writing file results_chunk_2_100.p
writing file results_chunk_3_100.p
writing file results_chunk_4_100.p
writing file results_chunk_5_100.p
writing file results_chunk_6_100.p
writing file results_chunk_7_100.p
writing file results_chunk_8_100.p
writing file results_chunk_9_100.p
writing file results_chunk_10_100.p
writing file results_chunk_11_100.p
writing file results_chunk_12_100.p


In [21]:
# Ad-hoc single run: 25000 strings (the largest count in the grid) of length 35
# with Sorts.RADIX_INSERTION; the returned timing dict is shown as cell output.
run_sorts(25000, 35, Sorts.RADIX_INSERTION)

{'generate': ('7ms', '0ms', '0ms'), 'sort': ('14131ms', '13940ms', '80ms')}

In [38]:
# Ad-hoc single run: 25000 strings (the largest count in the grid) of length 45
# with Sorts.RADIX_COUNTING; the returned timing dict is shown as cell output.
run_sorts(25000, 45, Sorts.RADIX_COUNTING)

{'generate': ('11ms', '10ms', '0ms'), 'sort': ('26ms', '20ms', '0ms')}

In [8]:
def average_tuple_samples(tuples: list):
    """Average a list of (real, user, sys) timing samples.

    Every field is a string made of an integer followed by the unit ``'ms'``
    (for example ``'14131ms'``). The unit is stripped and the remainder is
    parsed with ``int``.

    Args:
        tuples: Non-empty list of 3-field samples ``(real, user, sys)``.

    Returns:
        A 3-tuple ``(real, user, sys)`` holding the arithmetic mean of each
        field as a float, in milliseconds.

    Raises:
        ValueError: If ``tuples`` is empty, if the samples do not unpack into
            exactly three columns, or if a field is not an integer followed
            by ``'ms'``.
    """
    def _to_ms(time: str) -> int:
        # Refuse anything without the 'ms' unit: blindly dropping the last two
        # characters would turn e.g. '12s' into 1 without any error.
        if not time.endswith('ms'):
            raise ValueError(f"expected a duration ending in 'ms', got {time!r}")
        return int(time[:-2])

    if not tuples:
        raise ValueError('cannot average an empty list of samples')

    n = len(tuples)
    # Transpose the samples into one column per timing field.
    real_col, user_col, sys_col = zip(*tuples)
    real = sum(_to_ms(time) for time in real_col) / n
    user = sum(_to_ms(time) for time in user_col) / n
    sys_mean = sum(_to_ms(time) for time in sys_col) / n
    return real, user, sys_mean

# Collapse every list of timing samples to its mean, keeping the nesting
# algorithm -> direction -> size key -> (real, user, sys).
# Built as a comprehension so no loop variable leaks into the notebook
# namespace: the previous module-level loop rebound `parameters`, clobbering
# the benchmark grid that the chunking cell reads.
# NOTE(review): this needs `results` to be that nested dict, but the chunked
# benchmark cell binds `results` to a list of per-run dicts -- confirm where
# the nested `results` is meant to come from before relying on this cell.
ave_results = {
    algorithm: {
        direction: {
            n_m: average_tuple_samples(samples)
            for n_m, samples in samples_by_size.items()
        }
        for direction, samples_by_size in directions.items()
    }
    for algorithm, directions in results.items()
}

In [9]:
# Flatten the nested averages into one record per (algorithm, data, n, m).
# Written as a comprehension so no loop variable leaks into the notebook
# namespace: the previous module-level loops rebound `parameters` (the
# benchmark grid) and `n`.
rows = [
    {
        'algorithm': algorithm,
        'data': direction,
        'n': n_key,
        'm': m_key,
        'real': real,
        'user': user,
        'sys': sys_mean,
    }
    for algorithm, directions in ave_results.items()
    for direction, averages_by_size in directions.items()
    for (n_key, m_key), (real, user, sys_mean) in averages_by_size.items()
]
df = DataFrame(rows).set_index('algorithm')

# Human-readable labels for the 'data' column; any value that is not a key
# here becomes NaN after .map().
data_map = {
    'RANDOM': "Random Vector",
    'SORTED': "Sorted Vector",
    'REVERSE': "Inverse Sorted Vector"
}
df['data'] = df['data'].map(data_map)

In [10]:
# Slice one sub-frame per algorithm label out of the algorithm-indexed frame.
insertion_data, improved_insertion_data, merge_data = (
    df.loc[label] for label in ('INSERTION', 'INSERTION_IMPROVED', 'MERGE')
)

In [11]:
# Pivot the 'INSERTION' rows: one row per m, one column per (n, data) pair.
# Cells hold the 'real' value (pivot_table aggregates with the mean by default).
insertion_data.pivot_table(index='m', columns=['n', 'data'], values='real')

n,10,10,10,20,20,20
data,Inverse Sorted Vector,Random Vector,Sorted Vector,Inverse Sorted Vector,Random Vector,Sorted Vector
m,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
10,0.0,0.0,0.0,0.0,0.0,0.0
25,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Pivot the 'INSERTION_IMPROVED' rows: one row per m, one column per (n, data) pair.
# Cells hold the 'real' value (pivot_table aggregates with the mean by default).
improved_insertion_data.pivot_table(index='m', columns=['n', 'data'], values='real')

n,10,10,10,20,20,20
data,Inverse Sorted Vector,Random Vector,Sorted Vector,Inverse Sorted Vector,Random Vector,Sorted Vector
m,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
10,0.0,0.0,0.0,0.0,0.0,0.0
25,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Pivot the 'MERGE' rows: one row per m, one column per (n, data) pair.
# Cells hold the 'real' value (pivot_table aggregates with the mean by default).
merge_data.pivot_table(index='m', columns=['n', 'data'], values='real')

n,10,10,10,20,20,20
data,Inverse Sorted Vector,Random Vector,Sorted Vector,Inverse Sorted Vector,Random Vector,Sorted Vector
m,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
10,0.0,0.0,0.0,0.0,0.0,0.0
25,0.0,0.0,0.0,0.0,0.0,0.0
