In [1]:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from itertools import product
from pandas import DataFrame
from pathlib import Path
import pickle

from sorts import run_sorts, Sorts, Direction

In [2]:
# run_sorts(1, 1, Sorts.INSERTION, Direction.RANDOM)

In [3]:
input_sizes = [1000, 2500, 5000, 10000, 25000, 50000, 100000, 250000]
vect_sizes = [10, 25, 50]
# input_sizes = [1000, 20000]
# vect_sizes = [10, 25]

In [4]:
# iters = 1

# results = dict()

# for algorithm in Sorts:
#     results[algorithm.name] = dict()
#     for direction in Direction:
#         results[algorithm.name][direction.name] = dict()
#         for num_vects, vect_size in product(input_sizes, vect_sizes):
#             results[algorithm.name][direction.name][(num_vects, vect_size)] = [run_sorts(num_vects, vect_size, algorithm, direction)['sort'] for _ in range(iters)]

In [5]:
iters = 10

parameters = list(product(input_sizes, vect_sizes, Sorts, Direction, range(iters)))
print(len(parameters))

# with open('parameters.p', 'wb') as fout:
#     pickle.dump(parameters, fout)

2160


In [6]:
def get_chunk(size: int, parameters: list):
    for start in range(0, len(parameters), size):
        yield parameters[start: start+size]

In [7]:
chunk_size = 100

chunker = get_chunk(chunk_size, parameters)

for n, chunk in enumerate(chunker): 
    file = Path(f'results_chunk_{n}_{chunk_size}.p')
    if file.exists():
        continue
    
    with ProcessPoolExecutor(max_workers=100) as executor:
#         results = executor.map(run_sorts_params, parameters)
        futures = [executor.submit(run_sorts, *params) for params in chunk]
#         results = executor.map(run_sorts, *list(zip(*chunk)))

    results = [fut.result() for fut in futures]
        

    rows = [{'num_vects': params[0],
             'vect_size': params[1],
             'algorithm': params[2].name,
             'data_type': params[3].name,
             'iteration': params[4],
             'real': result['sort'][0],
             'user': result['sort'][1],
             'sys':  result['sort'][2]} for params, result in zip(chunk, results)]
    
    df = DataFrame(rows)
    
    print(f'writing file {file}')
    with open(file, 'wb') as fout:
        pickle.dump(df, fout)

writing file results_chunk_20_100.p
writing file results_chunk_21_100.p


In [118]:
run_sorts(250000, 5, Sorts.MERGE, Direction.RANDOM)

{'generate': ('38ms', '30ms', '0ms'), 'sort': ('200ms', '190ms', '10ms')}

In [50]:
run_sorts(50000, 40, Sorts.INSERTION_IMPROVED, Direction.RANDOM)

{'generate': ('34ms', '20ms', '0ms'), 'sort': ('2075ms', '2040ms', '20ms')}

In [8]:
def average_tuple_samples(tuples: list):
    n = len(tuples)
    real, user, sys = zip(*tuples)
    real = sum(int(time[:-2]) for time in real) / n
    user = sum(int(time[:-2]) for time in user) / n
    sys = sum(int(time[:-2]) for time in sys) / n
    return real, user, sys

ave_results = dict()

for algorithm, directions in results.items():
    ave_results[algorithm] = dict()
    for direction, parameters in directions.items():
        ave_results[algorithm][direction] = dict()
        for n_m, samples in parameters.items():
            ave_results[algorithm][direction][n_m] = average_tuple_samples(samples)

In [9]:
rows = list()

for algorithm, directions in ave_results.items():
    for direction, parameters in directions.items():
        for (n, m), (real, user, sys) in parameters.items():
            rows.append({
                'algorithm': algorithm,
                'data': direction,
                'n': n,
                'm': m,
                'real': real,
                'user': user,
                'sys': sys
            })
df = DataFrame(rows).set_index('algorithm')
data_map = {
    'RANDOM': "Random Vector",
    'SORTED': "Sorted Vector",
    'REVERSE': "Inverse Sorted Vector"
}
df['data'] = df['data'].map(data_map)

In [10]:
insertion_data = df.loc['INSERTION']
improved_insertion_data = df.loc['INSERTION_IMPROVED']
merge_data = df.loc['MERGE']

In [11]:
insertion_data.pivot_table(index='m', columns=['n', 'data'], values='real')

n,10,10,10,20,20,20
data,Inverse Sorted Vector,Random Vector,Sorted Vector,Inverse Sorted Vector,Random Vector,Sorted Vector
m,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
10,0.0,0.0,0.0,0.0,0.0,0.0
25,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
improved_insertion_data.pivot_table(index='m', columns=['n', 'data'], values='real')

n,10,10,10,20,20,20
data,Inverse Sorted Vector,Random Vector,Sorted Vector,Inverse Sorted Vector,Random Vector,Sorted Vector
m,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
10,0.0,0.0,0.0,0.0,0.0,0.0
25,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
merge_data.pivot_table(index='m', columns=['n', 'data'], values='real')

n,10,10,10,20,20,20
data,Inverse Sorted Vector,Random Vector,Sorted Vector,Inverse Sorted Vector,Random Vector,Sorted Vector
m,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
10,0.0,0.0,0.0,0.0,0.0,0.0
25,0.0,0.0,0.0,0.0,0.0,0.0
