In [6]:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from itertools import product
from pandas import DataFrame
from pathlib import Path
import pickle

from trees import run_tree_sort, Tree, Data

In [7]:
# Values for the `num_nodes` dimension of the parameter sweep.
num_nodes = [
    10_000, 20_000, 30_000, 40_000, 50_000, 65_000,
    80_000, 95_000, 100_000, 110_000, 115_000, 125_000,
]

In [8]:
# Number of repetitions per (num_nodes, Data, Tree) combination.
iters = 10

# Full sweep: one tuple (num_nodes, data member, tree member, iteration index)
# per run, in the order produced by itertools.product.
parameters = [*product(num_nodes, Data, Tree, range(iters))]
print(len(parameters))

720


In [9]:
def get_chunk(size: int, parameters: list):
    """Yield consecutive slices of ``parameters`` with at most ``size`` items each.

    Slices are produced in order and do not overlap; the last one may be
    shorter than ``size``. An empty ``parameters`` yields nothing.

    Args:
        size: Maximum number of items per slice; must be at least 1.
        parameters: Sequence to split (anything supporting ``len`` and slicing).

    Yields:
        ``parameters[start:start + size]`` for start = 0, size, 2 * size, ...

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator,
            the error is raised when iteration starts, not at call time.
    """
    # A negative step would make range() empty and silently drop every item;
    # a zero step fails with an unrelated-looking range() error. Reject both.
    if size < 1:
        raise ValueError(f'size must be a positive integer, got {size}')
    for start in range(0, len(parameters), size):
        yield parameters[start: start+size]

In [10]:
# Number of parameter tuples per chunk. Also used as the worker-process count
# below and embedded in every result filename.
chunk_size = 50

chunker = get_chunk(chunk_size, parameters)

for n, chunk in enumerate(chunker):
    file = Path(f'results_chunk_{n}_{chunk_size}.p')
    # Resume support: a chunk whose result file already exists is not recomputed.
    if file.exists():
        continue

    # NOTE(review): this starts one worker process per task (up to chunk_size).
    # If run_tree_sort records timings, running more processes than CPU cores
    # may skew them -- confirm this is intended.
    with ProcessPoolExecutor(max_workers=chunk_size) as executor:
        futures = [executor.submit(run_tree_sort, *params) for params in chunk]

    # Leaving the `with` block waits for all submitted tasks, so result()
    # returns immediately here (or re-raises the exception raised in a worker).
    results = [fut.result() for fut in futures]

    # One row per run: the sweep parameters plus whatever key/value pairs
    # run_tree_sort returned (it is unpacked as a mapping).
    rows = [{'num_nodes': params[0],
             'data_type': params[1].name,
             'tree_type': params[2].name,
             'iteration': params[3],
             **result} for params, result in zip(chunk, results)]

    df = DataFrame(rows)

    print(f'writing file {file}')
    # Write to a temporary sibling and rename it into place afterwards. An
    # interrupted write then never leaves a truncated file under the final
    # name, which the exists() check above would treat as a finished chunk.
    tmp_file = file.with_name(file.name + '.tmp')
    with open(tmp_file, 'wb') as fout:
        pickle.dump(df, fout)
    tmp_file.replace(file)

writing file results_chunk_0_50.p
writing file results_chunk_1_50.p
writing file results_chunk_2_50.p
writing file results_chunk_3_50.p
writing file results_chunk_4_50.p
writing file results_chunk_5_50.p
writing file results_chunk_6_50.p
writing file results_chunk_7_50.p
writing file results_chunk_8_50.p
writing file results_chunk_9_50.p
writing file results_chunk_10_50.p
writing file results_chunk_11_50.p
writing file results_chunk_12_50.p
writing file results_chunk_13_50.p
writing file results_chunk_14_50.p
