In [None]:
from variant_selection_and_merge_methods import run_experiment

from process_optimizer import __ProcessOptimizerBase

from itertools import product, repeat, chain

import pandas
import numpy
import multiprocessing
import time

In [None]:
# Output location for the exported result spreadsheets.
results_folder = '../experiment_results'
# Input location handed to run_experiment for every setup.
experiment_folder = '../experiment_data'

# Subset run to show variance magnitudes

In [None]:
# Factor lists for the subset run; the experiment setups are the Cartesian
# product of these lists, and each setup is repeated num_of_experiments times.
tree_types = [
    'small_combinatory',
    'large_combinatory',
]
resource_types = [
    'average_and_time_cost_specialized_resources',
]
rand_events = [
    'with_random_events',
]
runs = [
    '0',
]
variant_selection = [
    __ProcessOptimizerBase.ALL_VARIANTS,
    __ProcessOptimizerBase.REQUIRED_VARIANTS,
    __ProcessOptimizerBase.MIN_REQUIRED_VARIANTS,
]
merge_method = [
    __ProcessOptimizerBase.HIGHEST_COUNT_MERGE,
    __ProcessOptimizerBase.WEIGHTED_AVERAGE_MERGE,
    __ProcessOptimizerBase.PARETO_MERGE,
]
num_of_experiments = 10

In [None]:
# Count the setups by walking the Cartesian product lazily rather than
# materialising it as a list first.
exp_setup_list_size = sum(
    1
    for setup in product(tree_types, resource_types, rand_events, runs, variant_selection, merge_method)
)
print(f'Experiment setup size: {exp_setup_list_size}\tTotal run size: {exp_setup_list_size * num_of_experiments}')

In [None]:
# Run every subset setup in parallel and time the whole batch.
start_time = time.perf_counter()

# Each worker call is run_experiment(experiment_folder, num_of_experiments, setup),
# where setup is one tuple from the Cartesian product of the factor lists.
# run_experiment returns an iterable of result records per setup; they are
# flattened into a single list.
#
# The context manager guarantees the 10 worker processes are torn down even if
# starmap raises (e.g. a worker fails), so a failed cell does not leak
# processes into the kernel on every re-run.
with multiprocessing.Pool(10) as pool:
    res_subset = list(chain.from_iterable(pool.starmap(
        run_experiment,
        zip(
            repeat(experiment_folder),
            repeat(num_of_experiments),
            product(tree_types, resource_types, rand_events, runs, variant_selection, merge_method),
        ),
    )))

    # Normal shutdown on success: stop accepting work and wait for the workers.
    pool.close()
    pool.join()

end_time = time.perf_counter()

In [None]:
# Elapsed time in seconds between the start_time and end_time perf_counter readings.
end_time - start_time

In [None]:
# Collect the flattened result records of the subset run into one frame.
df_subset = pandas.DataFrame(res_subset)

In [None]:
# Display the subset-run results frame as the cell output.
df_subset

In [None]:
# Distinct values of each grouping column, extracted in one pass over the
# column names (same order as the unpacking targets).
files, variants, merges, opt_methods = (
    df_subset[column].unique()
    for column in ('file', 'variant_selection', 'merging_methods', 'opt_method')
)

In [None]:
# Number of (file, variant selection, merge method, opt method) combinations
# in the Cartesian product of the distinct values found in df_subset.
len(list(product(files, variants, merges, opt_methods)))

In [None]:
mean_std_cost = list()
mean_std_time = list()
mean_std_runtime = list()

mean_se_cost = list()
mean_se_time = list()
mean_se_runtime = list()

for comb in product(files, variants, merges, opt_methods):
    target_frame = df_subset.loc[(df_subset['file'] == comb[0]) & (df_subset['variant_selection'] == comb[1]) & (df_subset['merging_methods'] == comb[2]) & (df_subset['opt_method'] == comb[3])]
    len_target_frame = len(target_frame)

    standard_deviation_time = target_frame["time_mean"].std()
    standard_deviation_cost = target_frame["cost_mean"].std()
    standard_deviation_run_time = target_frame["run_time"].std()

    standard_error_time = target_frame["time_mean"].std()/numpy.sqrt(len_target_frame)
    standard_error_cost = target_frame["cost_mean"].std()/numpy.sqrt(len_target_frame)
    standard_error_run_time = target_frame["run_time"].std()/numpy.sqrt(len_target_frame)

    print(f'F:{comb[0]}\tV:{comb[1]}\tM:{comb[2]}\tO:{comb[3]}')
    print(f'MT:{target_frame["time_mean"].mean():.3f}\tMC:{target_frame["cost_mean"].mean():.3f}\tMRT:{target_frame["run_time"].mean():.3f}')
    print(f'ST:{standard_deviation_time:.3f}\tSC:{standard_deviation_cost:.3f}\tSRT:{standard_deviation_run_time:.3f}\n')
    print(f'SET:{standard_error_time:.3f}\tSEC:{standard_error_cost:.3f}\tSERT:{standard_error_run_time:.3f}\n')

    mean_std_cost.append(standard_deviation_time)
    mean_std_time.append(standard_deviation_cost)
    mean_std_runtime.append(standard_deviation_run_time)

    mean_se_cost.append(standard_error_time)
    mean_se_time.append(standard_error_cost)
    mean_se_runtime.append(standard_error_run_time)

In [None]:
df_subset.loc[df_subset['variant_selection'] == 'All variants']['run_time'].mean()

In [None]:
df_subset.loc[df_subset['variant_selection'] == 'Required variants']['run_time'].mean()

In [None]:
df_subset.loc[df_subset['variant_selection'] == 'Min required variants']['run_time'].mean()

In [None]:
mean_std_runtime

In [None]:
print(f'MST:{numpy.mean(mean_std_time):.3f}\tMSC:{numpy.mean(mean_std_cost):.3f}\tMSRT:{numpy.mean(mean_std_runtime):.3f}')

In [None]:
print(f'MSET:{numpy.mean(mean_se_cost):.3f}\tMSEC:{numpy.mean(mean_se_time):.3f}\tMSERT:{numpy.mean(mean_se_runtime):.3f}')

In [None]:
# Persist the subset-run results as a spreadsheet, without the index column.
df_subset.to_excel(
    excel_writer=f'{results_folder}/variant_selection_and_merge_methods_subset_run.xlsx',
    index=False,
)

# Full run without repetition

In [None]:
# Factor lists for the full run; the experiment setups are the Cartesian
# product of these lists, and each setup is executed num_of_experiments times.
tree_types = [
    'loops_only',
    'parallel_only',
    'xor_choice_only',
    'small_combinatory',
    'large_combinatory',
]
resource_types = [
    'average_resources',
    'average_and_time_cost_specialized_resources',
]
rand_events = [
    'with_random_events',
    'without_random_events',
]
runs = [
    '0',
    '1',
    '2',
]
variant_selection = [
    __ProcessOptimizerBase.ALL_VARIANTS,
    __ProcessOptimizerBase.REQUIRED_VARIANTS,
    __ProcessOptimizerBase.MIN_REQUIRED_VARIANTS,
]
merge_method = [
    __ProcessOptimizerBase.HIGHEST_COUNT_MERGE,
    __ProcessOptimizerBase.WEIGHTED_AVERAGE_MERGE,
    __ProcessOptimizerBase.PARETO_MERGE,
]
num_of_experiments = 1

In [None]:
# Count the setups by walking the Cartesian product lazily rather than
# materialising it as a list first.
exp_setup_list_size = sum(
    1
    for setup in product(tree_types, resource_types, rand_events, runs, variant_selection, merge_method)
)
print(f'Experiment setup size: {exp_setup_list_size}\tTotal run size: {exp_setup_list_size * num_of_experiments}')

In [None]:
# Run every full-run setup in parallel and time the whole batch.
start_time = time.perf_counter()

# Each worker call is run_experiment(experiment_folder, num_of_experiments, setup),
# where setup is one tuple from the Cartesian product of the factor lists.
# run_experiment returns an iterable of result records per setup; they are
# flattened into a single list.
#
# The context manager guarantees the 10 worker processes are torn down even if
# starmap raises (e.g. a worker fails), so a failed cell does not leak
# processes into the kernel on every re-run.
with multiprocessing.Pool(10) as pool:
    res_full = list(chain.from_iterable(pool.starmap(
        run_experiment,
        zip(
            repeat(experiment_folder),
            repeat(num_of_experiments),
            product(tree_types, resource_types, rand_events, runs, variant_selection, merge_method),
        ),
    )))

    # Normal shutdown on success: stop accepting work and wait for the workers.
    pool.close()
    pool.join()

end_time = time.perf_counter()

In [None]:
# Collect the flattened result records of the full run into one frame.
df_full = pandas.DataFrame(res_full)

In [None]:
# Tag every row of this full run with a constant repetition index.
# NOTE(review): the value 2 is hard-coded and presumably pairs with the "_3"
# suffix of the exported file name (0-based index vs 1-based file) — confirm,
# and keep the two in sync when re-running for another repetition.
df_full['rep'] = 2

In [None]:
# Persist the full-run results as a spreadsheet, without the index column.
df_full.to_excel(
    excel_writer=f'{results_folder}/variant_selection_and_merge_methods_full_run_3.xlsx',
    index=False,
)

In [None]:
# Elapsed time in seconds between the start_time and end_time perf_counter readings.
end_time - start_time

In [None]:
# Display the full-run results frame as the cell output.
df_full