In [1]:
from main import *
from main import GenericOptimizer

# Folder that is searched for dataset sub-folders, and folder holding the pickled run results.
datasets_path = "Datasets"
save_path = "results"

# Group the result files by execution name. The execution name is the file name
# with '.pickle' removed and everything up to (and including) the first
# underscore dropped, so all files that differ only in that leading part end
# up in the same list.
iteration_mappings = {}
for file in os.listdir(save_path):
	stem = file.replace('.pickle', '')
	execution_name = stem.partition('_')[2]
	iteration_mappings.setdefault(execution_name, []).append(file)

In [None]:
def parallel_error(instance, x_train, y_train, x_compare, y_compare):
	"""Return the k-NN classification error of one training-set selection.

	The rows of ``x_train`` / ``y_train`` picked by ``instance`` are used to
	fit a ``KNeighborsClassifier`` with ``GenericOptimizer.n_neighbours``
	neighbours, which is then scored on ``x_compare`` / ``y_compare``.

	Args:
		instance: Indexer applied to ``x_train`` and ``y_train``
			(presumably a boolean mask or index array -- confirm with caller).
		x_train, y_train: Full training features and labels.
		x_compare, y_compare: Features and labels the fitted model is scored on.

	Returns:
		``1 - accuracy`` on the comparison set, or ``1`` when the selection
		holds fewer rows than the number of neighbours.
	"""
	k = GenericOptimizer.n_neighbours
	x_subset = x_train[instance]
	y_subset = y_train[instance]

	# Not enough rows to supply k neighbours: report the worst error instead of fitting.
	if x_subset.shape[0] < k:
		return 1

	classifier = KNeighborsClassifier(n_neighbors=k)
	classifier.fit(x_subset, y_subset)
	predictions = classifier.predict(x_compare)
	return 1 - accuracy_score(y_compare, predictions)

def calculate_metrics(x_train, y_train, x_validation, y_validation, x_test, y_test, result):
	"""Compute baseline and optimized error rates for one run.

	Every entry of ``result.X`` is used to index the training set (presumably
	each is a selection mask produced by the optimizer -- confirm against the
	code that writes the result). The entry with the lowest validation error
	and the entry with the lowest test error are then re-evaluated with
	``GenericOptimizer.calculate_overall_error``.

	Args:
		x_train, y_train: Training features and labels.
		x_validation, y_validation: Validation features and labels.
		x_test, y_test: Test features and labels.
		result: Object exposing ``X``, an indexable collection of selections.

	Returns:
		Tuple ``(baseline_validation_err, baseline_test_err,
		optimized_validation_err, optimized_test_err,
		ideal_optimized_test_err)`` where the "optimized" errors use the
		selection chosen on validation and the "ideal" error uses the
		selection chosen directly on the test set.
	"""
	k = GenericOptimizer.n_neighbours

	def overall_error(x_fit, y_fit, x_eval, y_eval):
		# Thin wrapper so every call shares the same neighbour count.
		return GenericOptimizer.calculate_overall_error(x_fit, y_fit, x_eval, y_eval, k)

	# Errors obtained when the whole training set is used.
	baseline_validation_err = overall_error(x_train, y_train, x_validation, y_validation)
	baseline_test_err = overall_error(x_train, y_train, x_test, y_test)

	# Score every candidate selection on both the validation and the test split.
	validation_F = Parallel(n_jobs=-1)(
		delayed(parallel_error)(candidate, x_train, y_train, x_validation, y_validation)
		for candidate in result.X
	)
	test_F = Parallel(n_jobs=-1)(
		delayed(parallel_error)(candidate, x_train, y_train, x_test, y_test)
		for candidate in result.X
	)

	# np.argmin returns the first minimum, so ties resolve to the earliest candidate.
	chosen_on_validation = result.X[np.argmin(validation_F)]
	chosen_on_test = result.X[np.argmin(test_F)]

	# Reduction rate and imbalance ratio of the chosen subset are not computed here.
	x_val_pick, y_val_pick = x_train[chosen_on_validation], y_train[chosen_on_validation]
	optimized_validation_err = overall_error(x_val_pick, y_val_pick, x_validation, y_validation)
	optimized_test_err = overall_error(
		x_train[chosen_on_validation], y_train[chosen_on_validation], x_test, y_test
	)
	ideal_optimized_test_err = overall_error(
		x_train[chosen_on_test], y_train[chosen_on_test], x_test, y_test
	)

	return (
		baseline_validation_err,
		baseline_test_err,
		optimized_validation_err,
		optimized_test_err,
		ideal_optimized_test_err,
	)

def load_dataset(dataset_path, dataset):
	"""Load ``<dataset_path>/<dataset>/<dataset>.csv`` into features and encoded labels.

	The file is first parsed with ``', '`` as delimiter; if that parse yields
	no ``Class`` column (KeyError), it is parsed again with a plain ``','``.

	Args:
		dataset_path: Directory containing one sub-folder per dataset.
		dataset: Name of the dataset; used for both the folder and the CSV file.

	Returns:
		Tuple ``(x, y)``: ``x`` is a NumPy array of every column except
		``Class``; ``y`` is the ``Class`` column passed through
		``LabelEncoder.fit_transform``.

	Raises:
		KeyError: If neither parse produces a ``Class`` column.
	"""
	csv_path = os.path.join(dataset_path, dataset, f'{dataset}.csv')

	def split_features_and_labels(frame):
		# Both lookups raise KeyError when the 'Class' column is absent.
		return frame.drop(columns='Class'), frame['Class']

	try:
		features, labels = split_features_and_labels(
			pd.read_csv(csv_path, delimiter=', ', engine='python')
		)
	except KeyError:
		features, labels = split_features_and_labels(
			pd.read_csv(csv_path, delimiter=',')
		)

	x = np.array(features)
	y = LabelEncoder().fit_transform(labels)
	return x, y


In [17]:
# Evaluate every pickled run. For each execution name this collects the
# per-run baseline / optimized accuracies and the rank-sum p-values comparing
# baseline against optimized accuracies, and appends one dict to `rows`
# (in the iteration order of `iteration_mappings`).
rows = []

# List the dataset folders once; they are used to infer a run's dataset from its name.
dataset_folders = os.listdir(datasets_path)

for execution_name in iteration_mappings:

	optimized_validation_acc = []
	optimized_test_acc = []
	optimized_ideal_test_acc = []

	baseline_test_acc = []
	baseline_validaion_acc = []

	# The dataset is loaded lazily on the first file of the group and reused
	# for the remaining files of the same execution name.
	dataset = None
	x, y = None, None
	num_examples = None
	all_idx = None

	for filename in iteration_mappings[execution_name]:
		# NOTE(review): pickle.load can execute arbitrary code; only point
		# save_path at result files you produced yourself.
		with open(os.path.join(save_path, filename), 'rb') as fh:
			result_dict = pickle.load(fh)

		train_idx = result_dict['train']
		validation_idx = result_dict['validation']
		result = result_dict['result']
		run_name = result_dict['name']
		# Progress output. This must come after run_name is bound: printing it
		# earlier raises NameError on a fresh kernel and shows the previous
		# run's name on a stale one.
		print(run_name)

		if x is None and y is None:
			# Infer the dataset from the run name. When several folder names
			# are substrings of the run name, the longest one is the most
			# specific match, independent of os.listdir ordering.
			matches = [folder for folder in dataset_folders if folder in run_name]
			if not matches:
				raise ValueError(
					f"No dataset folder in {datasets_path!r} matches run name {run_name!r}"
				)
			dataset = max(matches, key=len)

			x, y = load_dataset(datasets_path, dataset)
			num_examples = x.shape[0]
			all_idx = np.arange(num_examples)

		# Everything that is neither train nor validation is treated as test data.
		prior_knowledge_idx = np.union1d(train_idx, validation_idx)
		test_idx = np.setdiff1d(all_idx, prior_knowledge_idx)
		x_train, y_train = x[train_idx], y[train_idx]
		x_validation, y_validation = x[validation_idx], y[validation_idx]
		x_test, y_test = x[test_idx], y[test_idx]

		# Runs whose name contains "overSample" get their training split over-sampled.
		if "overSample" in run_name:
			x_train, y_train = over_sample(
				x_train,
				y_train
			)

		baseline_validation_err, baseline_test_err, optimized_validation_err, optimized_test_err, ideal_optimized_test_err = calculate_metrics(
			x_train,
			y_train,
			x_validation,
			y_validation,
			x_test,
			y_test,
			result
		)

		# Store accuracies (1 - error) for each run of this execution.
		baseline_validaion_acc.append(1-baseline_validation_err)
		baseline_test_acc.append(1-baseline_test_err)
		optimized_validation_acc.append(1-optimized_validation_err)
		optimized_test_acc.append(1-optimized_test_err)
		optimized_ideal_test_acc.append(1-ideal_optimized_test_err)

	# Wilcoxon rank-sum tests comparing baseline against optimized accuracies.
	val_pval = ranksums(baseline_validaion_acc, optimized_validation_acc).pvalue
	test_pval = ranksums(baseline_test_acc, optimized_test_acc).pvalue
	ideal_test_pval = ranksums(baseline_test_acc, optimized_ideal_test_acc).pvalue

	row = {
		"Validation Baseline Acc": baseline_validaion_acc,
		"Test Baseline Acc": baseline_test_acc,
		"Optimized Validation Acc": optimized_validation_acc,
		"Optimized Test Acc": optimized_test_acc,
		"Ideal Test Acc": optimized_ideal_test_acc,
		"Validation P-value": val_pval,
		"Test P-value": test_pval,
		"Ideal Test P-value": ideal_test_pval
	}
	rows.append(row)
		



0_autoEncoderMutation_bupa
0_autoEncoderMutation_australian
10_autoEncoderMutation_australian
11_autoEncoderMutation_australian
12_autoEncoderMutation_australian
13_autoEncoderMutation_australian
14_autoEncoderMutation_australian
15_autoEncoderMutation_australian
16_autoEncoderMutation_australian
17_autoEncoderMutation_australian
18_autoEncoderMutation_australian
19_autoEncoderMutation_australian
1_autoEncoderMutation_australian
20_autoEncoderMutation_australian
21_autoEncoderMutation_australian
22_autoEncoderMutation_australian
23_autoEncoderMutation_australian
24_autoEncoderMutation_australian
25_autoEncoderMutation_australian
26_autoEncoderMutation_australian
27_autoEncoderMutation_australian
28_autoEncoderMutation_australian
29_autoEncoderMutation_australian
2_autoEncoderMutation_australian
30_autoEncoderMutation_australian
3_autoEncoderMutation_australian
4_autoEncoderMutation_australian
5_autoEncoderMutation_australian
6_autoEncoderMutation_australian
7_autoEncoderMutation_austra

[WinError 2] The system cannot find the file specified
  File "e:\Code\3_13_venv\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
        "wmic CPU Get NumberOfCores /Format:csv".split(),
        capture_output=True,
        text=True,
    )
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.13_3.13.752.0_x64__qbz5n2kfra8p0\Lib\subprocess.py", line 556, in run
    with Popen(*popenargs, **kwargs) as process:
         ~~~~~^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.13_3.13.752.0_x64__qbz5n2kfra8p0\Lib\subprocess.py", line 1038, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
    ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                        pass_fds, cwd, env,
                        ^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
                        gid, gids, uid, umask,
                     

16_autoEncoderMutation_glass1
17_autoEncoderMutation_glass1
18_autoEncoderMutation_glass1
19_autoEncoderMutation_glass1
1_autoEncoderMutation_glass1
20_autoEncoderMutation_glass1
21_autoEncoderMutation_glass1
22_autoEncoderMutation_glass1
23_autoEncoderMutation_glass1
24_autoEncoderMutation_glass1
25_autoEncoderMutation_glass1
26_autoEncoderMutation_glass1
27_autoEncoderMutation_glass1
28_autoEncoderMutation_glass1
29_autoEncoderMutation_glass1
2_autoEncoderMutation_glass1
30_autoEncoderMutation_glass1
3_autoEncoderMutation_glass1
4_autoEncoderMutation_glass1
5_autoEncoderMutation_glass1
6_autoEncoderMutation_glass1
7_autoEncoderMutation_glass1
8_autoEncoderMutation_glass1
9_autoEncoderMutation_glass1
0_autoEncoderMutation_pima
10_autoEncoderMutation_pima
11_autoEncoderMutation_pima
12_autoEncoderMutation_pima
13_autoEncoderMutation_pima
14_autoEncoderMutation_pima
15_autoEncoderMutation_pima
16_autoEncoderMutation_pima
17_autoEncoderMutation_pima
18_autoEncoderMutation_pima
19_autoEnc

In [20]:
# Collapse the per-run accuracy lists of every execution into one summary
# record (means, p-values, mean differences) and export them to metrics.xlsx.

# `rows` is positional: entry i belongs to the i-th key of iteration_mappings.
# Fail loudly if the two are out of sync (e.g. the evaluation cell was
# interrupted or iteration_mappings was rebuilt afterwards).
if len(rows) != len(iteration_mappings):
	raise ValueError(
		f"rows has {len(rows)} entries but iteration_mappings has "
		f"{len(iteration_mappings)}; re-run the evaluation cell first"
	)

# List the dataset folders once instead of once per execution.
dataset_folders = os.listdir(datasets_path)

records = []
for title, record in zip(iteration_mappings, rows):
	# Infer the dataset from the execution name; prefer the longest folder
	# name contained in it so the result does not depend on listing order.
	matches = [folder for folder in dataset_folders if folder in title]
	dataset = max(matches, key=len) if matches else None

	records.append({
		"Dataset": dataset,
		"Name": title,
		"Baseline validation acc": np.mean(record['Validation Baseline Acc']),
		"Baseline test acc": np.mean(record['Test Baseline Acc']),
		"Optimized validation acc": np.mean(record['Optimized Validation Acc']),
		"Optimized test acc": np.mean(record['Optimized Test Acc']),
		"Optimized ideal test acc": np.mean(record['Ideal Test Acc']),
		"Validation PVal": record['Validation P-value'],
		"Test PVal": record['Test P-value'],
		"Ideal Test PVal": record['Ideal Test P-value'],
		# Mean per-run improvement of the optimized accuracy over the baseline.
		"Validation Diff": np.mean(np.subtract(record['Optimized Validation Acc'], record['Validation Baseline Acc'])),
		"Test Diff": np.mean(np.subtract(record['Optimized Test Acc'], record['Test Baseline Acc'])),
		"Ideal Diff": np.mean(np.subtract(record['Ideal Test Acc'], record['Test Baseline Acc']))
	})
pd.DataFrame.from_records(records).to_excel("metrics.xlsx", index=False)