In [2]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder

from pymoo.util.nds.non_dominated_sorting import NonDominatedSorting
from pymoo.operators.sampling.rnd import BinaryRandomSampling
from pymoo.operators.crossover.hux import HUX
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.indicators.hv import Hypervolume
from pymoo.core.mutation import Mutation
from pymoo.core.sampling import Sampling
from pymoo.core.problem import Problem
from pymoo.optimize import minimize

from sklearn.model_selection import StratifiedShuffleSplit

from joblib import Parallel, delayed

from scipy.stats import ranksums

from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch
import torch.nn as nn

import pickle
import os
import re

from ucimlrepo import fetch_ucirepo 

import pandas as pd
import re
from io import StringIO

from pathlib import Path

import pandas as pd
import numpy as np

# Function Definitions

In [6]:
def load_UCI_datasets():
	datasets = [
		(fetch_ucirepo(id=52), "ionosphere"),
		(fetch_ucirepo(id=43), "haberman"),
		(fetch_ucirepo(id=53), "iris0"),
		(fetch_ucirepo(id=42), "glass1"),
		(fetch_ucirepo(id=143), "australia"),
		(fetch_ucirepo(id=277), "thoracic"),
		(fetch_ucirepo(id=50), "segment0"),
		# (fetch_ucirepo(id=149), "vehicle0"),
		(fetch_ucirepo(id=109), "wine"),
		# (fetch_ucirepo(id=39), "ecoli"),
		(fetch_ucirepo(id=225), "ILPD"),
		(fetch_ucirepo(id=45), "heart_disease"),
		(fetch_ucirepo(id=17), "wisconsin"),
		# (fetch_ucirepo(id=73), "mushroom"),
		(fetch_ucirepo(id=94), "spambase"),
		(fetch_ucirepo(id=161), "mammographic"),
		# (fetch_ucirepo(id=12), "balance"),
		(fetch_ucirepo(id=110), "yeast1"),
		(fetch_ucirepo(id=451), "coimbra"),
		(fetch_ucirepo(id=244), "fertility")
	]

	return datasets

def load_KEEL_dataset(path):
	with open(path, 'r') as fh:
		lines = fh.readlines()
	
	relation_name = ''
	attributes = []
	attribute_types = {}
	data_lines = []
	in_data_section = False

	for line in lines:
		line = line.strip()
		if line.startswith('@relation'):
			relation_name = line.split()[1]
		elif line.startswith('@attribute'):
			# Match attribute lines with types and optional ranges or enumerations
			match = re.match(r'@attribute\s+(\w+)\s+(\w+)(?:\s+\[.*?\])?', line)
			if match:
				attr_name, attr_type = match.groups()
				attributes.append(attr_name)
				attribute_types[attr_name] = attr_type
			else:
				# Match attribute lines with enumerated types
				match_enum = re.match(r'@attribute\s+(\w+)\s+\{.*?\}', line)
				if match_enum:
					attr_name = match_enum.group(1)
					attributes.append(attr_name)
					attribute_types[attr_name] = 'categorical'
		elif line.startswith('@data'):
			in_data_section = True
		elif in_data_section:
			if line and not line.startswith('@'):
				data_lines.append(line)

	# Create DataFrame from data lines
	data_str = '\n'.join(data_lines)
	df = pd.read_csv(StringIO(data_str), header=None, names=attributes)

	return df, attribute_types
	# attributes = []
	# attribute_types = {}

	# for line in data_text.split("\n"):
	# 	if line.startswith("@attribute"):
	# 		parts = re.match(r'@attribute (\w+) (real|\{.*\})', line)
	# 		if parts:
	# 			attr_name = parts.group(1)
	# 			attr_type = parts.group(2)
	# 			attributes.append(attr_name)
	# 			attribute_types[attr_name] = attr_type

	# # Extract data section
	# data_section = data_text.split("@data")[1].strip()

	# # Convert data section to DataFrame
	# df = pd.read_csv(StringIO(data_section), header=None, names=attributes)

	# return df, attribute_types

class GenericOptimizer(Problem):
	population_size = 100
	n_neighbours = 5
	sequential = False
	def __init__(self, X_train, y_train, X_val, y_val, objectives, exec_mode):
		self.mutation_history = {}
		self.generation_number = 0

		self.exec_mode = exec_mode

		self.X_train = X_train
		self.y_train = y_train

		self.X_val = X_val
		self.y_val = y_val

		self.training_data = X_train
		self.n_instances = X_train.shape[0]
		
		self.objectives = objectives

		super().__init__(
			n_var=self.n_instances,
			n_obj=len(objectives),               
			n_constr=0,            
			xl=0,                  
			xu=1,                  
			type_var=np.bool_,     
		)

	def _evaluate(self, x, out, *args, **kwargs):
		
		if self.exec_mode == "sequential":
			metrics = []
			for objective in self.objectives:
				metrics.append(self.eval_objective((objective, x)))
		else:
			metrics = Parallel(n_jobs=-1)(delayed(self.eval_objective)((objective, x)) for objective in self.objectives)
		
		self.generation_number += 1

		out["F"] = np.column_stack(metrics)

	def eval_objective(self, pack):
		objective, x = pack
			
		if "calculate_num_examples" in repr(objective):
			return GenericOptimizer.calculate_num_examples(x)

		elif "calculate_IR" in repr(objective):
			vals = []
			for instance in x:
				vals.append(GenericOptimizer.calculate_IR(self.y_train[instance]))
			return vals
		
		else:
			vals = []
			for instance in x:
				vals.append(objective(
					self.X_train[instance],
					self.y_train[instance],
					self.X_val,
					self.y_val,
					GenericOptimizer.n_neighbours
				))
			return vals

	@classmethod
	def calculate_IR(cls, y):
		df = pd.DataFrame(y).value_counts()
		return (df[1]/df[0]) if df.min() == 0 else (df[0]/df[1])
	
	@classmethod
	def filter_by_class(cls, x, y, label):
		indices = np.where(y==label)
		return x[indices], y[indices]
	
	@classmethod
	def calculate_overall_error(cls, x_train, y_train, x_val, y_val, n):
				
		num_included_instances = x_train.shape[0]

		if num_included_instances >= n:
			optimization_knn = KNeighborsClassifier(n_neighbors=n)
			optimization_knn.fit(x_train, y_train)

			y_pred = optimization_knn.predict(x_val)
			acc = accuracy_score(y_val, y_pred)
			return 1-acc
		else:
			return 1

	@classmethod
	def calculate_class0_error(cls, x_train, y_train, x_val, y_val, n):
		class0_x_val, class0_y_val = cls.filter_by_class(x_val, y_val, 0)
		err = cls.calculate_overall_error(
			x_train,
			y_train,
			class0_x_val,
			class0_y_val,
			n
		)
		return err

	@classmethod
	def calculate_class1_error(cls, x_train, y_train, x_val, y_val, n):
		class1_x_val, class1_y_val = cls.filter_by_class(x_val, y_val, 1)
		err = cls.calculate_overall_error(
			x_train,
			y_train,
			class1_x_val,
			class1_y_val,
			n
		)
		return err

	@classmethod
	def calculate_overall_inverse_f1(cls, x_train, y_train, x_val, y_val, n):
				
		num_included_instances = x_train.shape[0]
		counts = pd.DataFrame(y_train).value_counts()
		if num_included_instances >= n:
			optimization_knn = KNeighborsClassifier(n_neighbors=n)
			optimization_knn.fit(x_train, y_train)

			y_pred = optimization_knn.predict(x_val)
			f1 = f1_score(y_val, y_pred, average='binary')
			return 1-f1
		else:
			return 1

	@classmethod
	def calculate_class0_inverse_f1(cls, x_train, y_train, x_val, y_val, n):
		class0_x_val, class0_y_val = cls.filter_by_class(x_val, y_val, 0)
		inv_f1 = cls.calculate_overall_inverse_f1(
			x_train,
			y_train,
			class0_x_val,
			class0_y_val,
			n
		)
		return inv_f1

	@classmethod
	def calculate_class1_inverse_f1(cls, x_train, y_train, x_val, y_val, n):
		class1_x_val, class1_y_val = cls.filter_by_class(x_val, y_val, 1)
		inv_f1 = cls.calculate_overall_inverse_f1(
			x_train,
			y_train,
			class1_x_val,
			class1_y_val,
			n
		)
		return inv_f1
	
	@classmethod
	def calculate_num_examples(cls, instances):
		return np.sum(instances, axis=1)

class BiasedBinarySampling(Sampling):
	def __init__(self, labels, major_prob, minor_prob):
		
		self.labels = labels
		counts = pd.DataFrame(labels).value_counts()
		if counts[0] > counts[1]:
			self.c0_thresh = major_prob
			self.c1_thresh = minor_prob
		else:
			self.c0_thresh = minor_prob
			self.c1_thresh = major_prob

		super().__init__()

	def _do(self, problem, n_samples, **kwargs):

		rands = np.random.random((n_samples, problem.n_var))
		init_pops = np.zeros((n_samples, problem.n_var), dtype=bool)
		for idx, label in enumerate(self.labels):
			if label == 0:
				init_pops[:, idx] = (rands[:, idx] < self.c0_thresh).astype(bool)
			if label == 1:
				init_pops[:, idx] = (rands[:, idx] < self.c1_thresh).astype(bool)


		return init_pops
	
class BitflipMutation(Mutation):

	def _do(self, problem, X, **kwargs):
		
		prob_var = self.get_prob_var(problem, size=(len(X), 1))
		Xp = np.copy(X)
		flip = np.random.random(X.shape) < prob_var
		Xp[flip] = ~X[flip]
		
		total_number_of_genes = X.shape[0] * X.shape[1]
		genes_effected = np.sum(X ^ Xp)

		if problem.generation_number not in problem.mutation_history:
			problem.mutation_history[problem.generation_number] = []
		
		problem.mutation_history[problem.generation_number].append(genes_effected/total_number_of_genes)

		return Xp

class CustomDataset(Dataset):
	def __init__(self, x_train, y_train):
		self.x_train = x_train
		self.y_train = y_train
	def __len__(self):
		return self.x_train.shape[0]
	def __getitem__(self, ind):
		x = self.x_train[ind]
		y = self.y_train[ind]
		return x, y

class MLP(nn.Module):
	def __init__(self, input_dim):
		super(MLP, self).__init__()
		self.linear1 = nn.Linear(input_dim, input_dim//2)
		self.relu1 = nn.ReLU()
		self.linear2 = nn.Linear(input_dim//2, input_dim//3)
		self.relu2 = nn.ReLU()
		self.linear3 = nn.Linear(input_dim//3, input_dim)

	def forward(self, x):
		x = self.linear1(x)
		x = self.relu1(x)
		x = self.linear2(x)
		x = self.relu2(x)
		x = self.linear3(x)
		return x
	
class CustomMutation(Mutation):
	curr_MLP = None
	device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	num_synthetic_examples = 500000
	train_epochs = 50
	batch_size = 215
	primary_objective = GenericOptimizer.calculate_overall_error
	# secondary_objectives = [
	# 	[GenericOptimizer.calculate_num_examples],
	# 	[GenericOptimizer.calculate_class0_error],
	# 	[GenericOptimizer.calculate_class0_inverse_f1],
	# 	[GenericOptimizer.calculate_class0_inverse_precision],
	# 	[GenericOptimizer.calculate_class0_inverse_recall],
	# 	[GenericOptimizer.calculate_class1_error],
	# 	[GenericOptimizer.calculate_class1_inverse_f1],
	# 	[GenericOptimizer.calculate_class1_inverse_precision],
	# 	[GenericOptimizer.calculate_class1_inverse_recall],
	# 	[GenericOptimizer.calculate_overall_inverse_f1],
	# 	[GenericOptimizer.calculate_overall_inverse_precision],
	# 	[GenericOptimizer.calculate_overall_inverse_recall],
	# 	[GenericOptimizer.calculate_class0_inverse_precision, GenericOptimizer.calculate_class1_inverse_precision],
	# 	[GenericOptimizer.calculate_class0_inverse_recall, GenericOptimizer.calculate_class1_inverse_recall],
	# 	[GenericOptimizer.calculate_class0_inverse_f1, GenericOptimizer.calculate_class1_inverse_f1],
	# 	[GenericOptimizer.calculate_class0_error, GenericOptimizer.calculate_class1_error],
	# ]

	def __init__(self, x_train, y_train, x_validation, y_validation, prediction_threshold=0.5):
		super().__init__()
		self.prediction_thresh = prediction_threshold
		synthesized_x, synthesized_y = CustomMutation.create_training_data(x_train, y_train, x_validation, y_validation)
		self.model = CustomMutation.train_mutation(synthesized_x, synthesized_y)

	def _do(self, problem, X, **kwargs):

		int_x = np.array(X, dtype=np.float32)
		dataset = CustomDataset(int_x, int_x)
		loader = DataLoader(dataset, batch_size=X.shape[0], shuffle=False)

		self.model.eval()
		with torch.no_grad():
			for data, _ in loader:
				data = data.to(CustomMutation.device)
				outputs = self.model(data)
				predictions = (outputs > self.prediction_thresh).bool()

		prediction = np.array(predictions)

		total_number_of_genes = X.shape[0] * X.shape[1]
		genes_effected = np.sum(X ^ prediction)

		if problem.generation_number not in problem.mutation_history:
			problem.mutation_history[problem.generation_number] = []
		
		problem.mutation_history[problem.generation_number].append(genes_effected/total_number_of_genes)
		return prediction

	@classmethod
	def train_mutation(cls, x_train, y_train):
		train_set = CustomDataset(x_train, y_train)
		input_dim = x_train.shape[1]
		batch_size = cls.batch_size
		train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

		device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

		 
		model = MLP(input_dim).to(device)
		optimizer = torch.optim.Adam(model.parameters(), lr=0.000001)
		criterion = nn.BCEWithLogitsLoss()

		model.train()
		for epoch in range(cls.train_epochs):
			losses = []
			for batch_num, input_data in enumerate(train_loader):
				optimizer.zero_grad()
				x, y = input_data
				x, y = x.to(device).float(), y.to(device)

				output = model(x)
				loss = criterion(output, y)
				loss.backward()
				losses.append(loss.item())
				optimizer.step()

		return model
	
	@classmethod
	def create_training_data(cls, x_train, y_train, x_validation, y_validation):
		
		synthesizing_splits = StratifiedShuffleSplit(
			n_splits=len(cls.secondary_objectives), # create a split for each secondary objective
			test_size=0.5, # Half the validation set is randomly excluded
		)	
		packages = []
		for idx, (sub_validation_idx, _) in enumerate(synthesizing_splits.split(x_validation, y_validation)):
			packages.append((
				cls.secondary_objectives[idx],
				x_train,
				y_train,
				x_validation[sub_validation_idx],
				y_validation[sub_validation_idx]
			))

		# Execute optimization and extract the final populations
		populations = Parallel(n_jobs=-1)(delayed(cls.execute_training_data_gen)(package) for package in packages)
		
		# Aggregate all populations into single list containing every unique instance
		all_instances = []
		for population in populations:
			for individual in population.pop:
				all_instances.append(list(individual.X))
				
		all_instances = np.array(all_instances)

		# Create synthetic examples by adding randin noise to each instance. Repeat until threshold is reached.
		synthetic_x, synthetic_y = [], []
		while len(synthetic_x) < cls.num_synthetic_examples:
			
			for y_true in all_instances:
				x_noised = []
				for idx, probability in enumerate(np.random.uniform(0.1, 1.0, y_true.shape[0])):
					if probability < 0.85:
						x_noised.append(y_true[idx])                
					else:
						x_noised.append(0 if y_true[idx] == 1 else 1)

				synthetic_x.append(np.array(x_noised, dtype=np.float32))
				synthetic_y.append(np.array(y_true, dtype=np.float32))

		return np.array(synthetic_x), np.array(synthetic_y)
	
	@classmethod
	def execute_training_data_gen(cls, package):

		objectives, x_train, y_train, x_validation, y_validation = package
		
		objectives.append(cls.primary_objective)

		problem = GenericOptimizer(
			x_train, 
			y_train, 
			x_validation, 
			y_validation,
			objectives,
			"Sequential"
		)

		algorithm = NSGA2(
			pop_size=GenericOptimizer.population_size, 
			sampling=BinaryRandomSampling(), 
			crossover=HUX(), 
			mutation=BitflipMutation(), 
			eliminate_duplicates=True
		)
		result = minimize(
			problem, 
			algorithm, 
			('n_gen', GenericOptimizer.population_size)
		)
		
		return result

def prepare_splits(x, y):
	train_split = StratifiedShuffleSplit(
		n_splits=31, 
		test_size=0.5
	)
	splits = []
	for train_idx, temp_idx in train_split.split(x, y):
		test_split = StratifiedShuffleSplit(
			n_splits=1, 
			test_size=0.5
		)
		test_idx, validation_idx = next(test_split.split(x[temp_idx], y[temp_idx]))

		validation_idx = temp_idx[validation_idx]
		test_idx = temp_idx[test_idx]
		
		splits.append((train_idx, validation_idx, test_idx))
	return splits

def create_UCI_preprocessor_pipeline(variables):
	
	type_mappings = {}
	for variable_idx, variable_name in enumerate(variables['name']):
		variable_type = variables['type'][variable_idx]
		if variable_type not in type_mappings:
			type_mappings[variable_type] = []

		if variables['role'][variable_idx] == 'Feature':
			type_mappings[variable_type].append(variable_name)

	categorical_transformer = Pipeline(steps=[
		('imputer', SimpleImputer(strategy='most_frequent')),
		('onehot', OneHotEncoder(handle_unknown='ignore'))
	])
	numerical_transformer = Pipeline(steps=[
		('imputer', SimpleImputer(strategy='mean')),
		('scaler', StandardScaler())
	])

	numerical_features = []
	if 'Continuous' in type_mappings:
		for feature in type_mappings['Continuous']:
			numerical_features.append(feature)
	if 'Integer' in type_mappings:
		for feature in type_mappings['Integer']:
			numerical_features.append(feature)
			
	transformer_steps = []
	if numerical_features != []:
		transformer_steps.append(
			('num', numerical_transformer, numerical_features)
		)
	if 'Categorical' in type_mappings:
		transformer_steps.append(
			('cat', categorical_transformer, type_mappings['Categorical'])
		)
	preprocessor = ColumnTransformer(
		transformers=transformer_steps
	)
	pipeline = Pipeline(steps=[
		('preprocessor', preprocessor)
	])
	
	return pipeline

	type_mappings = {}
	categorical_features = []
	numerical_features = []

	for column in attributes:
		if column != "Class" and attributes[column] != 'categorical':
			numerical_features.append(column)
		elif column != "Class" and attributes[column] == 'categorical':
			categorical_features.append(column)
	y = dataset['Class']	
	X = dataset.drop(columns=['Class'], inplace=True)

	label_encoder = LabelEncoder()
	y = label_encoder.fit_transform(y)

	categorical_transformer = Pipeline(steps=[
		('imputer', SimpleImputer(strategy='most_frequent')),
		('onehot', OneHotEncoder(handle_unknown='ignore'))
	])
	numerical_transformer = Pipeline(steps=[
		('imputer', SimpleImputer(strategy='mean')),
		('scaler', StandardScaler())
	])

	numerical_features = []
	if 'Continuous' in type_mappings:
		for feature in type_mappings['Continuous']:
			numerical_features.append(feature)
	if 'Integer' in type_mappings:
		for feature in type_mappings['Integer']:
			numerical_features.append(feature)
			
	transformer_steps = []
	if numerical_features != []:
		transformer_steps.append(
			('num', numerical_transformer, numerical_features)
		)
	if 'Categorical' in type_mappings:
		transformer_steps.append(
			('cat', categorical_transformer, categorical_features)
		)
	preprocessor = ColumnTransformer(
		transformers=transformer_steps
	)
	pipeline = Pipeline(steps=[
		('preprocessor', preprocessor)
	])

	return pipeline

def create_KEEL_preprocessor_pipeline(attributes):
	type_mappings = {}
	categorical_features = []
	numerical_features = []

	for column in attributes:
		if column != "Class" and attributes[column] != 'categorical':
			numerical_features.append(column)
		elif column != "Class" and attributes[column] == 'categorical':
			categorical_features.append(column)

	categorical_transformer = Pipeline(steps=[
		('imputer', SimpleImputer(strategy='most_frequent')),
		('onehot', OneHotEncoder(handle_unknown='ignore'))
	])
	numerical_transformer = Pipeline(steps=[
		('imputer', SimpleImputer(strategy='mean')),
		('scaler', StandardScaler())
	])
			
	transformer_steps = []
	if numerical_features != []:
		transformer_steps.append(
			('num', numerical_transformer, numerical_features)
		)
	if 'Categorical' in type_mappings:
		transformer_steps.append(
			('cat', categorical_transformer, categorical_features)
		)
	preprocessor = ColumnTransformer(
		transformers=transformer_steps
	)
	pipeline = Pipeline(steps=[
		('preprocessor', preprocessor)
	])

	return pipeline

def over_sample(x, y):
	counts = pd.DataFrame(y).value_counts()
	minority_class_label = counts.index[np.argmin(counts)][0]
	minority_class_indicies = np.where(y == minority_class_label)[0]
	# y = y.reshape(-1, 1)
	over_sampled_x = np.concatenate((x, x[minority_class_indicies]), axis=0)
	over_sampled_y = np.concatenate((y, y[minority_class_indicies]), axis=0)
	# over_sampled_y = over_sampled_y.reshape(-1)    
	return over_sampled_x, over_sampled_y

def parallel_error(instance, x_train, y_train, x_compare, y_compare):

	x_filtered, y_filtered = x_train[instance], y_train[instance]
	if x_filtered.shape[0] < GenericOptimizer.n_neighbours: 
		error = 1
	else:
		knn = KNeighborsClassifier(n_neighbors=GenericOptimizer.n_neighbours)
		knn.fit(x_filtered, y_filtered)
		y_pred = knn.predict(x_compare)
		error = 1 - accuracy_score(y_compare, y_pred)
	return error

def calculate_metrics(x_train, y_train, x_validation, y_validation, x_test, y_test, result):
	baseline_validation_err = GenericOptimizer.calculate_overall_error(
		x_train, y_train,
		x_validation, y_validation,
		GenericOptimizer.n_neighbours
	)
	baseline_test_err = GenericOptimizer.calculate_overall_error(
		x_train, y_train,
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)

	validation_F = Parallel(n_jobs=-1)(delayed(parallel_error)(instance, x_train, y_train, x_validation, y_validation) for instance in result.X)

	ideal_validation = result.X[np.argmin(validation_F)]
	validation_inclusions = np.sum(ideal_validation)
	optimized_validation_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation],
		y_train[ideal_validation],
		x_validation,
		y_validation,
		GenericOptimizer.n_neighbours
	)
	optimized_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation],
		y_train[ideal_validation],
		x_test,
		y_test,
		GenericOptimizer.n_neighbours
	)

	test_F = Parallel(n_jobs=-1)(delayed(parallel_error)(instance, x_train, y_train, x_test, y_test) for instance in result.X)
	ideal_test = result.X[np.argmin(test_F)]
	test_inclusions = np.sum(ideal_test)


	ideal_optimized_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_test],
		y_train[ideal_test],
		x_test,
		y_test,
		GenericOptimizer.n_neighbours
	)
	return validation_inclusions, test_inclusions, baseline_validation_err, baseline_test_err, optimized_validation_err, optimized_test_err, ideal_optimized_test_err

def evaluate_results():
	datasets = load_datasets()
	save_path = "results"

	iteration_mappings = {}
	for file in os.listdir("results"):
		execution_name = "_".join(file.replace(".pickle", "").split("_")[1:])
		if execution_name not in iteration_mappings:
			iteration_mappings[execution_name] = []
		iteration_mappings[execution_name].append(file)

	#####################################################################################################################
	#                                                                                                                   #
	#####################################################################################################################


	rows = []
	for execution_name in iteration_mappings:

		optimized_validation_acc = []
		optimized_test_acc = []
		optimized_ideal_test_acc = []

		baseline_test_acc = []
		baseline_validaion_acc = []

		validation_inclusions = []
		test_inclusions = []
		curr_dataset = execution_name.split(" ")[0].strip()
		for dataset, name in datasets:
			if name == curr_dataset:
				raw_X, y = dataset.data.features, dataset.data.targets
				pipeline = create_preprocessor_pipeline(dataset.variables)
				pipeline.fit(raw_X, y)
				X = pipeline.transform(raw_X)
				label_encoder = LabelEncoder()
				y = label_encoder.fit_transform(y)
				break
		
		for filename in iteration_mappings[execution_name]:
			with open(os.path.join(save_path, filename), 'rb') as fh:
				result_dict = pickle.load(fh)

			train_idx = result_dict['train']
			validation_idx = result_dict['validation']
			test_idx = result_dict['test']
			result = result_dict['result']
			run_name = result_dict['name']

			x_train, y_train = X[train_idx], y[train_idx]
			x_validation, y_validation = X[validation_idx], y[validation_idx]
			x_test, y_test = X[test_idx], y[test_idx]

			if "overSample" in run_name:
				x_train, y_train = over_sample(
					x_train, 
					y_train
				)
			
			num_validation, num_test, baseline_validation_err, baseline_test_err, optimized_validation_err, optimized_test_err, ideal_optimized_test_err = calculate_metrics(
				x_train, 
				y_train, 
				x_validation, 
				y_validation, 
				x_test, 
				y_test, 
				result
			)

			validation_inclusions.append(num_validation)
			test_inclusions.append(num_test)

			baseline_validaion_acc.append(1-baseline_validation_err)
			baseline_test_acc.append(1-baseline_test_err)
			optimized_validation_acc.append(1-optimized_validation_err)
			optimized_test_acc.append(1-optimized_test_err)
			optimized_ideal_test_acc.append(1-ideal_optimized_test_err)

		val_pval = ranksums(baseline_validaion_acc, optimized_validation_acc).pvalue
		test_pval = ranksums(baseline_test_acc, optimized_test_acc).pvalue
		ideal_test_pval = ranksums(baseline_test_acc, optimized_ideal_test_acc).pvalue
		
		row = {
			"Dataset": curr_dataset,
			"Sampling": "over sampling" if "overSample" in execution_name else "regular sampling",
			"Population": "random population" if "randPop" in execution_name else "biased population",
			"Total Size": X.shape[0],
			"Optimized Validation Size": validation_inclusions,
			"Optimized Test Size": test_inclusions,
			"Validation Baseline Acc": baseline_validaion_acc,
			"Test Baseline Acc": baseline_test_acc,
			"Optimized Validation Acc": optimized_validation_acc,
			"Optimized Test Acc": optimized_test_acc,
			"Ideal Test Acc": optimized_ideal_test_acc,
			"Validation P-value": val_pval,
			"Test P-value": test_pval,
			"Ideal Test P-value": ideal_test_pval
		}
		rows.append(row)
			
	#####################################################################################################################
	#                                                                                                                   #
	#####################################################################################################################


	for dataset, name in datasets:
		
		raw_X, y = dataset.data.features, dataset.data.targets
		pipeline = create_preprocessor_pipeline(dataset.variables)
		pipeline.fit(raw_X, y)
		X = pipeline.transform(raw_X)
		label_encoder = LabelEncoder()
		y = label_encoder.fit_transform(y)

		print(name)
		print(pd.DataFrame(y).value_counts())
		print("\n\n")

# Create data packages

In [33]:
objectives_sets = [
	# [GenericOptimizer.calculate_overall_error, GenericOptimizer.calculate_num_examples],
	# [GenericOptimizer.calculate_overall_error, GenericOptimizer.calculate_overall_inverse_f1, GenericOptimizer.calculate_num_examples],
	# [GenericOptimizer.calculate_class0_error, GenericOptimizer.calculate_class1_error],
	[GenericOptimizer.calculate_overall_error]
]

packages = []
for dat_file in Path('datasets').rglob('*.dat'):

	name = str(dat_file.name).replace(".dat", "")
	dataset, attributes = load_KEEL_dataset(dat_file)

	y = dataset['Class']	
	raw_X = dataset.drop(columns=['Class'])
	
	label_encoder = LabelEncoder()
	y = label_encoder.fit_transform(y)

	pipeline = create_KEEL_preprocessor_pipeline(attributes)

	pipeline.fit(raw_X, y)
	X = pipeline.transform(raw_X)
	label_encoder = LabelEncoder()
	y = label_encoder.fit_transform(y)

	for c, (train_idx, validation_idx, test_idx) in enumerate(prepare_splits(X, y)):
		for objectives in objectives_sets:
		
			packages.append((X, y, train_idx, validation_idx, test_idx, objectives, f"{c}_{name}"))


# Executions

In [4]:
def func(package):
	x, y, train_idx, validation_idx, test_idx, objectives, dataset_name = package
	objectives_names = [re.search(r'\.([a-zA-Z_][a-zA-Z0-9_]*)\s', str(objective_name)).group(1) for objective_name in objectives]
	objectives_names = '_'.join(objectives_names)		
	x_train, y_train = x[train_idx], y[train_idx]
	x_validation, y_validation = x[validation_idx], y[validation_idx]
	# x_train, y_train = over_sample( # <------ SAMPLING
	# 	x_train, 
	# 	y_train
	# )
	problem = GenericOptimizer(
		x_train, 
		y_train, 
		x_validation, 
		y_validation,
		objectives,
		"sequential"
	)
	algorithm = NSGA2(
		pop_size=GenericOptimizer.population_size, 
		sampling=BinaryRandomSampling(), # <----- POPULATION
		crossover=HUX(), 
		mutation=BitflipMutation(), 
		eliminate_duplicates=True,
	)
	result = minimize(
		problem, 
		algorithm, 
		('n_gen', GenericOptimizer.population_size),
		save_history=False
	)
	package = {
		"file": f"{dataset_name} regSample randPop {objectives_names}.pickle",
		"train": train_idx,
		"validation": validation_idx,
		"test": test_idx,
		"result": result
	}
	return package

for result in Parallel(n_jobs=-1)(delayed(func)(package) for package in packages):
	with open(os.path.join("results", result['file']), 'wb') as fh:
		pickle.dump(result, fh)	

In [5]:
def func(package):
	x, y, train_idx, validation_idx, test_idx, objectives, dataset_name = package
	objectives_names = [re.search(r'\.([a-zA-Z_][a-zA-Z0-9_]*)\s', str(objective_name)).group(1) for objective_name in objectives]
	objectives_names = '_'.join(objectives_names)		
	x_train, y_train = x[train_idx], y[train_idx]
	x_validation, y_validation = x[validation_idx], y[validation_idx]
	x_train, y_train = over_sample( # <------ SAMPLING
		x_train, 
		y_train
	)
	problem = GenericOptimizer(
		x_train, 
		y_train, 
		x_validation, 
		y_validation,
		objectives,
		"sequential"
	)
	algorithm = NSGA2(
		pop_size=GenericOptimizer.population_size, 
		sampling=BinaryRandomSampling(), # <----- POPULATION
		crossover=HUX(), 
		mutation=BitflipMutation(), 
		eliminate_duplicates=True,
	)
	result = minimize(
		problem, 
		algorithm, 
		('n_gen', GenericOptimizer.population_size),
		save_history=False
	)
	package = {
		"file": f"{dataset_name} overSample randPop {objectives_names}.pickle",
		"train": train_idx,
		"validation": validation_idx,
		"test": test_idx,
		"result": result
	}
	return package

for result in Parallel(n_jobs=-1)(delayed(func)(package) for package in packages):
	with open(os.path.join("results", result['file']), 'wb') as fh:
		pickle.dump(result, fh)	

In [6]:
def func(package):
	x, y, train_idx, validation_idx, test_idx, objectives, dataset_name = package
	objectives_names = [re.search(r'\.([a-zA-Z_][a-zA-Z0-9_]*)\s', str(objective_name)).group(1) for objective_name in objectives]
	objectives_names = '_'.join(objectives_names)		
	x_train, y_train = x[train_idx], y[train_idx]
	x_validation, y_validation = x[validation_idx], y[validation_idx]
	# x_train, y_train = over_sample( # <------ SAMPLING
	# 	x_train, 
	# 	y_train
	# )
	problem = GenericOptimizer(
		x_train, 
		y_train, 
		x_validation, 
		y_validation,
		objectives,
		"sequential"
	)
	algorithm = NSGA2(
		pop_size=GenericOptimizer.population_size, 
		sampling=BiasedBinarySampling(y_train, 0.4, 0.7), # <----- POPULATION
		crossover=HUX(), 
		mutation=BitflipMutation(), 
		eliminate_duplicates=True,
	)
	result = minimize(
		problem, 
		algorithm, 
		('n_gen', GenericOptimizer.population_size),
		save_history=False
	)
	package = {
		"file": f"{dataset_name} regSample biasPop {objectives_names}.pickle",
		"train": train_idx,
		"validation": validation_idx,
		"test": test_idx,
		"result": result
	}
	return package

for result in Parallel(n_jobs=-1)(delayed(func)(package) for package in packages):
	with open(os.path.join("results", result['file']), 'wb') as fh:
		pickle.dump(result, fh)

In [7]:
def func(package):
	x, y, train_idx, validation_idx, test_idx, objectives, dataset_name = package
	objectives_names = [re.search(r'\.([a-zA-Z_][a-zA-Z0-9_]*)\s', str(objective_name)).group(1) for objective_name in objectives]
	objectives_names = '_'.join(objectives_names)		
	x_train, y_train = x[train_idx], y[train_idx]
	x_validation, y_validation = x[validation_idx], y[validation_idx]
	x_train, y_train = over_sample( # <------ SAMPLING
		x_train, 
		y_train
	)
	problem = GenericOptimizer(
		x_train, 
		y_train, 
		x_validation, 
		y_validation,
		objectives,
		"sequential"
	)
	algorithm = NSGA2(
		pop_size=GenericOptimizer.population_size, 
		sampling=BiasedBinarySampling(y_train, 0.4, 0.7), # <----- POPULATION
		crossover=HUX(), 
		mutation=BitflipMutation(), 
		eliminate_duplicates=True,
	)
	result = minimize(
		problem, 
		algorithm, 
		('n_gen', GenericOptimizer.population_size),
		save_history=False
	)
	package = {
		"file": f"{dataset_name} overSample biasPop {objectives_names}.pickle",
		"train": train_idx,
		"validation": validation_idx,
		"test": test_idx,
		"result": result
	}
	return package

for result in Parallel(n_jobs=-1)(delayed(func)(package) for package in packages):
	with open(os.path.join("results", result['file']), 'wb') as fh:
		pickle.dump(result, fh)

In [34]:
def func(package):
	x, y, train_idx, validation_idx, test_idx, objectives, dataset_name = package
	objectives_names = [re.search(r'\.([a-zA-Z_][a-zA-Z0-9_]*)\s', str(objective_name)).group(1) for objective_name in objectives]
	objectives_names = '_'.join(objectives_names)		
	x_train, y_train = x[train_idx], y[train_idx]
	x_validation, y_validation = x[validation_idx], y[validation_idx]
	# x_train, y_train = over_sample( # <------ SAMPLING
	# 	x_train, 
	# 	y_train
	# )
	problem = GenericOptimizer(
		x_train, 
		y_train, 
		x_validation, 
		y_validation,
		objectives,
		"sequential"
	)
	algorithm = NSGA2(
		pop_size=GenericOptimizer.population_size, 
		sampling=BinaryRandomSampling(), # <----- POPULATION
		crossover=HUX(), 
		mutation=BitflipMutation(), 
		eliminate_duplicates=True,
	)
	result = minimize(
		problem, 
		algorithm, 
		('n_gen', GenericOptimizer.population_size),
		save_history=False
	)
	package = {
		"file": f"{dataset_name} regSample randPop {objectives_names}.pickle",
		"train": train_idx,
		"validation": validation_idx,
		"test": test_idx,
		"result": result
	}
	return package

for result in Parallel(n_jobs=-1)(delayed(func)(package) for package in packages):
	with open(os.path.join("results", result['file']), 'wb') as fh:
		pickle.dump(result, fh)

# Calculate results

In [7]:
datasets = {}
for dat_file in Path('datasets').rglob('*.dat'):

	name = str(dat_file.name).replace(".dat", "")
	dataset, attributes = load_KEEL_dataset(dat_file)

	y = dataset['Class']	
	raw_X = dataset.drop(columns=['Class'])
	
	label_encoder = LabelEncoder()
	y = label_encoder.fit_transform(y)

	pipeline = create_KEEL_preprocessor_pipeline(attributes)

	pipeline.fit(raw_X, y)
	X = pipeline.transform(raw_X)
	label_encoder = LabelEncoder()
	y = label_encoder.fit_transform(y)

	datasets[name] = (X, y)

In [None]:
for name in datasets:
	print(name)

	x, y = datasets[name]

	for c, (train_idx, validation_idx, test_idx) in enumerate(prepare_splits(X, y)):
		
		init_pop = BiasedBinarySampling(y, 0.4, 0.7)
		
		problem = GenericOptimizer(
			x_train, 
			y_train, 
			x_validation, 
			y_validation,
			objectives,
			"sequential"
		)
		algorithm = NSGA2(
			pop_size=GenericOptimizer.population_size, 
			sampling=BinaryRandomSampling(), # <----- POPULATION
			crossover=HUX(), 
			mutation=BitflipMutation(), 
			eliminate_duplicates=True,
		)
		result = minimize(
			problem, 
			algorithm, 
			('n_gen', GenericOptimizer.population_size),
			save_history=False
		)


abalone9-18


AttributeError: 'BiasedBinarySampling' object has no attribute 'X'

In [146]:
for name in datasets:
	print(name)

	x, y = datasets[name]
	print(GenericOptimizer.calculate_IR(y))
	print("\n")

abalone9-18
16.404761904761905


ecoli-0-1-3-7_vs_2-6
39.142857142857146


ecoli4
15.8


glass2
11.588235294117647


glass5
22.77777777777778


haberman
2.7777777777777777


new-thyroid1
5.142857142857143


pima
1.8656716417910448


vehicle0
3.251256281407035


vehicle1
2.8986175115207375


winequality-red-8_vs_6-7
46.5


winequality-white-3-9_vs_5
58.28


wisconsin
1.8577405857740585


yeast-1_vs_7
14.3


yeast1
2.4592074592074593


yeast3
8.104294478527608


yeast4
28.098039215686274




In [36]:
execution_name_to_config = {}
file_to_config = {}
for file in os.listdir("results"):
	segments = file.split("_")
	
	execution_name = "_".join(segments[1:]).replace(".pickle", "")

	if execution_name not in execution_name_to_config:
		execution_name_to_config[execution_name] = []

	execution_name_to_config[execution_name].append(file)
	
	name = file.replace(".pickle", "")
	segments = name.split(" ")

	file_to_config[file] = {
		"Dataset": "_".join(segments[0].split("_")[1:]),
		"Sampling": segments[1],
		"Population": segments[2],
		"Objectives": segments[3],
		"Split Num": int(segments[0].split("_")[0])
	}

In [162]:
loaded_files = {}
for execution_name in execution_name_to_config:
	for config_file in execution_name_to_config[execution_name]:
		with open(f"results/{config_file}", 'rb') as fh:
			result_dict = pickle.load(fh)
		loaded_files[config_file] = result_dict

In [60]:
report_rows = []
comparable_rows = []
file_to_results = {}
for execution_name in execution_name_to_config:
    
	optimized_validation_acc = []
	optimized_test_acc = []
	optimized_ideal_test_acc = []

	baseline_test_acc = []
	baseline_validaion_acc = []

	config_names = []
	validation_inclusions = []
	test_inclusions = []

	for config_file in execution_name_to_config[execution_name]:

		if file_to_config[config_file]['Dataset'] not in datasets:
			continue

		result_dict = loaded_files[config_file]

		train_idx = result_dict['train']
		validation_idx = result_dict['validation']
		test_idx = result_dict['test']
		result = result_dict['result']

		X, y = datasets[file_to_config[config_file]['Dataset']]

		x_train, y_train = X[train_idx], y[train_idx]
		x_validation, y_validation = X[validation_idx], y[validation_idx]
		x_test, y_test = X[test_idx], y[test_idx]

		if file_to_config[config_file]['Sampling'] == "overSample":
			x_train, y_train = over_sample(
				x_train, 
				y_train
			)
			
		num_validation, num_test, baseline_validation_err, baseline_test_err, optimized_validation_err, optimized_test_err, ideal_optimized_test_err = calculate_metrics(
			x_train, 
			y_train, 
			x_validation, 
			y_validation, 
			x_test, 
			y_test, 
			result
		)

		config_names.append(config_file)

		validation_inclusions.append(num_validation)
		test_inclusions.append(num_test)

		baseline_validaion_acc.append(1-baseline_validation_err)
		baseline_test_acc.append(1-baseline_test_err)
		optimized_validation_acc.append(1-optimized_validation_err)
		optimized_test_acc.append(1-optimized_test_err)
		optimized_ideal_test_acc.append(1-ideal_optimized_test_err)

	if file_to_config[config_file]['Dataset'] not in datasets:
		continue
		
	val_pval = ranksums(baseline_validaion_acc, optimized_validation_acc).pvalue
	test_pval = ranksums(baseline_test_acc, optimized_test_acc).pvalue
	ideal_test_pval = ranksums(baseline_test_acc, optimized_ideal_test_acc).pvalue

	report_rows.append({
		"Dataset": file_to_config[config_file]['Dataset'],
		"Sampling": "over sampling" if "overSample" in execution_name else "regular sampling",
		"Population": "random population" if "randPop" in execution_name else "biased population",
		"Optimization": file_to_config[config_file]['Objectives'],

		"Total Size": X.shape[0],
		"Optimized Validation Size": np.mean(validation_inclusions),
		"Optimized Test Size": np.mean(test_inclusions),
		
		"Validation Baseline Acc": np.mean(baseline_validaion_acc),
		"Test Baseline Acc": np.mean(baseline_test_acc),
		
		"Optimized Validation Acc": np.mean(optimized_validation_acc),
		"Optimized Test Acc": np.mean(optimized_test_acc),
		"Ideal Test Acc": np.mean(optimized_ideal_test_acc),
		
		"Validation Diff": np.mean(np.subtract(optimized_validation_acc, baseline_validaion_acc)),
		"Test Diff": np.mean(np.subtract(optimized_test_acc, baseline_test_acc)),
		"Ideal Test Diff": np.mean(np.subtract(optimized_ideal_test_acc, baseline_test_acc)),
		
		"Validation P-value": val_pval,
		"Test P-value": test_pval,
		"Ideal Test P-value": ideal_test_pval
	})

	comparable_rows.append({
		"Dataset": file_to_config[config_file]['Dataset'],
		"Configs": config_names,
		"Sampling": "over sampling" if "overSample" in execution_name else "regular sampling",
		"Population": "random population" if "randPop" in execution_name else "biased population",
		"Optimization": file_to_config[config_file]['Objectives'],

		"Total Size": X.shape[0],
		"Optimized Validation Size": validation_inclusions,
		"Optimized Test Size": test_inclusions,
		
		"Validation Baseline Acc": baseline_validaion_acc,
		"Test Baseline Acc": baseline_test_acc,
		
		"Optimized Validation Acc": optimized_validation_acc,
		"Optimized Test Acc": optimized_test_acc,
		"Ideal Test Acc": optimized_ideal_test_acc,
		
		"Validation Diff": np.subtract(optimized_validation_acc, baseline_validaion_acc),
		"Test Diff": np.subtract(optimized_test_acc, baseline_test_acc),
		"Ideal Test Diff": np.subtract(optimized_ideal_test_acc, baseline_test_acc),
		
		"Validation P-value": val_pval,
		"Test P-value": test_pval,
		"Ideal Test P-value": ideal_test_pval
	})
	


In [43]:
df = pd.DataFrame.from_records(report_rows)
df.to_excel("REPORT.xlsx", index=False)
df

Unnamed: 0,Dataset,Sampling,Population,Optimization,Total Size,Optimized Validation Size,Optimized Test Size,Validation Baseline Acc,Test Baseline Acc,Optimized Validation Acc,Optimized Test Acc,Ideal Test Acc,Validation Diff,Test Diff,Ideal Test Diff,Validation P-value,Test P-value,Ideal Test P-value
0,abalone9-18,over sampling,biased population,calculate_class0_error_calculate_class1_error,731,168.935484,169.645161,0.949233,0.948881,0.976027,0.946765,0.959281,0.026794,-0.002115,0.010400,1.967156e-11,0.352793,0.001733
1,abalone9-18,over sampling,biased population,calculate_overall_error_calculate_num_examples,731,27.903226,13.322581,0.949233,0.948881,0.978495,0.937952,0.955050,0.029261,-0.010929,0.006170,1.544705e-11,0.017347,0.072651
2,abalone9-18,over sampling,biased population,calculate_overall_error_calculate_overall_inve...,731,32.225806,13.290323,0.949233,0.948881,0.981315,0.936894,0.953816,0.032082,-0.011987,0.004936,1.401846e-11,0.001174,0.145080
3,abalone9-18,over sampling,random population,calculate_class0_error_calculate_class1_error,731,191.516129,192.741935,0.949233,0.948881,0.973206,0.947647,0.959633,0.023973,-0.001234,0.010753,3.030386e-11,0.622189,0.000608
4,abalone9-18,over sampling,random population,calculate_overall_error_calculate_num_examples,731,23.806452,13.451613,0.949233,0.948881,0.978495,0.936718,0.953464,0.029261,-0.012163,0.004583,1.544705e-11,0.003486,0.169858
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,yeast4,regular sampling,biased population,calculate_overall_error_calculate_overall_inve...,1484,46.032258,34.387097,0.964177,0.967133,0.987045,0.958351,0.965655,0.022868,-0.008782,-0.001478,1.335353e-11,0.000036,0.563790
217,yeast4,regular sampling,random population,calculate_class0_error_calculate_class1_error,1484,367.806452,367.225806,0.964177,0.967133,0.981045,0.966785,0.971394,0.016868,-0.000348,0.004260,1.401846e-11,0.871382,0.000657
218,yeast4,regular sampling,random population,calculate_overall_error,1484,368.064516,369.709677,0.964786,0.966959,0.980350,0.967133,0.972176,0.015564,0.000174,0.005217,1.471579e-11,0.549614,0.000033
219,yeast4,regular sampling,random population,calculate_overall_error_calculate_num_examples,1484,49.387097,44.451613,0.964177,0.967133,0.984262,0.962438,0.965829,0.020085,-0.004695,-0.001304,1.335353e-11,0.004761,0.485872


In [None]:
# df = pd.DataFrame.from_records(comparable_rows)
# df.to_excel("FOR_COMPARE.xlsx", index=False)
df = pd.read_excel("FOR_COMPARE.xlsx")
df

Unnamed: 0,Dataset,Configs,Sampling,Population,Optimization,Total Size,Optimized Validation Size,Optimized Test Size,Validation Baseline Acc,Test Baseline Acc,Optimized Validation Acc,Optimized Test Acc,Ideal Test Acc,Validation Diff,Test Diff,Ideal Test Diff,Validation P-value,Test P-value,Ideal Test P-value
0,abalone9-18,['0_abalone9-18 overSample biasPop calculate_c...,over sampling,biased population,calculate_class0_error_calculate_class1_error,731,"[np.int64(191), np.int64(168), np.int64(163), ...","[np.int64(176), np.int64(169), np.int64(158), ...","[0.9398907103825137, 0.9508196721311475, 0.950...","[0.9508196721311475, 0.9398907103825137, 0.950...","[0.9781420765027322, 0.9890710382513661, 0.978...","[0.9508196721311475, 0.9234972677595629, 0.956...","[0.9617486338797814, 0.9453551912568307, 0.972...",[0.03825137 0.03825137 0.0273224 0.02185792 0...,[ 0. -0.01639344 0.00546448 -0.005464...,[ 0.01092896 0.00546448 0.02185792 0.010928...,1.967156e-11,0.352793,0.001733
1,abalone9-18,['0_abalone9-18 overSample biasPop calculate_o...,over sampling,biased population,calculate_overall_error_calculate_num_examples,731,"[np.int64(46), np.int64(76), np.int64(8), np.i...","[np.int64(7), np.int64(8), np.int64(8), np.int...","[0.9398907103825137, 0.9508196721311475, 0.950...","[0.9508196721311475, 0.9398907103825137, 0.950...","[0.9781420765027322, 0.9890710382513661, 0.978...","[0.9180327868852459, 0.9234972677595629, 0.961...","[0.9617486338797814, 0.9398907103825137, 0.967...",[0.03825137 0.03825137 0.0273224 0.02185792 0...,[-0.03278689 -0.01639344 0.01092896 -0.005464...,[ 0.01092896 0. 0.01639344 -0.005464...,1.544705e-11,0.017347,0.072651
2,abalone9-18,['0_abalone9-18 overSample biasPop calculate_o...,over sampling,biased population,calculate_overall_error_calculate_overall_inve...,731,"[np.int64(49), np.int64(45), np.int64(21), np....","[np.int64(11), np.int64(11), np.int64(6), np.i...","[0.9398907103825137, 0.9508196721311475, 0.950...","[0.9508196721311475, 0.9398907103825137, 0.950...","[0.9781420765027322, 0.9890710382513661, 0.978...","[0.912568306010929, 0.9180327868852459, 0.9398...","[0.9508196721311475, 0.9398907103825137, 0.956...",[0.03825137 0.03825137 0.0273224 0.0273224 0...,[-0.03825137 -0.02185792 -0.01092896 -0.016393...,[ 0. 0. 0.00546448 0. ...,1.401846e-11,0.001174,0.145080
3,abalone9-18,['0_abalone9-18 overSample randPop calculate_c...,over sampling,random population,calculate_class0_error_calculate_class1_error,731,"[np.int64(192), np.int64(187), np.int64(196), ...","[np.int64(200), np.int64(199), np.int64(201), ...","[0.9398907103825137, 0.9508196721311475, 0.950...","[0.9508196721311475, 0.9398907103825137, 0.950...","[0.9781420765027322, 0.9836065573770492, 0.978...","[0.9508196721311475, 0.9234972677595629, 0.950...","[0.9617486338797814, 0.9453551912568307, 0.972...",[0.03825137 0.03278689 0.0273224 0.01639344 0...,[ 0. -0.01639344 0. 0. ...,[ 0.01092896 0.00546448 0.02185792 0.010928...,3.030386e-11,0.622189,0.000608
4,abalone9-18,['0_abalone9-18 overSample randPop calculate_o...,over sampling,random population,calculate_overall_error_calculate_num_examples,731,"[np.int64(46), np.int64(10), np.int64(9), np.i...","[np.int64(12), np.int64(10), np.int64(7), np.i...","[0.9398907103825137, 0.9508196721311475, 0.950...","[0.9508196721311475, 0.9398907103825137, 0.950...","[0.9781420765027322, 0.9836065573770492, 0.983...","[0.9234972677595629, 0.9289617486338798, 0.939...","[0.9562841530054644, 0.9289617486338798, 0.950...",[0.03825137 0.03278689 0.03278689 0.0273224 0...,[-0.0273224 -0.01092896 -0.01092896 -0.027322...,[ 0.00546448 -0.01092896 0. 0. ...,1.544705e-11,0.003486,0.169858
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,yeast4,['0_yeast4 regSample biasPop calculate_overall...,regular sampling,biased population,calculate_overall_error_calculate_overall_inve...,1484,"[np.int64(53), np.int64(24), np.int64(56), np....","[np.int64(32), np.int64(24), np.int64(49), np....","[0.9649595687331537, 0.967654986522911, 0.9622...","[0.967654986522911, 0.967654986522911, 0.97574...","[0.9919137466307277, 0.9838274932614556, 0.991...","[0.9595687331536388, 0.9703504043126685, 0.973...","[0.9622641509433962, 0.9730458221024259, 0.978...",[0.02695418 0.01617251 0.0296496 0.00808625 0...,[-0.00808625 0.00269542 -0.00269542 -0.018867...,[-0.00539084 0.00539084 0.00269542 -0.002695...,1.335353e-11,0.000036,0.563790
217,yeast4,['0_yeast4 regSample randPop calculate_class0_...,regular sampling,random population,calculate_class0_error_calculate_class1_error,1484,"[np.int64(365), np.int64(386), np.int64(334), ...","[np.int64(376), np.int64(373), np.int64(342), ...","[0.9649595687331537, 0.967654986522911, 0.9622...","[0.967654986522911, 0.967654986522911, 0.97574...","[0.9811320754716981, 0.9784366576819407, 0.983...","[0.967654986522911, 0.9730458221024259, 0.9730...","[0.9730458221024259, 0.9757412398921833, 0.975...",[0.01617251 0.01078167 0.02156334 0.00808625 0...,[ 0. 0.00539084 -0.00269542 -0.010781...,[ 0.00539084 0.00808625 0. -0.005390...,1.401846e-11,0.871382,0.000657
218,yeast4,['0_yeast4 regSample randPop calculate_overall...,regular sampling,random population,calculate_overall_error,1484,"[np.int64(346), np.int64(354), np.int64(357), ...","[np.int64(335), np.int64(369), np.int64(356), ...","[0.9649595687331537, 0.9622641509433962, 0.964...","[0.9622641509433962, 0.9757412398921833, 0.964...","[0.9838274932614556, 0.9757412398921833, 0.981...","[0.9595687331536388, 0.967654986522911, 0.9676...","[0.9649595687331537, 0.9730458221024259, 0.975...",[0.01886792 0.01347709 0.01617251 0.01347709 0...,[-0.00269542 -0.00808625 0.00269542 -0.005390...,[ 0.00269542 -0.00269542 0.01078167 0.008086...,1.471579e-11,0.549614,0.000033
219,yeast4,['0_yeast4 regSample randPop calculate_overall...,regular sampling,random population,calculate_overall_error_calculate_num_examples,1484,"[np.int64(59), np.int64(24), np.int64(38), np....","[np.int64(59), np.int64(24), np.int64(38), np....","[0.9649595687331537, 0.967654986522911, 0.9622...","[0.967654986522911, 0.967654986522911, 0.97574...","[0.9892183288409704, 0.9784366576819407, 0.986...","[0.9730458221024259, 0.9649595687331537, 0.967...","[0.9730458221024259, 0.967654986522911, 0.9676...",[0.02425876 0.01078167 0.02425876 0.01347709 0...,[ 0.00539084 -0.00269542 -0.00808625 -0.013477...,[ 0.00539084 0. -0.00808625 -0.008086...,1.335353e-11,0.004761,0.485872


# Winners calculation

## Reguar VS Over sampling

In [148]:
WTL = {
	"Optimized Validation Acc": {"Over sample wins": 0, "Over sample losses": 0, "Tie": 0},
	"Optimized Test Acc": {"Over sample wins": 0, "Over sample losses": 0, "Tie": 0},
	"Ideal Test Acc": {"Over sample wins": 0, "Over sample losses": 0, "Tie": 0},
	"Optimized Validation Size": {"Over sample wins": 0, "Over sample losses": 0, "Tie": 0},
	"Optimized Test Size": {"Over sample wins": 0, "Over sample losses": 0, "Tie": 0}
}

values = []
for dataset in df['Dataset'].unique():
    
	dataset_rows = df.where(df["Dataset"] == dataset).dropna()
	for optimization in dataset_rows['Optimization'].unique():
		for population in dataset_rows['Population'].unique():

			try:
				optimizer_row = df.where(dataset_rows["Optimization"] == optimization).dropna() 
				population_row = df.where(optimizer_row["Population"] == population).dropna()
				
				over_sample = df.where(population_row["Sampling"] == "over sampling").dropna()
				regular_sample = df.where(population_row["Sampling"] == "regular sampling").dropna()

				for column in WTL:
					over_sample_values = np.array(list(over_sample[column])[0])
					regular_sample_values = np.array(list(regular_sample[column])[0])

					if ranksums(over_sample_values, regular_sample_values).pvalue >= 0.05: WTL[column]['Tie'] += 1
					elif np.mean(np.subtract(over_sample_values, regular_sample_values)) > 0: 
						values.append(dataset)
						WTL[column]["Over sample wins"] += 1
					else: WTL[column]["Over sample losses"] += 1	

			except Exception as e:
				pass
	
WTL

{'Optimized Validation Acc': {'Over sample wins': 30,
  'Over sample losses': 8,
  'Tie': 64},
 'Optimized Test Acc': {'Over sample wins': 2,
  'Over sample losses': 16,
  'Tie': 84},
 'Ideal Test Acc': {'Over sample wins': 4,
  'Over sample losses': 12,
  'Tie': 86},
 'Optimized Validation Size': {'Over sample wins': 70,
  'Over sample losses': 0,
  'Tie': 32},
 'Optimized Test Size': {'Over sample wins': 63,
  'Over sample losses': 0,
  'Tie': 39}}

In [None]:
# pd.DataFrame(values).value_counts()

## Random vs Bias

In [9]:
WTL = {
	"Optimized Validation Acc": {"Bias pop wins": 0, "Bias pop losses": 0, "Tie": 0},
	"Optimized Test Acc": {"Bias pop wins": 0, "Bias pop losses": 0, "Tie": 0},
	"Ideal Test Acc": {"Bias pop wins": 0, "Bias pop losses": 0, "Tie": 0},
	"Optimized Validation Size": {"Bias pop wins": 0, "Bias pop losses": 0, "Tie": 0},
	"Optimized Test Size": {"Bias pop wins": 0, "Bias pop losses": 0, "Tie": 0}
}

for dataset in df['Dataset'].unique():
    
	dataset_rows = df.where(df["Dataset"] == dataset).dropna()
	for optimization in dataset_rows['Optimization'].unique():
		for sampling in dataset_rows['Sampling'].unique():

			try:
				optimizer_row = df.where(dataset_rows["Optimization"] == optimization).dropna() 
				sampling_row = df.where(optimizer_row["Sampling"] == sampling).dropna()
				
				random_pop = df.where(sampling_row["Population"] == "random population").dropna()
				bias_pop = df.where(sampling_row["Population"] == "biased population").dropna()

				for column in WTL:
					random_pop_values = np.array(list(random_pop[column])[0])
					bias_pop_values = np.array(list(bias_pop[column])[0])

					if ranksums(bias_pop_values, random_pop_values).pvalue >= 0.05: WTL[column]['Tie'] += 1
					elif np.mean(np.subtract(bias_pop_values, random_pop_values)) > 0: WTL[column]["Bias pop wins"] += 1
					else: WTL[column]["Bias pop losses"] += 1	

			except Exception as e:
				pass

WTL
# for key in WTL:
# 	print(key, f"{WTL[key]['Bias pop wins']}/{WTL[key]['Bias pop losses']}/{WTL[key]['Tie']}.")

{'Optimized Validation Acc': {'Bias pop wins': 0,
  'Bias pop losses': 0,
  'Tie': 102},
 'Optimized Test Acc': {'Bias pop wins': 0, 'Bias pop losses': 0, 'Tie': 102},
 'Ideal Test Acc': {'Bias pop wins': 0, 'Bias pop losses': 0, 'Tie': 102},
 'Optimized Validation Size': {'Bias pop wins': 0,
  'Bias pop losses': 0,
  'Tie': 102},
 'Optimized Test Size': {'Bias pop wins': 0, 'Bias pop losses': 0, 'Tie': 102}}

## Baseline vs Class sensitive error

In [4]:
WTL = {
	"Optimized Validation Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Test Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Ideal Test Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Validation Size": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Test Size": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0}
}

for dataset in df['Dataset'].unique():
	for sampling in dataset_rows['Sampling'].unique():
		for population in dataset_rows['Population'].unique():
			try:
				rows = df.where(df["Dataset"] == dataset).dropna()
				rows = df.where(rows["Sampling"] == sampling).dropna() 
				rows = df.where(rows["Population"] == population).dropna()
				rows = df.where(rows["Optimization"] == "calculate_class0_error_calculate_class1_error").dropna()

				validation_acc_baseline = np.array(list(rows['Validation Baseline Acc'])[0])
				optimized_validation_acc = np.array(list(rows['Optimized Validation Acc'])[0])

				test_acc_baseline = np.array(list(rows['Test Baseline Acc'])[0])
				optimized_test_acc = np.array(list(rows['Optimized Test Acc'])[0])
				ideal_test_acc = np.array(list(rows['Ideal Test Acc'])[0])

				dataset_size = np.array(list(rows['Total Size'])[0])
				validation_size = np.array(list(rows['Optimized Validation Size'])[0])
				ideal_test_size = np.array(list(rows['Optimized Test Size'])[0])

				ideal_validation_reduction_rates = []
				for size_of_ideal_validation_instance in validation_size:
					ideal_validation_reduction_rates.append((dataset_size - size_of_ideal_validation_instance) / dataset_size)

				ideal_test_reduction_rates = []
				for size_of_ideal_test_instance in ideal_test_size:
					ideal_test_reduction_rates.append((dataset_size - size_of_ideal_test_instance) / dataset_size)
					

				# Validation
				if ranksums(optimized_validation_acc, validation_acc_baseline).pvalue >= 0.05: 
					WTL["Optimized Validation Acc"]['Tie'] += 1
				elif np.mean(np.subtract(optimized_validation_acc, validation_acc_baseline)) > 0: 
					WTL["Optimized Validation Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Validation Acc"]["Optimizer losses"] += 1	

				# Test
				if ranksums(optimized_test_acc, test_acc_baseline).pvalue >= 0.05: 
					WTL["Optimized Test Acc"]['Tie'] += 1
				elif np.mean(np.subtract(optimized_test_acc, test_acc_baseline)) > 0: 
					WTL["Optimized Test Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Test Acc"]["Optimizer losses"] += 1	

				# Ideal Test
				if ranksums(ideal_test_acc, test_acc_baseline).pvalue >= 0.05: 
					WTL["Ideal Test Acc"]['Tie'] += 1
				elif np.mean(np.subtract(ideal_test_acc, test_acc_baseline)) > 0: 
					WTL["Ideal Test Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Ideal Test Acc"]["Optimizer losses"] += 1	

				# Optimized validation size
				if ranksums(0, ideal_validation_reduction_rates).pvalue >= 0.05: 
					WTL["Optimized Validation Size"]['Tie'] += 1
				elif np.mean(ideal_validation_reduction_rates) > 0.1: 
					WTL["Optimized Validation Size"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Validation Size"]["Optimizer losses"] += 1	


				print(dataset)
				print(dataset_size)
				print(ideal_test_reduction_rates)
				print("\n")
				# Optimized ideal test size
				if ranksums(0, ideal_test_reduction_rates).pvalue >= 0.05: 
					WTL["Optimized Test Size"]['Tie'] += 1
				elif np.mean(ideal_test_reduction_rates) > 0.1: 
					WTL["Optimized Test Size"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Test Size"]["Optimizer losses"] += 1

			except Exception as e:
				print(e)

WTL
# for key in WTL:
# 	print(key, f"\n{WTL[key]['Optimizer wins']}/{WTL[key]['Optimizer losses']}/{WTL[key]['Tie']}.\n\n")

NameError: name 'dataset_rows' is not defined

## Baseline vs Error + number of examples

In [153]:
WTL = {
	"Optimized Validation Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Test Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Ideal Test Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Validation Size": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Test Size": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0}
}

for dataset in df['Dataset'].unique():
	for sampling in dataset_rows['Sampling'].unique():
		for population in dataset_rows['Population'].unique():
			try:
				rows = df.where(df["Dataset"] == dataset).dropna()
				rows = df.where(rows["Sampling"] == sampling).dropna() 
				rows = df.where(rows["Population"] == population).dropna()
				rows = df.where(rows["Optimization"] == "calculate_overall_error_calculate_num_examples").dropna()

				validation_acc_baseline = np.array(list(rows['Validation Baseline Acc'])[0])
				optimized_validation_acc = np.array(list(rows['Optimized Validation Acc'])[0])

				test_acc_baseline = np.array(list(rows['Test Baseline Acc'])[0])
				optimized_test_acc = np.array(list(rows['Optimized Test Acc'])[0])
				ideal_test_acc = np.array(list(rows['Ideal Test Acc'])[0])

				dataset_size = np.array(list(rows['Total Size'])[0])
				validation_size = np.array(list(rows['Optimized Validation Size'])[0])
				ideal_test_size = np.array(list(rows['Optimized Test Size'])[0])

				ideal_validation_reduction_rates = []
				for size_of_ideal_validation_instance in validation_size:
					ideal_validation_reduction_rates.append((dataset_size - size_of_ideal_validation_instance) / dataset_size)

				ideal_test_reduction_rates = []
				for size_of_ideal_test_instance in ideal_test_size:
					ideal_test_reduction_rates.append((dataset_size - size_of_ideal_test_instance) / dataset_size)
					

				# Validation
				if ranksums(optimized_validation_acc, validation_acc_baseline).pvalue >= 0.05: 
					WTL["Optimized Validation Acc"]['Tie'] += 1
				elif np.mean(np.subtract(optimized_validation_acc, validation_acc_baseline)) > 0: 
					WTL["Optimized Validation Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Validation Acc"]["Optimizer losses"] += 1	

				# Test
				if ranksums(optimized_test_acc, test_acc_baseline).pvalue >= 0.05: 
					WTL["Optimized Test Acc"]['Tie'] += 1
				elif np.mean(np.subtract(optimized_test_acc, test_acc_baseline)) > 0: 
					WTL["Optimized Test Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Test Acc"]["Optimizer losses"] += 1	

				# Ideal Test
				if ranksums(ideal_test_acc, test_acc_baseline).pvalue >= 0.05: 
					WTL["Ideal Test Acc"]['Tie'] += 1
				elif np.mean(np.subtract(ideal_test_acc, test_acc_baseline)) > 0: 
					WTL["Ideal Test Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Ideal Test Acc"]["Optimizer losses"] += 1	

				# Optimized validation size
				if ranksums(0, ideal_validation_reduction_rates).pvalue >= 0.05: 
					WTL["Optimized Validation Size"]['Tie'] += 1
				elif np.mean(ideal_validation_reduction_rates) > 0.1: 
					WTL["Optimized Validation Size"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Validation Size"]["Optimizer losses"] += 1	

				# Optimized ideal test size
				if ranksums(0, ideal_test_reduction_rates).pvalue >= 0.05: 
					WTL["Optimized Test Size"]['Tie'] += 1
				elif np.mean(ideal_test_reduction_rates) > 0.1: 
					WTL["Optimized Test Size"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Test Size"]["Optimizer losses"] += 1

			except Exception as e:
				print(e)

WTL
# for key in WTL:
# 	print(key, f"\n{WTL[key]['Optimizer wins']}/{WTL[key]['Optimizer losses']}/{WTL[key]['Tie']}.\n\n")

{'Optimized Validation Acc': {'Optimizer wins': 67,
  'Optimizer losses': 0,
  'Tie': 1},
 'Optimized Test Acc': {'Optimizer wins': 5,
  'Optimizer losses': 31,
  'Tie': 32},
 'Ideal Test Acc': {'Optimizer wins': 27, 'Optimizer losses': 0, 'Tie': 41},
 'Optimized Validation Size': {'Optimizer wins': 0,
  'Optimizer losses': 0,
  'Tie': 68},
 'Optimized Test Size': {'Optimizer wins': 0,
  'Optimizer losses': 0,
  'Tie': 68}}

## Baseline vs Error+Number of examples+Inverse F1

In [154]:
WTL = {
	"Optimized Validation Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Test Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Ideal Test Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Validation Size": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Test Size": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0}
}

for dataset in df['Dataset'].unique():
	for sampling in dataset_rows['Sampling'].unique():
		for population in dataset_rows['Population'].unique():
			try:
				rows = df.where(df["Dataset"] == dataset).dropna()
				rows = df.where(rows["Sampling"] == sampling).dropna() 
				rows = df.where(rows["Population"] == population).dropna()
				rows = df.where(rows["Optimization"] == "calculate_overall_error_calculate_overall_inverse_f1_calculate_num_examples").dropna()

				validation_acc_baseline = np.array(list(rows['Validation Baseline Acc'])[0])
				optimized_validation_acc = np.array(list(rows['Optimized Validation Acc'])[0])

				test_acc_baseline = np.array(list(rows['Test Baseline Acc'])[0])
				optimized_test_acc = np.array(list(rows['Optimized Test Acc'])[0])
				ideal_test_acc = np.array(list(rows['Ideal Test Acc'])[0])

				dataset_size = np.array(list(rows['Total Size'])[0])
				validation_size = np.array(list(rows['Optimized Validation Size'])[0])
				ideal_test_size = np.array(list(rows['Optimized Test Size'])[0])

				ideal_validation_reduction_rates = []
				for size_of_ideal_validation_instance in validation_size:
					ideal_validation_reduction_rates.append((dataset_size - size_of_ideal_validation_instance) / dataset_size)

				ideal_test_reduction_rates = []
				for size_of_ideal_test_instance in ideal_test_size:
					ideal_test_reduction_rates.append((dataset_size - size_of_ideal_test_instance) / dataset_size)
					

				# Validation
				if ranksums(optimized_validation_acc, validation_acc_baseline).pvalue >= 0.05: 
					WTL["Optimized Validation Acc"]['Tie'] += 1
				elif np.mean(np.subtract(optimized_validation_acc, validation_acc_baseline)) > 0: 
					WTL["Optimized Validation Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Validation Acc"]["Optimizer losses"] += 1	

				# Test
				if ranksums(optimized_test_acc, test_acc_baseline).pvalue >= 0.05: 
					WTL["Optimized Test Acc"]['Tie'] += 1
				elif np.mean(np.subtract(optimized_test_acc, test_acc_baseline)) > 0: 
					WTL["Optimized Test Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Test Acc"]["Optimizer losses"] += 1	

				# Ideal Test
				if ranksums(ideal_test_acc, test_acc_baseline).pvalue >= 0.05: 
					WTL["Ideal Test Acc"]['Tie'] += 1
				elif np.mean(np.subtract(ideal_test_acc, test_acc_baseline)) > 0: 
					WTL["Ideal Test Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Ideal Test Acc"]["Optimizer losses"] += 1	

				# Optimized validation size
				if ranksums(0, ideal_validation_reduction_rates).pvalue >= 0.05: 
					WTL["Optimized Validation Size"]['Tie'] += 1
				elif np.mean(ideal_validation_reduction_rates) > 0.1: 
					WTL["Optimized Validation Size"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Validation Size"]["Optimizer losses"] += 1	

				# Optimized ideal test size
				if ranksums(0, ideal_test_reduction_rates).pvalue >= 0.05: 
					WTL["Optimized Test Size"]['Tie'] += 1
				elif np.mean(ideal_test_reduction_rates) > 0.1: 
					WTL["Optimized Test Size"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Test Size"]["Optimizer losses"] += 1

			except Exception as e:
				print(e)

WTL
# for key in WTL:
# 	print(key, f"\n{WTL[key]['Optimizer wins']}/{WTL[key]['Optimizer losses']}/{WTL[key]['Tie']}.\n\n")

{'Optimized Validation Acc': {'Optimizer wins': 67,
  'Optimizer losses': 0,
  'Tie': 1},
 'Optimized Test Acc': {'Optimizer wins': 3,
  'Optimizer losses': 36,
  'Tie': 29},
 'Ideal Test Acc': {'Optimizer wins': 25, 'Optimizer losses': 3, 'Tie': 40},
 'Optimized Validation Size': {'Optimizer wins': 0,
  'Optimizer losses': 0,
  'Tie': 68},
 'Optimized Test Size': {'Optimizer wins': 0,
  'Optimizer losses': 0,
  'Tie': 68}}

## Baseline vs Error

In [157]:
WTL = {
	"Optimized Validation Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Test Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Ideal Test Acc": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Validation Size": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0},
	"Optimized Test Size": {"Optimizer wins": 0, "Optimizer losses": 0, "Tie": 0}
}

for dataset in df['Dataset'].unique():
	for sampling in dataset_rows['Sampling'].unique():
		for population in dataset_rows['Population'].unique():
			try:
				rows = df.where(df["Dataset"] == dataset).dropna()
				rows = df.where(rows["Sampling"] == sampling).dropna() 
				rows = df.where(rows["Population"] == population).dropna()
				rows = df.where(rows["Optimization"] == "calculate_overall_error").dropna()

				validation_acc_baseline = np.array(list(rows['Validation Baseline Acc'])[0])
				optimized_validation_acc = np.array(list(rows['Optimized Validation Acc'])[0])

				test_acc_baseline = np.array(list(rows['Test Baseline Acc'])[0])
				optimized_test_acc = np.array(list(rows['Optimized Test Acc'])[0])
				ideal_test_acc = np.array(list(rows['Ideal Test Acc'])[0])

				dataset_size = np.array(list(rows['Total Size'])[0])
				validation_size = np.array(list(rows['Optimized Validation Size'])[0])
				ideal_test_size = np.array(list(rows['Optimized Test Size'])[0])

				ideal_validation_reduction_rates = []
				for size_of_ideal_validation_instance in validation_size:
					ideal_validation_reduction_rates.append((dataset_size - size_of_ideal_validation_instance) / dataset_size)

				ideal_test_reduction_rates = []
				for size_of_ideal_test_instance in ideal_test_size:
					ideal_test_reduction_rates.append((dataset_size - size_of_ideal_test_instance) / dataset_size)
					

				# Validation
				if ranksums(optimized_validation_acc, validation_acc_baseline).pvalue >= 0.05: 
					WTL["Optimized Validation Acc"]['Tie'] += 1
				elif np.mean(np.subtract(optimized_validation_acc, validation_acc_baseline)) > 0: 
					WTL["Optimized Validation Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Validation Acc"]["Optimizer losses"] += 1	

				# Test
				if ranksums(optimized_test_acc, test_acc_baseline).pvalue >= 0.05: 
					WTL["Optimized Test Acc"]['Tie'] += 1
				elif np.mean(np.subtract(optimized_test_acc, test_acc_baseline)) > 0: 
					WTL["Optimized Test Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Test Acc"]["Optimizer losses"] += 1	

				# Ideal Test
				if ranksums(ideal_test_acc, test_acc_baseline).pvalue >= 0.05: 
					WTL["Ideal Test Acc"]['Tie'] += 1
				elif np.mean(np.subtract(ideal_test_acc, test_acc_baseline)) > 0: 
					WTL["Ideal Test Acc"]["Optimizer wins"] += 1
				else: 
					WTL["Ideal Test Acc"]["Optimizer losses"] += 1	

				# Optimized validation size
				if ranksums(0, ideal_validation_reduction_rates).pvalue >= 0.05: 
					WTL["Optimized Validation Size"]['Tie'] += 1
				elif np.mean(ideal_validation_reduction_rates) > 0.1: 
					WTL["Optimized Validation Size"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Validation Size"]["Optimizer losses"] += 1	

				# Optimized ideal test size
				if ranksums(0, ideal_test_reduction_rates).pvalue >= 0.05: 
					WTL["Optimized Test Size"]['Tie'] += 1
				elif np.mean(ideal_test_reduction_rates) > 0.1: 
					WTL["Optimized Test Size"]["Optimizer wins"] += 1
				else: 
					WTL["Optimized Test Size"]["Optimizer losses"] += 1

			except Exception as e:
				# print(e)
				pass

WTL
# for key in WTL:
	# print(key, f"\n{WTL[key]['Optimizer wins']}/{WTL[key]['Optimizer losses']}/{WTL[key]['Tie']}.\n\n")

{'Optimized Validation Acc': {'Optimizer wins': 12,
  'Optimizer losses': 2,
  'Tie': 3},
 'Optimized Test Acc': {'Optimizer wins': 1, 'Optimizer losses': 1, 'Tie': 15},
 'Ideal Test Acc': {'Optimizer wins': 8, 'Optimizer losses': 2, 'Tie': 7},
 'Optimized Validation Size': {'Optimizer wins': 0,
  'Optimizer losses': 0,
  'Tie': 17},
 'Optimized Test Size': {'Optimizer wins': 0,
  'Optimizer losses': 0,
  'Tie': 17}}

# Visualizations

In [None]:
import matplotlib.pyplot as plt

result_dict = loaded_files["10_abalone19 overSample biasPop calculate_class0_error_calculate_class1_error.pickle"]
result = result_dict['result']

plt.figure(figsize=(7, 5))

print(result.F.shape)
plt.scatter(result.F[:, 0], result.F[:, 1], s=30, facecolors='none', edgecolors='blue')
plt.title("Pareto Front Space")
plt.show()