In [None]:
from pymoo.core.problem import Problem
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.core.sampling import Sampling
from pymoo.operators.crossover.hux import HUX
from pymoo.operators.mutation.bitflip import BitflipMutation
from pymoo.optimize import minimize
from pymoo.util.nds.non_dominated_sorting import NonDominatedSorting
from pymoo.operators.sampling.rnd import BinaryRandomSampling
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

from utils import *

display_pca = False
N_NEIGHBOURS = 5

blank_row = {"Label": '---', 
		    "Num Features": '---', 
		    "Num Examples": '---', 
		    "Training Size": '---', 
		    "Minority Class": '---', 
			"Base - Test C0 Acc": '---', 
		    "Base - Test C1 Acc": '---', 
		    "Base - Test Acc": '---', 
			"Base - Val C0 Acc": '---', 
		    "Base - Val C1 Acc": '---', 
		    "Base - Val Acc": '---', 
		    "Optimized - Test C0 Acc": '---', 
		    "Optimized - Test C1 Acc": '---', 
		    "Optimized - Test Acc": '---', 
			"Num Examples for Test Best": '---', 
			"Optimized - Val C0 Acc": '---', 
		    "Optimized - Val C1 Acc": '---', 
		    "Optimized - Val Acc": '---', 
			"Num Examples for Val Best": '---', 
			"Baseline IR": '---', 
		    "Optimized IR": '---'}

all_headers = ['Label', 
               'Num Features', 
               'Num Examples', 
               'Training Size', 
               'Minority Class', 
               'Base - Test C0 Acc', 
               'Base - Test C1 Acc', 
               'Base - Test Acc', 
               'Base - Val C0 Acc', 
               'Base - Val C1 Acc', 
               'Base - Val Acc', 
               'Optimized - Test C0 Acc', 
               'Optimized - Test C1 Acc', 
               'Optimized - Test Acc', 
               'Num Examples for Test Best', 
               'Optimized - Val C0 Acc', 
               'Optimized - Val C1 Acc', 
               'Optimized - Val Acc', 
               'Num Examples for Val Best', 
               'Baseline IR', 
               'Optimized IR']

# Overall function definitions

In [None]:
class BiasedBinarySampling(Sampling):
	def __init__(self, labels, major_prob, minor_prob):
		
		self.labels = labels
		counts = pd.DataFrame(labels).value_counts()
		if counts[0] > counts[1]:
			self.c0_thresh = major_prob
			self.c1_thresh = minor_prob
		else:
			self.c0_thresh = minor_prob
			self.c1_thresh = major_prob

		super().__init__()

	def _do(self, problem, n_samples, **kwargs):

		rands = np.random.random((n_samples, problem.n_var))
		init_pops = np.zeros((n_samples, problem.n_var), dtype=bool)

		for idx, label in enumerate(self.labels):
			if label == 0:
				init_pops[:, idx] = (rands[:, idx] < self.c0_thresh).astype(bool)
			if label == 1:
				init_pops[:, idx] = (rands[:, idx] < self.c1_thresh).astype(bool)


		return init_pops
	
class InstanceSelectionProblem_2_Obj(Problem):
	def __init__(self, X_train, y_train, X_val, y_val):
		
		self.X_train = X_train
		self.y_train = y_train

		self.X_val = X_val
		self.y_val = y_val

		self.training_data = X_train
		self.n_instances = X_train.shape[0]
		
		super().__init__(
			n_var=self.n_instances,
			n_obj=2,               
			n_constr=0,            
			xl=0,                  
			xu=1,                  
			type_var=np.bool_,     
		)

	def _evaluate(self, x, out, *args, **kwargs):
		# Calculate number of examples in each instance
		f1 = np.sum(x, axis=1)

		# Calculate inverse accuracy
		f2 = Parallel(n_jobs=-1)(delayed(self.train_model)(instance) for instance in x)

		out["F"] = np.column_stack([f1, f2])

	def train_model(self, instance):
		x_train_filtered, y_train_filtered = self.X_train[instance], self.y_train[instance]
		
		num_included_instances = x_train_filtered.shape[0]

		if num_included_instances >= N_NEIGHBOURS:
			optimization_knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
			optimization_knn.fit(x_train_filtered, y_train_filtered)

			y_pred = optimization_knn.predict(self.X_val)
			acc = accuracy_score(self.y_val, y_pred)
			return 1-acc
		else:
			return 1

class InstanceSelectionProblem_3_Obj(Problem):
	def __init__(self, X_train, y_train, X_val, y_val):
		
		self.X_train = X_train
		self.y_train = y_train

		self.X_val = X_val
		self.y_val = y_val

		self.training_data = X_train
		self.n_instances = X_train.shape[0]
		
		super().__init__(
			n_var=self.n_instances,
			n_obj=3,               
			n_constr=0,            
			xl=0,                  
			xu=1,                  
			type_var=np.bool_,     
		)

	def _evaluate(self, x, out, *args, **kwargs):
		# Calculate number of examples in each instance
		f1 = np.sum(x, axis=1)

		# Calculate inverse accuracy
		f2 = Parallel(n_jobs=-1)(delayed(self.train_model)(instance) for instance in x)

		f3 = Parallel(n_jobs=-1)(delayed(self.calculate_instance_IR)(instance) for instance in x)
		
		out["F"] = np.column_stack([f1, f2, f3])

	def calculate_instance_IR(self, instance):
		num_1_class = np.sum(self.y_train[instance])
		num_0_class = self.n_instances - num_1_class
		IR = max(num_0_class, num_1_class) / min(num_0_class, num_1_class)
		return IR

	def train_model(self, instance):
		x_train_filtered, y_train_filtered = self.X_train[instance], self.y_train[instance]
		
		num_included_instances = x_train_filtered.shape[0]

		if num_included_instances >= N_NEIGHBOURS:
			optimization_knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
			optimization_knn.fit(x_train_filtered, y_train_filtered)

			y_pred = optimization_knn.predict(self.X_val)
			acc = accuracy_score(self.y_val, y_pred)
			return 1-acc
		else:
			return 1

class InstanceSelectionProblem_2_Obj_MinMaxAcc(Problem):
	def __init__(self, X_train, y_train, X_val, y_val):
		
		self.X_train = X_train
		self.y_train = y_train

		self.X_val = X_val
		self.y_val = y_val

		self.training_data = X_train
		self.n_instances = X_train.shape[0]
		
		super().__init__(
			n_var=self.n_instances,
			n_obj=2,               
			n_constr=0,            
			xl=0,                  
			xu=1,                  
			type_var=np.bool_,     
		)

	def _evaluate(self, x, out, *args, **kwargs):
		objectives = Parallel(n_jobs=-1)(delayed(self.train_model)(instance) for instance in x)
		f1 = [obj[0] for obj in objectives] # Class 0 error
		f2 = [obj[1] for obj in objectives] # Class 1 error
		out["F"] = np.column_stack([f1, f2])

	def train_model(self, instance):
		print(f">>>> {instance}")
		x_train_filtered, y_train_filtered = self.X_train[instance], self.y_train[instance]
		num_included_instances = x_train_filtered.shape[0]

		if num_included_instances >= N_NEIGHBOURS:
			optimization_knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
			optimization_knn.fit(x_train_filtered, y_train_filtered)

			class_1_indices = np.where(self.y_val==1)
			class_0_indices = np.where(self.y_val==0)

			class_1_x_val = self.X_val[class_1_indices]
			class_0_x_val = self.X_val[class_0_indices]

			class_1_y_val = self.y_val[class_1_indices]
			class_0_y_val = self.y_val[class_0_indices]
			
			class_1_pred = optimization_knn.predict(class_1_x_val)
			class_1_acc = accuracy_score(class_1_y_val, class_1_pred)

			class_0_pred = optimization_knn.predict(class_0_x_val)
			class_0_acc = accuracy_score(class_0_y_val, class_0_pred)

			return (1-class_0_acc, 1-class_1_acc)
		else:
			return (1, 1)

def report_table_1(results):
	display_columns = ['Label', 'Num Features', 'Num Examples', 'Training Size', 'Minority Class', '-----', 'Baseline IR', 'Optimized IR']
	data = []
	for result in results:
		data.append([])
		for idx, key in enumerate(display_columns):
			if key in result:
				data[-1].append(result[key])
			else:
				data[-1].append("--")
							
	return pd.DataFrame(data, columns=display_columns)

def report_table_2(results):
	display_columns = ['Label', 
					'Base - Test Acc', 
					'Base - Test C0 Acc', 
					'Base - Test C1 Acc', 
					'-----',
					
					'Optimized - Test Acc', 
					'Optimized - Test C0 Acc', 
					'Optimized - Test C1 Acc', 
					'Num Examples for Test Best', 
					'-----',
					
					'Optimized - Val Acc', 
					'Optimized - Val C0 Acc', 
					'Optimized - Val C1 Acc', 
					'Num Examples for Val Best']
	data = []
	for result in results:
		data.append([])
		for idx, key in enumerate(display_columns):
			if key in result:
				data[-1].append(result[key])
			else:
				data[-1].append("-----")
							
	return pd.DataFrame(data, columns=display_columns)
	
def parse_dataset(path, name, over_sample=False, random_state=None):
	try:
		df = pd.read_csv(path, delimiter=', ')
		X = df.drop(columns='Class')
		y = df['Class']
	except KeyError:
		df = pd.read_csv(path, delimiter=',')
		X = df.drop(columns='Class')
		y = df['Class']

	# Generate train, validation, and test sets
	label_encoder = LabelEncoder()
	y_encoded = label_encoder.fit_transform(y)
	X_train, X_val, X_test, y_train, y_val, y_test = split_and_scale_datasets(X, y_encoded, random_state=random_state)

	if over_sample:
		class_0_count, class_1_count, IR = set_summary(y_train, "y_train", False)
		if class_0_count > class_1_count:
			minority_class_indicies = np.where(y_train == 1)
		else:
			minority_class_indicies = np.where(y_train == 0)

		X_train = np.concatenate((X_train, X_train[minority_class_indicies]), axis=0)
		y_train = np.concatenate((y_train, y_train[minority_class_indicies]), axis=0)

	return [X, y, X_train, X_val, X_test, y_train, y_val, y_test, name]

def class_based_accuracy(model, x, y):
	class_1_indices = np.where(y==1)
	class_0_indices = np.where(y==0)

	class_1_x = x[class_1_indices]
	class_0_x = x[class_0_indices]

	class_1_y = y[class_1_indices]
	class_0_y = y[class_0_indices]
	
	class_1_pred = model.predict(class_1_x)
	class_1_acc = accuracy_score(class_1_y, class_1_pred)

	class_0_pred = model.predict(class_0_x)
	class_0_acc = accuracy_score(class_0_y, class_0_pred)

	overall_prediction = model.predict(x)
	overall_accuracy = accuracy_score(y, overall_prediction)

	return class_0_acc, class_1_acc, overall_accuracy

def execute_regular_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, problem_defn, sampler, random_state=None):
	print(f"Executing {problem_defn} on {name}...")
	
	num_examples, num_features = set_summary(X, 'x', print_res=False)
	train_class_0_count, train_class_1_count, train_baseline_IR = set_summary(y_train, 'y', print_res=False)
	minority_class = 0 if min(train_class_0_count, train_class_1_count) == train_class_0_count else 1

	# Determine baseline accuracy of classifier on all examples
	baseline_knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
	baseline_knn.fit(X_train, y_train)
	class_0_baseline_testAcc, class_1_baseline_testAcc, baseline_testAcc = class_based_accuracy(baseline_knn, X_test, y_test)
	class_0_baseline_valAcc, class_1_baseline_valAcc, baseline_valAcc = class_based_accuracy(baseline_knn, X_val, y_val)
	
	# Execute the optimization algorithm
	algorithm = NSGA2(
		pop_size=100, 
		sampling=sampler, 
		crossover=HUX(), 
		mutation=BitflipMutation(), 
		eliminate_duplicates=True
	)
	problem = problem_defn(X_train, y_train, X_val, y_val)
	res = minimize(problem, algorithm, ('n_gen', 100), verbose=False, seed=random_state)

	# Calculate the best instance and extract its values
	fronts = NonDominatedSorting().do(res.F, only_non_dominated_front=True)
	unique_F, unique_indices = np.unique(res.F[fronts], axis=0, return_index=True)
	best_instance_idx = np.argmin(unique_F[:,1])
	num_instance_in_best_solution = unique_F[best_instance_idx, 0]
	best_instance = res.X[unique_indices[best_instance_idx]]
	
	# Calculate the optimized accuracy of the best instance
	x_train_filtered, y_train_filtered = X_train[best_instance], y_train[best_instance]
	_, _, optimized_IR = set_summary(y_train_filtered, 'y', print_res=False)

	# Calculate the optimized accuracy of the best instance on test set and validation set
	optimized_knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
	optimized_knn.fit(x_train_filtered, y_train_filtered)
	class_0_optimized_testAcc, class_1_optimized_testAcc, optimized_testAcc = class_based_accuracy(optimized_knn, X_test, y_test)
	class_0_optimized_valAcc, class_1_optimized_valAcc, optimized_valAcc = class_based_accuracy(optimized_knn, X_val, y_val)
	
	test_pareto_front = []
	best_test_instance = [-1, 1]
	for instance in res.X[unique_indices]:
		x_filtered, y_filtered = X_train[instance], y_train[instance]
		if x_filtered.shape[0] < N_NEIGHBOURS: error = 1
		else:
			knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
			knn.fit(x_filtered, y_filtered)
			y_pred = knn.predict(X_test)
			error = 1 - accuracy_score(y_test, y_pred)
		
		if best_test_instance[1] > error:
			best_test_instance = [x_filtered.shape[0], error]
		test_pareto_front.append([x_filtered.shape[0], error])
	x2, y2 = [row[0] for row in test_pareto_front], [row[1] for row in test_pareto_front]
	x1, y1 = [row[0] for row in unique_F], [row[1] for row in unique_F]
	plt.ylim((0, 1))
	plt.title(name)
	plt.ylabel("f2 *")
	plt.xlabel("f1")
	plt.scatter(x1, y1, c='b')
	plt.scatter(x2, y2, c='r')
	plt.show()

	return {"Label": name, 
		    "Num Features": num_features, 
		    "Num Examples": num_examples,
		    "Training Size": X_train.shape[0],
		    "Minority Class": minority_class,
		    
			"Base - Test C0 Acc": round(class_0_baseline_testAcc*100,2), 
		    "Base - Test C1 Acc": round(class_1_baseline_testAcc*100,2), 
		    "Base - Test Acc": round(baseline_testAcc*100,2), 

			"Base - Val C0 Acc": round(class_0_baseline_valAcc*100,2), 
		    "Base - Val C1 Acc": round(class_1_baseline_valAcc*100,2), 
		    "Base - Val Acc": round(baseline_valAcc*100,2), 

		    "Optimized - Test C0 Acc": round(class_0_optimized_testAcc*100,2), 
		    "Optimized - Test C1 Acc": round(class_1_optimized_testAcc*100,2), 
		    "Optimized - Test Acc": round(optimized_testAcc*100,2), 
			"Num Examples for Test Best": best_test_instance[0],
			
			"Optimized - Val C0 Acc": round(class_0_optimized_valAcc*100,2), 
		    "Optimized - Val C1 Acc": round(class_1_optimized_valAcc*100,2), 
		    "Optimized - Val Acc": round(optimized_valAcc*100,2), 
			"Num Examples for Val Best": num_instance_in_best_solution,
		    
			"Baseline IR": train_baseline_IR,
		    "Optimized IR": optimized_IR}

def execute_class_sensitive_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, problem_defn, sampler, random_state=None):
	print(f"Executing {problem_defn} on {name}...")
	
	num_examples, num_features = set_summary(X, 'x', print_res=False)
	train_class_0_count, train_class_1_count, train_baseline_IR = set_summary(y_train, 'y', print_res=False)
	minority_class = 0 if min(train_class_0_count, train_class_1_count) == train_class_0_count else 1

	# Determine baseline accuracy of classifier on all examples
	baseline_knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
	baseline_knn.fit(X_train, y_train)
	class_0_baseline_testAcc, class_1_baseline_testAcc, baseline_testAcc = class_based_accuracy(baseline_knn, X_test, y_test)
	class_0_baseline_valAcc, class_1_baseline_valAcc, baseline_valAcc = class_based_accuracy(baseline_knn, X_val, y_val)

	# Execute the optimization algorithm
	problem = problem_defn(X_train, y_train, X_val, y_val)
	algorithm = NSGA2(
		pop_size=100, 
		sampling=sampler, 
		crossover=HUX(), 
		mutation=BitflipMutation(), 
		eliminate_duplicates=True)
	
	res = minimize(problem, algorithm, ('n_gen', 100), verbose=False, seed=random_state)

	# Calculate the first rank pareto front
	fronts = NonDominatedSorting().do(res.F, only_non_dominated_front=True)
	unique_F, unique_indices = np.unique(res.F[fronts], axis=0, return_index=True)

	# Extract best instances for minority class
	if minority_class == 0:
		best_instance_idx = np.argmin(unique_F[0,:])
	else:
		best_instance_idx = np.argmin(unique_F[:,0])

	best_instance = res.X[unique_indices[best_instance_idx]]
	x_train_filtered, y_train_filtered = X_train[best_instance], y_train[best_instance]
	num_instance_in_best_solution = y_train_filtered.shape[0]
	_, _, optimized_IR = set_summary(y_train_filtered, 'y', print_res=False)
	
	# Calculate the optimized accuracy of the best instance on test set and validation set
	optimized_knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
	optimized_knn.fit(x_train_filtered, y_train_filtered)
	class_0_optimized_testAcc, class_1_optimized_testAcc, optimized_testAcc = class_based_accuracy(optimized_knn, X_test, y_test)
	class_0_optimized_valAcc, class_1_optimized_valAcc, optimized_valAcc = class_based_accuracy(optimized_knn, X_val, y_val)

	test_pareto_front = []
	num_examples_in_best_instance_on_test = -1
	best_error_instance_on_test = 1
	for instance in res.X[unique_indices]:
		x_filtered, y_filtered = X_train[instance], y_train[instance]
		if x_filtered.shape[0] < N_NEIGHBOURS: test_pareto_front.append([1, 1])
		else:
			knn = KNeighborsClassifier(n_neighbors=N_NEIGHBOURS)
			knn.fit(x_filtered, y_filtered)
			
			class_1_indices = np.where(y_test==1)
			class_0_indices = np.where(y_test==0)

			class_1_x_val = X_test[class_1_indices]
			class_0_x_val = X_test[class_0_indices]
			class_1_y_val = y_test[class_1_indices]
			class_0_y_val = y_test[class_0_indices]
			
			class_1_pred = knn.predict(class_1_x_val)
			class_1_acc = accuracy_score(class_1_y_val, class_1_pred)

			class_0_pred = knn.predict(class_0_x_val)
			class_0_acc = accuracy_score(class_0_y_val, class_0_pred)

			if minority_class == 0:
				if (1-class_0_acc) < best_error_instance_on_test:
					num_examples_in_best_instance_on_test = x_filtered.shape[0]
					best_error_instance_on_test = (1-class_0_acc)
			else:
				if (1-class_1_acc) < best_error_instance_on_test:
					num_examples_in_best_instance_on_test = x_filtered.shape[0]
					best_error_instance_on_test = (1-class_1_acc)

			test_pareto_front.append([1-class_0_acc, 1-class_1_acc])

	x1, y1 = [row[0] for row in unique_F], [row[1] for row in unique_F]
	x2, y2 = [row[0] for row in test_pareto_front], [row[1] for row in test_pareto_front]
	plt.ylim((0, 1))
	plt.xlim((0, 1))
	plt.title(name)
	plt.ylabel(f"f1 {'*' if minority_class == 0 else ''}")
	plt.xlabel(f"f2 {'*' if minority_class == 1 else ''}")
	plt.scatter(x1, y1, c='b')
	plt.scatter(x2, y2, c='r')
	plt.show()

	return {"Label": name, 
		    "Num Features": num_features, 
		    "Num Examples": num_examples,
		    "Training Size": X_train.shape[0],
		    "Minority Class": minority_class,
		    
			"Base - Test C0 Acc": round(class_0_baseline_testAcc*100,2), 
		    "Base - Test C1 Acc": round(class_1_baseline_testAcc*100,2), 
		    "Base - Test Acc": round(baseline_testAcc*100,2), 

			"Base - Val C0 Acc": round(class_0_baseline_valAcc*100,2), 
		    "Base - Val C1 Acc": round(class_1_baseline_valAcc*100,2), 
		    "Base - Val Acc": round(baseline_valAcc*100,2), 

		    "Optimized - Test C0 Acc": round(class_0_optimized_testAcc*100,2), 
		    "Optimized - Test C1 Acc": round(class_1_optimized_testAcc*100,2), 
		    "Optimized - Test Acc": round(optimized_testAcc*100,2), 
			"Num Examples for Test Best": num_examples_in_best_instance_on_test,
			
			"Optimized - Val C0 Acc": round(class_0_optimized_valAcc*100,2), 
		    "Optimized - Val C1 Acc": round(class_1_optimized_valAcc*100,2), 
		    "Optimized - Val Acc": round(optimized_valAcc*100,2), 
			"Num Examples for Val Best": num_instance_in_best_solution,
		    
			"Baseline IR": train_baseline_IR,
		    "Optimized IR": optimized_IR}

# Defining and packaging datasets

In [3]:
datasets = []
for folder in os.listdir('Datasets'):
	datasets.append(parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=False))

  df = pd.read_csv(path, delimiter=', ')
  df = pd.read_csv(path, delimiter=', ')
  df = pd.read_csv(path, delimiter=', ')
  df = pd.read_csv(path, delimiter=', ')
  df = pd.read_csv(path, delimiter=', ')
  df = pd.read_csv(path, delimiter=', ')


# Random and biased initial population for objectices --> Error and number of examples

In [None]:
runs = {}
for iter in range(10):
	for data_option in ['over_sample', 'regular_sample']:
		for init_pop in ['rand', 'bias']:
			for optimization in ['MinMajAcc', '2Obj', '3Obj']:
				for folder in os.listdir('Datasets'):
					over_sample = False if data_option == "regular_sample" else True
					X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=over_sample)
					
					if optimization == "MinMajAcc":
						if init_pop == "rand":
							result = execute_class_sensitive_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj_MinMaxAcc, BinaryRandomSampling())
						else:
							result = execute_class_sensitive_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj_MinMaxAcc, BiasedBinarySampling(y_train, 0.5, 0.7))

					elif optimization == "2Obj":
						if init_pop == "rand":
							result = execute_regular_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj, BinaryRandomSampling())
						else:
							result = execute_regular_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj, BiasedBinarySampling(y_train, 0.5, 0.7))

					else:
						if init_pop == "rand":
							result = execute_regular_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_3_Obj, BinaryRandomSampling())
						else:
							result = execute_regular_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_3_Obj, BiasedBinarySampling(y_train, 0.5, 0.7))

for folder in os.listdir('Datasets'):
	for iter in range(10):
		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=False)
		name = f"RegSample-MinMajAcc-Rand-{name}"
		iter_results.append(execute_class_sensitive_optimization(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj_MinMaxAcc, BinaryRandomSampling()))



for folder in os.listdir('Datasets'):
	for iter in range(10):
		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=False)
		name = f"RegSample-2Obj-Bias-{name}"
		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj, BiasedBinarySampling(y_train, 0.5, 0.7)))

for folder in os.listdir('Datasets'):
	for iter in range(10):
		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=False)
		name = f"RegSample-2Obj-Rand-{name}"
		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj, BinaryRandomSampling()))




for folder in os.listdir('Datasets'):
	for iter in range(10):
		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=False)
		name = f"RegSample-3Obj-Bias-{name}"
		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_3_Obj, BiasedBinarySampling(y_train, 0.5, 0.7)))

for folder in os.listdir('Datasets'):
	for iter in range(10):
		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=False)
		name = f"RegSample-3Obj-Rand-{name}"
		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_3_Obj, BinaryRandomSampling()))


#---------------------------------------

# iter_results = []
# for folder in os.listdir('Datasets'):
# 	for iter in range(10):
# 		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=True)
# 		name = f"OverUnderSample-MinMajAcc-Bias-{name}"
# 		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj_MinMaxAcc, BiasedBinarySampling(y_train, 0.5, 0.7)))

# for folder in os.listdir('Datasets'):
# 	for iter in range(10):
# 		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=True)
# 		name = f"OverUnderSample-MinMajAcc-Rand-{name}"
# 		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj_MinMaxAcc, BinaryRandomSampling()))





# for folder in os.listdir('Datasets'):
# 	for iter in range(10):
# 		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=True)
# 		name = f"OverUnderSample-2Obj-Bias-{name}"
# 		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj, BiasedBinarySampling(y_train, 0.5, 0.7)))

# for folder in os.listdir('Datasets'):
# 	for iter in range(10):
# 		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=True)
# 		name = f"OverUnderSample-2Obj-Rand-{name}"
# 		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_2_Obj, BinaryRandomSampling()))





# for folder in os.listdir('Datasets'):
# 	for iter in range(10):
# 		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=True)
# 		name = f"OverUnderSample-3Obj-Bias-{name}"
# 		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_3_Obj, BiasedBinarySampling(y_train, 0.5, 0.7)))

# for folder in os.listdir('Datasets'):
# 	for iter in range(10):
# 		X, y, X_train, X_val, X_test, y_train, y_val, y_test, name = parse_dataset(os.path.join('Datasets', folder, f"{folder}.csv"), folder, over_sample=True)
# 		name = f"OverUnderSample-3Obj-Rand-{name}"
# 		iter_results.append(execute_Iter1(X, y, X_train, X_val, X_test, y_train, y_val, y_test, name, InstanceSelectionProblem_3_Obj, BinaryRandomSampling()))

SyntaxError: expected ':' (3638434715.py, line 11)

In [9]:
summations = {}
for result in iter_results:
	
	if result['Label'] not in summations:
		summations[result['Label']] = {}
		for key in result:
			if isinstance(result[key], float) or isinstance(result[key], int):
				summations[result['Label']][key] = 0
	
	for key in result:
		if key in summations[result['Label']]:
			summations[result['Label']][key] += result[key]
	
	

averages = []
for label in summations:
	temp = {}
	for key in summations[label]:
		temp[key] = summations[label][key] / 10
	
	temp["Label"] = label
	averages.append(temp)

pd.DataFrame(averages).to_csv("output.csv")