In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, balanced_accuracy_score

from pymoo.operators.mutation.bitflip import BitflipMutation, Mutation
from pymoo.util.nds.non_dominated_sorting import NonDominatedSorting
from pymoo.operators.sampling.rnd import BinaryRandomSampling, Sampling
from pymoo.operators.crossover.hux import HUX
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.indicators.hv import Hypervolume
from pymoo.core.problem import Problem
from pymoo.optimize import minimize

from torch.utils.data import Dataset, DataLoader, TensorDataset
import torch.nn as nn
import torch

from scipy.stats import ranksums

from joblib import Parallel, delayed
from pathlib import Path
from io import StringIO
import pandas as pd
import numpy as np
import pickle
import os
import re

with open('../data.pickle', 'rb') as fh:
	data_mapper = pickle.load(fh)

In [3]:
class GenericOptimizer(Problem):
	population_size = 100
	n_neighbours = 5
	sequential = False
	def __init__(self, X_train, y_train, X_val, y_val, objectives, exec_mode):
		self.mutation_history = {}
		self.generation_number = 0

		self.exec_mode = exec_mode

		self.X_train = X_train
		self.y_train = y_train

		self.X_val = X_val
		self.y_val = y_val

		self.training_data = X_train
		self.n_instances = X_train.shape[0]
		
		self.objectives = objectives

		super().__init__(
			n_var=self.n_instances,
			n_obj=len(objectives),               
			n_constr=0,            
			xl=0,                  
			xu=1,                  
			type_var=np.bool_,     
		)

	def _evaluate(self, x, out, *args, **kwargs):
		
		if self.exec_mode == "sequential":
			metrics = []
			for objective in self.objectives:
				metrics.append(self.eval_objective((objective, x)))
		else:
			metrics = Parallel(n_jobs=-1)(delayed(self.eval_objective)((objective, x)) for objective in self.objectives)
		
		self.generation_number += 1

		out["F"] = np.column_stack(metrics)

	def eval_objective(self, pack):
		objective, x = pack
			
		if "calculate_num_examples" in repr(objective):
			return GenericOptimizer.calculate_num_examples(x)

		elif "calculate_IR" in repr(objective):
			vals = []
			for instance in x:
				vals.append(GenericOptimizer.calculate_IR(self.y_train[instance]))
			return vals
		
		else:
			vals = []
			for instance in x:
				vals.append(objective(
					self.X_train[instance],
					self.y_train[instance],
					self.X_val,
					self.y_val,
					GenericOptimizer.n_neighbours
				))
			return vals

	@classmethod
	def calculate_IR(cls, y):
		df = pd.DataFrame(y).value_counts()
		return (df[1]/df[0]) if df.min() == 0 else (df[0]/df[1])
	
	@classmethod
	def filter_by_class(cls, x, y, label):
		indices = np.where(y==label)
		return x[indices], y[indices]
	
	@classmethod
	def calculate_overall_error(cls, x_train, y_train, x_val, y_val, n):
				
		num_included_instances = x_train.shape[0]

		if num_included_instances >= n:
			optimization_knn = KNeighborsClassifier(n_neighbors=n)
			optimization_knn.fit(x_train, y_train)

			y_pred = optimization_knn.predict(x_val)
			balanced_acc = balanced_accuracy_score(y_val, y_pred)
			return 1-balanced_acc
		else:
			return 1

	@classmethod
	def calculate_overall_classBalanced_error(cls, x_train, y_train, x_val, y_val, n):
				
		num_included_instances = x_train.shape[0]

		if num_included_instances >= n:
			optimization_knn = KNeighborsClassifier(n_neighbors=n)
			optimization_knn.fit(x_train, y_train)

			y_pred = optimization_knn.predict(x_val)
			acc = accuracy_score(y_val, y_pred)
			return 1-acc
		else:
			return 1
	
	@classmethod
	def calculate_mean_class_error(cls, x_train, y_train, x_val, y_val, n):
		errors = []
		for label in np.unique(y_val):
			label_x_val, label_y_val = cls.filter_by_class(x_val, y_val, label)
			errors.append(cls.calculate_overall_error(
				x_train,
				y_train,
				label_x_val,
				label_y_val,
				n
			))
		return np.mean(errors)
			
	@classmethod
	def calculate_class0_error(cls, x_train, y_train, x_val, y_val, n):
		class0_x_val, class0_y_val = cls.filter_by_class(x_val, y_val, 0)
		err = cls.calculate_overall_error(
			x_train,
			y_train,
			class0_x_val,
			class0_y_val,
			n
		)
		return err

	@classmethod
	def calculate_class1_error(cls, x_train, y_train, x_val, y_val, n):
		class1_x_val, class1_y_val = cls.filter_by_class(x_val, y_val, 1)
		err = cls.calculate_overall_error(
			x_train,
			y_train,
			class1_x_val,
			class1_y_val,
			n
		)
		return err

	@classmethod
	def calculate_overall_inverse_f1(cls, x_train, y_train, x_val, y_val, n):
				
		num_included_instances = x_train.shape[0]
		counts = pd.DataFrame(y_train).value_counts()
		if num_included_instances >= n:
			optimization_knn = KNeighborsClassifier(n_neighbors=n)
			optimization_knn.fit(x_train, y_train)

			y_pred = optimization_knn.predict(x_val)
			f1 = f1_score(y_val, y_pred, average='binary')
			return 1-f1
		else:
			return 1

	@classmethod
	def calculate_class0_inverse_f1(cls, x_train, y_train, x_val, y_val, n):
		class0_x_val, class0_y_val = cls.filter_by_class(x_val, y_val, 0)
		inv_f1 = cls.calculate_overall_inverse_f1(
			x_train,
			y_train,
			class0_x_val,
			class0_y_val,
			n
		)
		return inv_f1

	@classmethod
	def calculate_class1_inverse_f1(cls, x_train, y_train, x_val, y_val, n):
		class1_x_val, class1_y_val = cls.filter_by_class(x_val, y_val, 1)
		inv_f1 = cls.calculate_overall_inverse_f1(
			x_train,
			y_train,
			class1_x_val,
			class1_y_val,
			n
		)
		return inv_f1
	
	@classmethod
	def calculate_num_examples(cls, instances):
		return np.sum(instances, axis=1)

class BiasedBinarySampling(Sampling):
	def __init__(self, labels, major_prob, minor_prob):
		
		self.labels = labels
		counts = pd.DataFrame(labels).value_counts()
		if counts[0] > counts[1]:
			self.c0_thresh = major_prob
			self.c1_thresh = minor_prob
		else:
			self.c0_thresh = minor_prob
			self.c1_thresh = major_prob

		super().__init__()

	def _do(self, problem, n_samples, **kwargs):

		rands = np.random.random((n_samples, problem.n_var))
		init_pops = np.zeros((n_samples, problem.n_var), dtype=bool)
		for idx, label in enumerate(self.labels):
			if label == 0:
				init_pops[:, idx] = (rands[:, idx] < self.c0_thresh).astype(bool)
			if label == 1:
				init_pops[:, idx] = (rands[:, idx] < self.c1_thresh).astype(bool)


		return init_pops
	
class DiverseCustomSampling(Sampling):
	def __init__(self):
		super().__init__()

	def _do(self, problem, n_samples, **kwargs):

		target_inclusions = np.random.randint(
			problem.n_var // 3,
			problem.n_var,
			n_samples
		)
		init_pops = []
		for target in target_inclusions:
			array = np.array([1]*target + [0]*(problem.n_var - target))
			np.random.shuffle(array)
			init_pops.append(array)
		init_pops = np.array(init_pops, dtype=np.bool)
		return init_pops

In [None]:
def execute(package):
	data_key, run_type = package
	save_name = f"results/{data_key} {run_type}.pickle"

	if os.path.exists(save_name):
		return None, data_key
	
	try:
		if 'randomPopulation' in save_name:
			initial_population = BinaryRandomSampling()
		
		elif 'diversePopulation' in save_name:
			initial_population = DiverseCustomSampling()
		
		elif 'biasePopulation' in save_name:
			initial_population = BiasedBinarySampling(data_mapper[data_key]['y_train'], 0.4, 0.7)

		problem = GenericOptimizer(
			data_mapper[data_key]['x_train'], 
			data_mapper[data_key]['y_train'], 
			data_mapper[data_key]['x_validation'], 
			data_mapper[data_key]['y_validation'], 
			[GenericOptimizer.calculate_class0_error, GenericOptimizer.calculate_class1_error],
			"sequential"
		)
		algorithm = NSGA2(
			pop_size=GenericOptimizer.population_size, 
			sampling=initial_population,
			crossover=HUX(), 
			mutation=BitflipMutation(), 
			eliminate_duplicates=True,
		)
		result = minimize(
			problem, 
			algorithm, 
			('n_gen', GenericOptimizer.population_size),
			save_history=False
		)
		return result, data_key
	except:
		return None, data_key
	
run_type = "randomPopulation"
for result, data_key in Parallel(n_jobs=-1, return_as='generator')(delayed(execute)((data_key, run_type)) for data_key in data_mapper):
	if result is not None:
		with open(os.path.join("results", f'{data_key} {run_type}.pickle'), 'wb') as fh:
			pickle.dump(result, fh)
			print(f'Saved {data_key} {run_type}.pickle')
	else:
		print(f'Passed {data_key} {run_type}.pickle')

run_type = "diversePopulation"
for result, data_key in Parallel(n_jobs=-1, return_as='generator')(delayed(execute)((data_key, run_type)) for data_key in data_mapper):
	if result is not None:
		with open(os.path.join("results", f'{data_key} {run_type}.pickle'), 'wb') as fh:
			pickle.dump(result, fh)
			print(f'Saved {data_key} {run_type}.pickle')
	else:
		print(f'Passed {data_key} {run_type}.pickle')

run_type = "biasePopulation"
for result, data_key in Parallel(n_jobs=-1, return_as='generator')(delayed(execute)((data_key, run_type)) for data_key in data_mapper):
	if result is not None:
		with open(os.path.join("results", f'{data_key} {run_type}.pickle'), 'wb') as fh:
			pickle.dump(result, fh)
			print(f'Saved {data_key} {run_type}.pickle')
	else:
		print(f'Passed {data_key} {run_type}.pickle')

In [4]:
random_population = {}
bias_population = {}
diverse_population = {}

for save_name in os.listdir('../results'):
	name = save_name.replace(".pickle", "")
	segments = name.split(" ")
	database_iter = segments[0]
	database_name = '_'.join(database_iter.split("_")[1:])
	split_number = int(database_iter.split("_")[0])
	population_type = segments[1]
	
	with open(f'../results/{save_name}', 'rb') as fh:
		execution_data = pickle.load(fh)

	if population_type == 'randomPopulation':
		if database_name not in random_population:
			random_population[database_name] = {}
		random_population[database_name][split_number] = execution_data

	elif population_type == 'diversePopulation':
		if database_name not in diverse_population:
			diverse_population[database_name] = {}
		diverse_population[database_name][split_number] = execution_data
		
	else: # Bias population
		if database_name not in bias_population:
			bias_population[database_name] = {}
		bias_population[database_name][split_number] = execution_data

In [None]:
def parallel_error(instance, x_train, y_train, x_compare, y_compare):

	x_filtered, y_filtered = x_train[instance], y_train[instance]
	if x_filtered.shape[0] < GenericOptimizer.n_neighbours: 
		error = 1
	else:
		knn = KNeighborsClassifier(n_neighbors=GenericOptimizer.n_neighbours)
		knn.fit(x_filtered, y_filtered)
		y_pred = knn.predict(x_compare)
		error = 1 - accuracy_score(y_compare, y_pred)
	return error

def calculate_metrics(x_train, y_train, x_validation, y_validation, x_test, y_test, result):
	baseline_validation_err = GenericOptimizer.calculate_overall_error(
		x_train, y_train,
		x_validation, y_validation,
		GenericOptimizer.n_neighbours
	)
	baseline_test_err = GenericOptimizer.calculate_overall_error(
		x_train, y_train,
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)

	result_pareto_front = result.X
	if len(result_pareto_front.shape) == 1:
		result_pareto_front = np.array([result_pareto_front])

	validation_F = Parallel(n_jobs=-1)(delayed(parallel_error)(instance, x_train, y_train, x_validation, y_validation) for instance in result_pareto_front)
	ideal_validation = result_pareto_front[np.argmin(validation_F)]
	validation_inclusions = np.sum(ideal_validation)
	optimized_validation_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation],
		y_train[ideal_validation],
		x_validation,
		y_validation,
		GenericOptimizer.n_neighbours
	)
	optimized_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation],
		y_train[ideal_validation],
		x_test,
		y_test,
		GenericOptimizer.n_neighbours
	)

	test_F = Parallel(n_jobs=-1)(delayed(parallel_error)(instance, x_train, y_train, x_test, y_test) for instance in result_pareto_front)
	ideal_test = result_pareto_front[np.argmin(test_F)]
	test_inclusions = np.sum(ideal_test)
	ideal_optimized_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_test],
		y_train[ideal_test],
		x_test,
		y_test,
		GenericOptimizer.n_neighbours
	)
	return validation_inclusions, test_inclusions, baseline_validation_err, baseline_test_err, optimized_validation_err, optimized_test_err, ideal_optimized_test_err

In [4]:
baselines = {}
for data_key in data_mapper:
	
	x_train, y_train = data_mapper[data_key]['x_train'], data_mapper[data_key]['y_train']
	x_validation, y_validation = data_mapper[data_key]['x_validation'], data_mapper[data_key]['y_validation']
	x_test, y_test = data_mapper[data_key]['x_test'], data_mapper[data_key]['y_test']

	try:
		baseline_validation_err = GenericOptimizer.calculate_overall_error(
			x_train, y_train,
			x_validation, y_validation,
			GenericOptimizer.n_neighbours
		)
		baseline_test_err = GenericOptimizer.calculate_overall_error(
			x_train, y_train,
			x_test, y_test,
			GenericOptimizer.n_neighbours
		)
		
		baselines[data_key] = (baseline_validation_err, baseline_test_err)
	except:
		print("Error, ", data_key)

Error,  0_zoo-3
Error,  1_zoo-3
Error,  2_zoo-3
Error,  3_zoo-3
Error,  4_zoo-3
Error,  5_zoo-3
Error,  6_zoo-3
Error,  7_zoo-3
Error,  8_zoo-3
Error,  9_zoo-3
Error,  10_zoo-3
Error,  11_zoo-3
Error,  12_zoo-3
Error,  13_zoo-3
Error,  14_zoo-3
Error,  15_zoo-3
Error,  16_zoo-3
Error,  17_zoo-3
Error,  18_zoo-3
Error,  19_zoo-3
Error,  20_zoo-3
Error,  21_zoo-3
Error,  22_zoo-3
Error,  23_zoo-3
Error,  24_zoo-3
Error,  25_zoo-3
Error,  26_zoo-3
Error,  27_zoo-3
Error,  28_zoo-3
Error,  29_zoo-3
Error,  30_zoo-3


In [None]:
def calculate(input_data):
	instance_dict = {}
	for dataset in input_data:
		for iter_num in input_data[dataset]:
			result = input_data[dataset][iter_num]

			data_key = f"{iter_num}_{dataset}"
			x_train, y_train = data_mapper[data_key]['x_train'], data_mapper[data_key]['y_train']
			x_validation, y_validation = data_mapper[data_key]['x_validation'], data_mapper[data_key]['y_validation']
			x_test, y_test = data_mapper[data_key]['x_test'], data_mapper[data_key]['y_test']

			result_pareto_front = result.X
			if len(result_pareto_front.shape) == 1:
				result_pareto_front = np.array([result_pareto_front])

			validation_F = Parallel(n_jobs=-1)(delayed(parallel_error)(instance, x_train, y_train, x_validation, y_validation) for instance in result_pareto_front)
			ideal_validation = result_pareto_front[np.argmin(validation_F)]
			
			test_F = Parallel(n_jobs=-1)(delayed(parallel_error)(instance, x_train, y_train, x_test, y_test) for instance in result_pareto_front)
			ideal_test = result_pareto_front[np.argmin(test_F)]

			instance_dict[data_key] = (ideal_validation, ideal_test)
	return instance_dict

randPop_best_instances = calculate(random_population)
diversePop_metrics_best_instances = calculate(diverse_population)
biasPop_metrics_best_instances = calculate(bias_population)

with open("ideal_instances.pickle", 'wb') as fh:
    pickle.dump({
        "rand": randPop_best_instances,
        "diverse": diversePop_metrics_best_instances,
        "bias": biasPop_metrics_best_instances
    }, fh)

In [6]:
with open("ideal_instances.pickle", "rb") as fh:
	instances_dict = pickle.load(fh)

In [None]:
average_metrics = {}
for data_key in data_mapper:
	x_train, y_train = data_mapper[data_key]['x_train'], data_mapper[data_key]['y_train']
	x_validation, y_validation = data_mapper[data_key]['x_validation'], data_mapper[data_key]['y_validation']
	x_test, y_test = data_mapper[data_key]['x_test'], data_mapper[data_key]['y_test']

	database_name = '_'.join(data_key.split("_")[1:])
	split_number = int(data_key.split("_")[0])

	if data_key not in instances_dict['rand']:
		continue

	###########################################
	#		Baseline metrics
	###########################################

	baseline_validation_err = GenericOptimizer.calculate_overall_error(
		x_train, y_train,
		x_validation, y_validation,
		GenericOptimizer.n_neighbours
	)
	baseline_test_err = GenericOptimizer.calculate_overall_error(
		x_train, y_train,
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)

	train_size = x_train.shape[0]

	###########################################
	#		Random Population
	###########################################
	
	ideal_validation, ideal_test = instances_dict['rand'][data_key]

	random_population_optimized_valiation_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation], y_train[ideal_validation],
		x_validation, y_validation,
		GenericOptimizer.n_neighbours
	)
	random_population_optimized_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation], y_train[ideal_validation],
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)
	random_population_ideal_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_test], y_train[ideal_test],
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)
	
	rand_validation_instance_size = np.sum(ideal_validation)
	rand_test_instance_size = np.sum(ideal_test)

	###########################################
	#		Diverse Population
	###########################################
	
	ideal_validation, ideal_test = instances_dict['diverse'][data_key]

	diverse_population_optimized_valiation_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation], y_train[ideal_validation],
		x_validation, y_validation,
		GenericOptimizer.n_neighbours
	)
	diverse_population_optimized_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation], y_train[ideal_validation],
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)
	diverse_population_ideal_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_test], y_train[ideal_test],
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)

	diverse_validation_instance_size = np.sum(ideal_validation)
	diverse_test_instance_size = np.sum(ideal_test)

	###########################################
	#		Biased Population
	###########################################
	
	ideal_validation, ideal_test = instances_dict['bias'][data_key]

	bias_population_optimized_valiation_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation], y_train[ideal_validation],
		x_validation, y_validation,
		GenericOptimizer.n_neighbours
	)
	bias_population_optimized_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_validation], y_train[ideal_validation],
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)
	bias_population_ideal_test_err = GenericOptimizer.calculate_overall_error(
		x_train[ideal_test], y_train[ideal_test],
		x_test, y_test,
		GenericOptimizer.n_neighbours
	)

	biased_validation_instance_size = np.sum(ideal_validation)
	biased_test_instance_size = np.sum(ideal_test)

	###########################################
	#		Record creation
	###########################################
	
	if database_name not in average_metrics:
		average_metrics[database_name] = {
			"Baseline validation acc": [],
			"Baseline test acc": [],
			"Train size": [train_size],

			"randPop optimized validation acc": [],
			"randPop optimized test acc": [],
			"randPop optimized ideal test acc": [],
			"randPop diff": [],
			"randPop validation size": [],
			"randPop test size": [],

			"diversePop optimized validation acc": [],
			"diversePop optimized test acc": [],
			"diversePop optimized ideal test acc": [],
			"diversePop diff": [],
			"diversePop validation size": [],
			"diversePop test size": [],

			"biasedPop optimized validation acc": [],
			"biasedPop optimized test acc": [],
			"biasedPop optimized ideal test acc": [],
			"biasedPop diff": [],
			"biasedPop validation size": [],
			"biasedPop test size": [],
		}

	average_metrics[database_name]['Baseline validation acc'].append(1 - baseline_validation_err)
	average_metrics[database_name]['Baseline test acc'].append(1 - baseline_test_err)

	average_metrics[database_name]['randPop optimized validation acc'].append(1 - random_population_optimized_valiation_err)
	average_metrics[database_name]['randPop optimized test acc'].append(1 - random_population_optimized_test_err)
	average_metrics[database_name]['randPop optimized ideal test acc'].append(1 - random_population_ideal_test_err)
	average_metrics[database_name]['randPop diff'].append(baseline_test_err - random_population_ideal_test_err)
	average_metrics[database_name]['randPop validation size'].append(rand_validation_instance_size)
	average_metrics[database_name]['randPop test size'].append(rand_test_instance_size)

	average_metrics[database_name]['diversePop optimized validation acc'].append(1 - diverse_population_optimized_valiation_err)
	average_metrics[database_name]['diversePop optimized test acc'].append(1 - diverse_population_optimized_test_err)
	average_metrics[database_name]['diversePop optimized ideal test acc'].append(1 - diverse_population_ideal_test_err)
	average_metrics[database_name]['diversePop diff'].append(baseline_test_err - diverse_population_ideal_test_err)
	average_metrics[database_name]['diversePop validation size'].append(diverse_validation_instance_size)
	average_metrics[database_name]['diversePop test size'].append(diverse_test_instance_size)

	average_metrics[database_name]['biasedPop optimized validation acc'].append(1 - bias_population_optimized_valiation_err)
	average_metrics[database_name]['biasedPop optimized test acc'].append(1 - bias_population_optimized_test_err)
	average_metrics[database_name]['biasedPop optimized ideal test acc'].append(1 - bias_population_ideal_test_err)
	average_metrics[database_name]['biasedPop diff'].append(baseline_test_err - bias_population_ideal_test_err)
	average_metrics[database_name]['biasedPop validation size'].append(biased_validation_instance_size)
	average_metrics[database_name]['biasedPop test size'].append(biased_test_instance_size)

In [None]:
diversity_rows = []
index = []
for database_name in average_metrics:
    index.append(database_name)
    diversity_rows.append({
        "train size": round(np.mean(average_metrics[database_name]['Train size']), 0),      
        "randPop validation size": round(np.mean(average_metrics[database_name]['randPop validation size']), 2),      
        "randPop test size": round(np.mean(average_metrics[database_name]['randPop test size']), 2),      

        "diversePop validation size": round(np.mean(average_metrics[database_name]['diversePop validation size']), 2),      
        "diversePop test size": round(np.mean(average_metrics[database_name]['diversePop test size']), 2),      

        "biasedPop validation size": round(np.mean(average_metrics[database_name]['biasedPop validation size']), 2),      
        "biasedPop test size": round(np.mean(average_metrics[database_name]['biasedPop test size']), 2),      
	})
pd.DataFrame.from_records(diversity_rows, index=index)

Unnamed: 0,train size,randPop validation size,randPop test size,diversePop validation size,diversePop test size,biasedPop validation size,biasedPop test size
abalone-20_vs_8-9-10,958.0,472.23,474.19,602.06,601.87,392.1,394.26
abalone19,2087.0,1040.97,1040.97,1379.48,1379.48,854.87,854.06
abalone9-18,365.0,180.03,178.84,218.26,217.52,152.16,154.81
cleveland-0_vs_4,86.0,42.35,40.97,47.26,46.19,38.81,37.68
ecoli-0-1-4-7_vs_2-3-5-6,168.0,82.58,84.39,107.42,106.74,75.9,76.77
ecoli-0-2-6-7_vs_3-5,112.0,56.61,56.58,62.9,62.68,50.32,53.06
ecoli-0-4-6_vs_5,101.0,49.77,49.94,56.16,53.84,48.06,48.0
ecoli-0-6-7_vs_3-5,111.0,56.42,56.65,63.26,63.13,51.97,52.94
ecoli1,168.0,81.1,83.77,95.32,94.0,79.42,79.19
ecoli4,168.0,84.29,84.68,99.97,101.0,75.68,75.13


In [13]:
rand_rows = []
diverse_rows = []
biased_rows = []
index = []
for database_name in average_metrics:
    index.append(database_name)
    rand_rows.append({
        # "Baseline validation acc": np.mean(average_metrics[database_name]["Baseline validation acc"]),      
        # "Baseline test acc": np.mean(average_metrics[database_name]["Baseline test acc"]),      
        "randPop ideal improvement": np.mean(average_metrics[database_name]["randPop diff"]),      
        "randPop pval": round(ranksums(average_metrics[database_name]["randPop optimized ideal test acc"], average_metrics[database_name]["Baseline test acc"]).pvalue, 5),
	})
    diverse_rows.append({
        # "Baseline validation acc": np.mean(average_metrics[database_name]["Baseline validation acc"]),      
        # "Baseline test acc": np.mean(average_metrics[database_name]["Baseline test acc"]), 
        "diversePop ideal improvement": np.mean(average_metrics[database_name]["diversePop diff"]),
        "diversePop pval": round(ranksums(average_metrics[database_name]["diversePop optimized ideal test acc"], average_metrics[database_name]["Baseline test acc"]).pvalue, 5),
	})
    biased_rows.append({
		# "Baseline validation acc": np.mean(average_metrics[database_name]["Baseline validation acc"]),      
        # "Baseline test acc": np.mean(average_metrics[database_name]["Baseline test acc"]), 
        "biasedPop ideal improvement": np.mean(average_metrics[database_name]["biasedPop diff"]),
        "biasedPop pval": round(ranksums(average_metrics[database_name]["biasedPop optimized ideal test acc"], average_metrics[database_name]["Baseline test acc"]).pvalue, 5)
	})
    rand_rows[-1]["randPop sig?"] = "YES" if rand_rows[-1]["randPop pval"] < 0.05 else "NO"
    diverse_rows[-1]["diversePop sig?"] = "YES" if diverse_rows[-1]["diversePop pval"] < 0.05 else "NO"
    biased_rows[-1]["biasedPop sig?"] = "YES" if biased_rows[-1]["biasedPop pval"] < 0.05 else "NO"

In [14]:
pd.DataFrame.from_records(rand_rows, index=index)

Unnamed: 0,randPop ideal improvement,randPop pval,randPop sig?
abalone-20_vs_8-9-10,0.053064,0.00161,YES
abalone19,4.7e-05,0.51269,NO
abalone9-18,0.06501,5e-05,YES
cleveland-0_vs_4,0.16129,0.00027,YES
ecoli-0-1-4-7_vs_2-3-5-6,0.031923,0.22064,NO
ecoli-0-2-6-7_vs_3-5,0.075857,0.00034,YES
ecoli-0-4-6_vs_5,0.024614,0.13561,NO
ecoli-0-6-7_vs_3-5,0.082258,0.00436,YES
ecoli1,0.032906,0.00658,YES
ecoli4,0.035688,0.03411,YES


In [16]:
pd.DataFrame.from_records(diverse_rows, index=index)

Unnamed: 0,diversePop ideal improvement,diversePop pval,diversePop sig?
abalone-20_vs_8-9-10,0.049539,0.00265,YES
abalone19,4.7e-05,0.51269,NO
abalone9-18,0.06937,4e-05,YES
cleveland-0_vs_4,0.155914,0.00035,YES
ecoli-0-1-4-7_vs_2-3-5-6,0.023361,0.41418,NO
ecoli-0-2-6-7_vs_3-5,0.076502,0.00023,YES
ecoli-0-4-6_vs_5,0.02209,0.12663,NO
ecoli-0-6-7_vs_3-5,0.072903,0.00631,YES
ecoli1,0.023896,0.05036,NO
ecoli4,0.026827,0.06412,NO


In [18]:
pd.DataFrame.from_records(diverse_rows, index=index)['diversePop sig?'].value_counts()

diversePop sig?
YES    12
NO      7
Name: count, dtype: int64

In [15]:
pd.DataFrame.from_records(biased_rows, index=index)

Unnamed: 0,biasedPop ideal improvement,biasedPop pval,biasedPop sig?
abalone-20_vs_8-9-10,0.06787,0.00015,YES
abalone19,0.002063,0.39435,NO
abalone9-18,0.100863,0.0,YES
cleveland-0_vs_4,0.177016,0.0001,YES
ecoli-0-1-4-7_vs_2-3-5-6,0.033468,0.24545,NO
ecoli-0-2-6-7_vs_3-5,0.081246,0.00019,YES
ecoli-0-4-6_vs_5,0.02784,0.08333,NO
ecoli-0-6-7_vs_3-5,0.094516,0.002,YES
ecoli1,0.031238,0.0021,YES
ecoli4,0.029849,0.04335,YES


In [17]:
win_tie_loss = {
    "Biased wins over random": 0,
	"Biased loses to random": 0,
	"Biased and random tie": 0,

    "Diverse wins over random": 0,
	"Diverse loses to random": 0,
	"Diverse and random tie": 0,

    "Diverse wins over biased": 0,
	"Diverse loses to biased": 0,
	"Diverse and biased tie": 0
}

for database_name in average_metrics:
    
	randpop = average_metrics[database_name]["randPop optimized ideal test acc"]
	diversepop = average_metrics[database_name]["diversePop optimized ideal test acc"]
	biasedpop = average_metrics[database_name]["biasedPop optimized ideal test acc"]

	biased_vs_rand =  ranksums(biasedpop, randpop).pvalue  
	diverse_vs_rand = ranksums(diversepop, randpop).pvalue
	diverse_vs_biased = ranksums(diversepop, biasedpop).pvalue

	if diverse_vs_rand < 0.05:
		if np.mean(randpop) < np.mean(diversepop):
			win_tie_loss["Diverse wins over random"] += 1
		else:
			win_tie_loss["Diverse loses to random"] += 1
	else:
		win_tie_loss["Diverse and random tie"] += 1



	if biased_vs_rand < 0.05:
		if np.mean(randpop) < np.mean(diversepop):
			win_tie_loss["Biased wins over random"] += 1
		else:
			win_tie_loss["Biased loses to random"] += 1
	else:
		win_tie_loss["Biased and random tie"] += 1



	if diverse_vs_biased < 0.05:
		if np.mean(randpop) < np.mean(diversepop):
			win_tie_loss["Diverse wins over biased"] += 1
		else:
			win_tie_loss["Diverse loses to biased"] += 1
	else:
		win_tie_loss["Diverse and biased tie"] += 1

win_tie_loss

{'Biased wins over random': 1,
 'Biased loses to random': 0,
 'Biased and random tie': 18,
 'Diverse wins over random': 0,
 'Diverse loses to random': 0,
 'Diverse and random tie': 19,
 'Diverse wins over biased': 1,
 'Diverse loses to biased': 1,
 'Diverse and biased tie': 17}