In [None]:
from pymoo.operators.sampling.rnd import BinaryRandomSampling
from pymoo.operators.mutation.bitflip import BitflipMutation
from pymoo.operators.crossover.hux import HUX
from pymoo.algorithms.moo.nsga2 import NSGA2
from pymoo.optimize import minimize

from generic_optimizer import GenericOptimizer, prepare_splits
from novel_sampling import BiasedBinarySampling, over_sample

from joblib import Parallel, delayed
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle
import os
import re

from sklearn.preprocessing import LabelEncoder

In [3]:
configs = []
# NOTE(review): wildcard import kept because other cells may rely on names it
# injects. GenericOptimizer and prepare_splits are already imported explicitly
# in the imports cell; prefer explicit imports once usage is confirmed.
from generic_optimizer import *

# Each scheme is the list of objective callables handed together to one
# optimization run.
evaluation_schemes = [
	[GenericOptimizer.calculate_overall_error, GenericOptimizer.calculate_num_examples],
	[GenericOptimizer.calculate_overall_error, GenericOptimizer.calculate_overall_inverse_f1, GenericOptimizer.calculate_num_examples],
	[GenericOptimizer.calculate_class0_error, GenericOptimizer.calculate_class1_error],
]

# Extracts the identifier that follows a dot and precedes whitespace in the
# string form of an objective callable (e.g. "<function Cls.name at 0x...>").
pattern = r'\.([a-zA-Z_][a-zA-Z0-9_]*)\s'

sampling_methods = [
	"over_sample",
	"regular_sample"
]

population_initialization_names = [
	"rand_population",
	"bias_population"
]

datasets_root = '../Datasets'

# os.listdir returns entries in arbitrary order, so sort them to keep the
# order of `configs` (and any slice taken from it) reproducible. Entries that
# are not directories cannot contain "<folder>/<folder>.csv", so skip them.
dataset_folders = sorted(
	entry for entry in os.listdir(datasets_root)
	if os.path.isdir(os.path.join(datasets_root, entry))
)

# Load every dataset exactly once. The data and its splits do not depend on the
# sampling method, the initial population or the objectives, so every
# configuration of a dataset reuses the same arrays and the same folds.
dataset_cache = {}
for folder in dataset_folders:
	x, y, splits = prepare_splits(os.path.join(datasets_root, folder, f"{folder}.csv"))
	# Materialize the splits so they can be iterated once per configuration
	# even if prepare_splits hands back a one-shot iterator.
	dataset_cache[folder] = (x, y, list(splits))

for sampling_name in sampling_methods:
	for initial_population in population_initialization_names:
		for objectives in evaluation_schemes:
			objectives_names = [re.search(pattern, str(objective_name)).group(1) for objective_name in objectives]
			objectives_id = "_".join(objectives_names)

			for folder in dataset_folders:
				x, y, splits = dataset_cache[folder]

				# 31 splits for Wilcoxon rank-sum
				for num_iters, (train_idx, test_idx, validation_idx) in enumerate(splits):
					run_name = f"{num_iters}_{sampling_name}_{initial_population}_{folder}_{objectives_id}"

					configs.append({
						"Dataset": folder,
						"Over Sample": sampling_name,
						"Initial Population": initial_population,
						"Objective Names": objectives_names,
						"Iter Number": num_iters,
						"Run Name": run_name,
						"Executable Objectives": objectives,
						"x": x, "y": y,
						"train_idx": train_idx,
						"validation_idx": validation_idx,
						"test_idx": test_idx,
					})

print(f"Created {len(configs)} executions")

Created 3720 executions


In [None]:
def execute(execution_config):
	"""Run one NSGA2 optimization described by ``execution_config`` and pickle it.

	Parameters
	----------
	execution_config : dict
		One entry of ``configs``. Keys read here: "x", "y", "train_idx",
		"validation_idx", "Over Sample", "Initial Population",
		"Executable Objectives" and "Run Name".
		"x" and "y" are indexed directly with the index collections
		(presumably numpy arrays -- TODO confirm against prepare_splits).

	Side effects
	------------
	* Stores the optimization result under ``execution_config['Result']``
	  (the dict passed in is mutated).
	* Writes the whole config, result included, to
	  ``optimizations/<Run Name>.pickle``.

	Returns
	-------
	None
	"""
	train_idx = execution_config["train_idx"]
	validation_idx = execution_config["validation_idx"]
	x_train, y_train = execution_config["x"][train_idx], execution_config["y"][train_idx]
	x_validation, y_validation = execution_config["x"][validation_idx], execution_config["y"][validation_idx]

	# "Over Sample" holds the sampling label ("over_sample" / "regular_sample"),
	# which is always truthy as a string, so compare against the label instead
	# of testing truthiness. A literal True is still honored for configs that
	# carry a boolean flag.
	if execution_config["Over Sample"] in (True, "over_sample"):
		x_train, y_train = over_sample(x_train, y_train)

	objectives = execution_config["Executable Objectives"]

	problem = GenericOptimizer(
		x_train,
		y_train,
		x_validation,
		y_validation,
		objectives,
		"Sequential"
	)

	if execution_config["Initial Population"] == "rand_population":
		initial_population = BinaryRandomSampling()
	else:
		# NOTE(review): the meaning of 0.5 and 0.7 is defined by
		# BiasedBinarySampling in novel_sampling -- confirm there.
		initial_population = BiasedBinarySampling(x_train, 0.5, 0.7)

	algorithm = NSGA2(
		pop_size=GenericOptimizer.population_size,
		sampling=initial_population,
		crossover=HUX(),
		mutation=BitflipMutation(),
		eliminate_duplicates=True)

	# NOTE(review): the generation count reuses population_size; confirm this
	# is intentional rather than a missing "number of generations" setting.
	result = minimize(
		problem,
		algorithm,
		('n_gen', GenericOptimizer.population_size)
	)

	execution_config['Result'] = result

	# Make sure the output directory exists so the first run on a fresh
	# checkout does not fail after the optimization has already finished.
	output_dir = "optimizations"
	os.makedirs(output_dir, exist_ok=True)

	with open(f"{output_dir}/{execution_config['Run Name']}.pickle", 'wb') as fh:
		pickle.dump(execution_config, fh, pickle.HIGHEST_PROTOCOL)

# Number of configurations to execute in this batch (a prefix of `configs`).
MAX_RUNS = 100
# -1 asks joblib to use all available CPUs; set to 1 to run sequentially
# in-process, which is the easiest way to debug `execute`.
N_JOBS = -1

returns = Parallel(n_jobs=N_JOBS)(delayed(execute)(config) for config in configs[:MAX_RUNS])
print(f"Successfully completed {len(returns)} optimization runs")


KeyboardInterrupt: 