In [1]:
import operator as op
import src.ea.ea_lib as ea
import src.ea.problem_data as pb
import src.ea.utilities as ut
import random as rd
import time
import numpy as np
import matplotlib.pyplot as plt
import os
from datetime import datetime
from datetime import date

In [2]:
# EXPERIMENT 1: NSGA-II over a CGP representation

# --- CGP params: grid geometry, connectivity and function set ---
n_rows, n_columns = 1, 200
levels_back = 200
n_outputs = 1
allow_input_to_output = True
inputs_available_to_all_columns = True
functions = [op.add, op.sub, op.mul, ut.safe_divide_one]
# Hand-written text mirror of `functions`, stored with the parameter logs.
# It is not derived from the list, so keep both in sync when editing.
functions_as_string = "[op.add,op.sub,op.mul,ut.safe_divide_one]"

# --- MOEA params ---
trials = 30
seed = 0
node_max_evals = 50000000
population_size = 200
tournament_size = 5
mutation_percentage = 9
# Alternative problem: "spect"
problem_name = "yst_m3"

# One accuracy objective per class label, both maximised within [0, 1].
objectives = [
    ea.Objective(name = objective_name, to_max = True, best = 1, worst = 0)
    for objective_name in ("acc0", "acc1")
]
generation_objective = ea.Objective(name = "generation", to_max = True)

# The first two entries of ea.get_nsgaii_objectives() are used as the front
# objective and the crowding-distance objective respectively.
nsgaii_objectives = ea.get_nsgaii_objectives()
front_objective, cd_objective = nsgaii_objectives[0], nsgaii_objectives[1]

## Experiment parameters
# Seed Python's `random` module once, before any trial starts.
rd.seed(seed)
# Timestamp (to the second) that becomes part of the output folder name.
now = datetime.now()
time_string = now.strftime('%Y_%m_%d-%H_%M_%S')
# The trailing "/" is required: run folders and file names are appended to
# this path by plain string concatenation.
output_path = "outputs/NSGAII_cgp-" + problem_name + "-" + time_string + "/"

#Instantiation
dataset = pb.Dataset()
dataset.load_problem(name = problem_name)
# Number of training rows; multiplies the per-generation node count when the
# evaluation budget is tracked.
data_rows = dataset.x_train.shape[0]
# The CGP gets one input per column of the training matrix.
cgp = ea.CGP_Representation(
    dataset.x_train.shape[1],
    n_outputs,
    levels_back,
    n_rows,
    n_columns,
    allow_input_to_output,
    inputs_available_to_all_columns,
    *functions)
# Distinct class labels in ascending order. The order matters: labels[0] and
# labels[1] are used positionally as the below/above-threshold classes in
# evaluate_ind, and plain set iteration order is not guaranteed to be sorted.
labels = sorted(set(dataset.y_train))

# Save parameters for reference: one [name, value] row per setting.
# The name strings are handed to ut.logs_to_file unchanged (several carry a
# trailing space), so they are reproduced here exactly.
param_logs = [
    [param_name, param_value]
    for param_name, param_value in (
        ("rd.seed", seed),
        ("node_max_evals", node_max_evals),
        ("population_size ", population_size),
        ("tournament_size ", tournament_size),
        ("mutation_percentage ", mutation_percentage),
        ("levels_back ", levels_back),
        ("n_rows ", n_rows),
        ("n_columns ", n_columns),
        ("n_outputs ", n_outputs),
        ("allow_input_to_output ", allow_input_to_output),
        ("inputs_available_to_all_columns ", inputs_available_to_all_columns),
        ("functions ", functions_as_string),
    )
]
ut.logs_to_file(param_logs, "param_logs", output_path)

def evaluate_ind(ind):
    """
    Score one individual on the training set and store the results on it.

    Each row of ``dataset.x_train`` is fed to ``ind.representation.evaluate``;
    output gene 0 is passed to ``ut.threshold_map`` (threshold 0.5) with
    ``labels[1]`` as ``output_up`` and ``labels[0]`` as ``output_down``.
    The per-row predictions are then scored with ``ut.accuracy_in_label`` for
    label 0 and label 1, recorded under ``objectives[0]`` and
    ``objectives[1]``, and stored on the individual through
    ``update_semantics_all``. Nothing is returned.
    """
    # Row index -> predicted label, filled in training-row order.
    predictions = {
        row_idx: ut.threshold_map(
            value = ind.representation.evaluate(data_row = features)[0],
            threshold = 0.5,
            output_up = labels[1],
            output_down = labels[0])
        for row_idx, features in enumerate(dataset.x_train)
    }

    # Both per-label accuracies are computed before either one is recorded.
    per_label_accuracy = [
        ut.accuracy_in_label(y = dataset.y_train, y_output = predictions, label = target)
        for target in (0, 1)
    ]
    for slot, score in enumerate(per_label_accuracy):
        ind.update_evaluation(objective = objectives[slot], value = score)

    ind.update_semantics_all(semantics_all = predictions)

    #evaluated_nodes = len(ind.representation.active_genotype)
    #return evaluated_nodes



#Generation NSGA-II
def run_NSGA_II_gen(population, current_gen):
    """
    Run one NSGA-II generation: rank, log, keep the elite and breed offspring.

    Relies on the notebook-level names ``objectives``, ``nsgaii_objectives``,
    ``front_objective``, ``generation_objective``, ``population_size``,
    ``tournament_size``, ``mutation_percentage``, ``cgp`` and ``evaluate_ind``.

    Parameters
    ----------
    population : list
        Evaluated individuals to rank (the experiment loop passes the parents
        plus the offspring of the previous generation).
    current_gen : int
        Generation number recorded on every offspring created here.

    Returns
    -------
    tuple
        ``(population, header, logs, g_header, g_logs)``: the next population
        (new offspring followed by the retained parents), the individual-level
        header/rows and the generation-level header/row returned by
        ``ea.get_cgp_log``, the latter pair extended with a "Hyperarea" entry.
    """
    # Rank the combined population. The returned list is assumed to be ordered
    # best-first, since it is sliced for elitism below — TODO confirm in ea_lib.
    sorted_population = ea.fast_nondominated_sort(population = population, conflicting_objectives = objectives, nsgaii_objectives = nsgaii_objectives)

    # Alternative ranking that uses the generation of creation as the last
    # tiebreak (suggested for CGP). Kept for reference only, not executed:
    #   ea.set_ranks(population = population, conflicting_objectives = objectives, front_objective = front_objective)
    #   ea.set_crowding_distances_by_front(population = population, conflicting_objectives = objectives, front_objective = front_objective, cd_objective = cd_objective)
    #   sorted_population = ea.sort_population(population = population, objectives = [front_objective, cd_objective, generation_objective])

    # Logs (for the previous generation). The hyperarea is computed on the
    # ranked list, i.e. the same list that get_cgp_log receives.
    # NOTE(review): the recorded run of this notebook stopped with
    # KeyError: 'front'. Using the sort's return value avoids depending on the
    # sort annotating the input individuals in place — confirm that was the cause.
    hyperarea = ea.hyperarea(sorted_population, objectives, front_objective)
    header, logs, g_header, g_logs = ea.get_cgp_log(sorted_population, cgp, current_gen)
    g_logs += [hyperarea]
    g_header += ["Hyperarea"]

    # Elitism: only the first population_size ranked individuals become parents.
    parent_population = sorted_population[:population_size]

    # Offspring generation: one tournament plus one point mutation per offspring.
    offspring_population = []
    for _ in range(population_size):
        parent_index = ea.tournament_selection_index(population_size = len(parent_population), tournament_size = tournament_size)
        parent = parent_population[parent_index]
        new_graph, active_altered = cgp.point_mutation(graph = parent.representation, percentage = mutation_percentage)
        offspring = ea.Individual(representation = new_graph, created_in_gen = current_gen)

        # If the active graph was not altered, the offspring behaves like its
        # parent, so the parent's scores and semantics are copied instead of
        # evaluating again.
        if active_altered:
            evaluate_ind(offspring)
        else:
            offspring.representation.evaluation_skipped = True
            for objective in objectives:
                offspring.update_evaluation(objective = objective, value = parent.evaluations[objective.name])
            offspring.update_semantics_all(semantics_all = parent.semantics_all)
        offspring_population.append(offspring)

    # Record the generation of creation on every new offspring.
    for offspring in offspring_population:
        offspring.update_evaluation(objective = generation_objective, value = current_gen)

    # Population for the next generation: offspring first, then the kept parents.
    population = offspring_population + parent_population

    return population, header, logs, g_header, g_logs

  data = np.genfromtxt("src\ea\data\yst_m3.dat"


In [3]:

# Run `trials` independent runs. Each run builds a fresh random population,
# evolves it with run_NSGA_II_gen until the node-evaluation budget is exceeded,
# and writes its plots and logs under `path`.
for exp_idx in range(trials):
    # The printed run number is 1-based; the folder name below uses the 0-based index.
    print("run:", str(exp_idx+1))
    path = output_path + "run" + str(exp_idx) + "/"
    # Initial generation: reset the per-run counters and log buffers.
    generation = 0
    individual_level_logs = []
    gen_level_logs = []
    nodes_evaluated = 0

    # Random initial population; each graph gets a seed drawn from `rd`.
    # Specific initial conditions for the population can be specified here.
    graphs = [cgp.create_random(seed = rd.random()) for _ in range(population_size)]

    # Wrap every graph in an Individual tagged with the generation of creation.
    parent_population = [ea.Individual(r, created_in_gen = generation) for r in graphs]

    # Evaluate the parents, then rank them with the non-dominated sort.
    for ind in parent_population:
        evaluate_ind(ind)
    sorted_nsga2_population = ea.fast_nondominated_sort(population = parent_population, conflicting_objectives = objectives, nsgaii_objectives = nsgaii_objectives)

    # Create the offspring of the initial generation.
    # NOTE: `population` is an alias of `parent_population`, not a copy, so the
    # appends below grow both names.
    population = parent_population
    for i in range(population_size):

        # Binary tournament selection (tournament_size = 2) is used in the initial
        # generation only, according to NSGA-II. The index is drawn over
        # population_size and applied to the ranked list.
        parent_index = ea.tournament_selection_index(population_size = population_size, tournament_size = 2)
        parent = sorted_nsga2_population[parent_index]
        new_graph, active_altered = cgp.point_mutation(graph = parent.representation, percentage = mutation_percentage)
        offspring = ea.Individual(representation = new_graph, created_in_gen = generation)

        # If the active graph was not altered, the individual does not need to be
        # evaluated again: the parent's scores and semantics are copied instead.
        if active_altered:
            evaluate_ind(offspring)
        else:
            offspring.representation.evaluation_skipped = True
            for objective in objectives:
                offspring.update_evaluation(objective = objective, value = parent.evaluations[objective.name])
            offspring.update_semantics_all(semantics_all = parent.semantics_all)
        population.append(offspring)

    # Record generation 0 on every individual (parents and offspring alike).
    for ind in population:
        ind.update_evaluation(objective = generation_objective, value = generation)



    # Main loop: one NSGA-II generation per iteration until the budget is spent.
    while True:
        generation += 1
        population, header, logs, g_header, g_logs = run_NSGA_II_gen(population, generation)
        

        # Logs: accumulate individual-level rows and one generation-level row.
        individual_level_logs += logs
        gen_level_logs += [g_logs]

        # Every 20 generations: plot and write the logs gathered so far.
        # These intermediate files contain no header row; the headers are only
        # inserted after the loop.
        if generation%20==0:
            
            ea.plot_pareto(population, objectives, "size", path = path, name = "plt_size_g"+str(generation))
            ea.plot_pareto(population, objectives, "color", path = path, name = "plt_color_g"+str(generation))

            ut.logs_to_file(individual_level_logs, "Ind_logs", path)
            ut.logs_to_file(gen_level_logs, "Gen_logs", path)
        
        # Stop criterion: the "Eval_nodes" entry of the generation log times the
        # number of training rows is added to the running total; the run stops
        # once the total exceeds node_max_evals.
        # NOTE(review): presumably "Eval_nodes" is the number of nodes evaluated
        # in this generation — confirm in ea.get_cgp_log.
        nodes_idx = g_header.index("Eval_nodes")
        nodes_evaluated += g_logs[nodes_idx] * data_rows
        print("progress: ", str(nodes_evaluated*100/node_max_evals), " nodes_evaluated: ", str(nodes_evaluated))
        if nodes_evaluated > node_max_evals:
            break



    # Final plots: rank the last population once more and plot it.
    population = ea.fast_nondominated_sort(population = population, conflicting_objectives = objectives, nsgaii_objectives = nsgaii_objectives)
    ea.plot_pareto(population, objectives, "size", path = path, name = "plt_size_g"+str(generation))
    ea.plot_pareto(population, objectives, "color", path = path, name = "plt_color_g"+str(generation))

    # Final logs: prepend the header rows, then write both log files.
    individual_level_logs.insert(0, header)
    ut.logs_to_file(individual_level_logs, "Ind_logs", path)
    gen_level_logs.insert(0, g_header)
    ut.logs_to_file(gen_level_logs, "Gen_logs", path)
#pickle

run: 1


KeyError: 'front'

In [5]:
# Print the representation of the first individual in `population`.
# NOTE(review): this cell defines nothing itself; it depends on `population`
# already existing in the kernel from an earlier cell.
print(population[0].representation)

Graph:
  (_)22 f3 i 17 14
  (_)23 f2 i 19 19
  (_)24 f3 i 7 11
  (_)25 f1 i 2 3
  (_)26 f2 i 20 6
  (_)27 f3 i 15 17
  (A)28 f0 i 9 0
  (_)29 f3 i 14 13
  (_)30 f3 i 5 3
  (_)31 f3 i 2 29
  (_)32 f1 i 18 0
  (_)33 f3 i 19 1
  (A)34 f2 i 15 20
  (_)35 f3 i 18 24
  (_)36 f3 i 34 32
  (_)37 f0 i 32 22
  (_)38 f0 i 10 21
  (_)39 f0 i 17 25
  (_)40 f1 i 5 22
  (_)41 f0 i 6 0
  (_)42 f3 i 20 26
  (_)43 f2 i 34 30
  (_)44 f3 i 15 38
  (_)45 f3 i 37 7
  (_)46 f1 i 11 15
  (_)47 f1 i 28 43
  (_)48 f1 i 44 26
  (_)49 f2 i 21 44
  (_)50 f3 i 15 34
  (_)51 f3 i 5 24
  (_)52 f1 i 19 35
  (A)53 f1 i 4 4
  (_)54 f1 i 23 40
  (_)55 f1 i 40 7
  (_)56 f0 i 0 41
  (_)57 f2 i 7 11
  (_)58 f2 i 56 32
  (_)59 f0 i 49 42
  (A)60 f3 i 53 34
  (_)61 f2 i 59 7
  (_)62 f1 i 47 27
  (_)63 f2 i 1 48
  (_)64 f0 i 7 29
  (_)65 f1 i 25 28
  (_)66 f2 i 0 48
  (_)67 f1 i 62 32
  (_)68 f1 i 16 2
  (_)69 f1 i 49 4
  (_)70 f3 i 36 40
  (_)71 f3 i 64 7
  (_)72 f3 i 35 25
  (_)73 f1 i 44 6
  (_)74 f3 i 23 33
  (_)75 f2 i 43

In [17]:
import operator as op
import src.ea.ea_lib as ea
import src.ea.problem_data as pb
import src.ea.utilities as ut
import random as rd
import time
import numpy as np
import matplotlib.pyplot as plt

### Evolution strategy - vanilla CGP
rd.seed(1)
generations = 20000
population_size = 4
mutation_percentage = 9
problem_name = "spect"
# Single accuracy objective, maximised within [0, 1].
objective = ea.Objective(name="acc", to_max = True, best=1, worst=0)
# Secondary sort key: the generation in which an individual was created.
generation_objective = ea.Objective(name="generation", to_max = True)

#Setup
# Load the dataset once; every later statement reuses this instance.
dataset = pb.Dataset()
dataset.load_problem(name = problem_name)

## CGP params
levels_back = 2
n_rows = 4
n_columns = 9
allow_input_to_output = False
inputs_available_to_all_columns = False
functions = [op.add,op.sub,op.mul,ut.safe_divide_one]

## Definition
# One CGP input per column of the training matrix and a single output gene.
cgp = ea.CGP_Representation(
    dataset.x_train.shape[1],
    1,
    levels_back,
    n_rows,
    n_columns,
    allow_input_to_output,
    inputs_available_to_all_columns,
    *functions)
# Distinct class labels in ascending order. The order matters: labels[0] and
# labels[1] are used positionally as the below/above-threshold classes in
# evaluate_ind, and plain set iteration order is not guaranteed to be sorted.
labels = sorted(set(dataset.y_train))

In [18]:
def evaluate_ind(ind):
    """
    Score one individual with plain accuracy on the training set.

    Each row of ``dataset.x_train`` is fed to ``ind.representation.evaluate``;
    output gene 0 is passed to ``ut.threshold_map`` (threshold 0.5) with
    ``labels[1]`` as ``output_up`` and ``labels[0]`` as ``output_down``.
    The predictions are scored with ``ut.accuracy`` against
    ``dataset.y_train``; the score is recorded under the notebook-level
    ``objective`` and the predictions are stored through
    ``update_semantics_all``. Nothing is returned.
    """
    # Row index -> predicted label, filled in training-row order.
    predictions = {
        row_idx: ut.threshold_map(
            value = ind.representation.evaluate(data_row = features)[0],
            threshold = 0.5,
            output_up = labels[1],
            output_down = labels[0])
        for row_idx, features in enumerate(dataset.x_train)
    }

    score = ut.accuracy(y = dataset.y_train, y_output = predictions)
    ind.update_evaluation(objective, value = score)
    ind.update_semantics_all(semantics_all = predictions)

def evaluate_population(population):
    """Call ``evaluate_ind`` on every member of ``population``, in order."""
    for individual in population:
        evaluate_ind(individual)


In [19]:
### Evolution strategy - vanilla CGP: initial population

# Draw population_size random graphs, each with its own seed taken from `rd`.
# Specific initial conditions for the population can be specified here.
graphs = [
    cgp.create_random(seed = rd.random())
    for _ in range(population_size)
]

# Wrap every graph in an Individual.
population = list(map(ea.Individual, graphs))

# Evaluate the whole population, then mark every member as created in generation 0.
evaluate_population(population)
for ind in population:
    ind.update_evaluation(objective = generation_objective, value = 0)
        

gen  0  acc  0.6217228464419475 actives  1
gen  500  acc  0.7565543071161048 actives  3
gen  1000  acc  0.7565543071161048 actives  4
gen  1500  acc  0.7640449438202247 actives  5
gen  2000  acc  0.7677902621722846 actives  5
gen  2500  acc  0.7677902621722846 actives  5
gen  3000  acc  0.7677902621722846 actives  5
gen  3500  acc  0.7752808988764045 actives  6
gen  4000  acc  0.7752808988764045 actives  6
gen  4500  acc  0.7752808988764045 actives  6
gen  5000  acc  0.7752808988764045 actives  6
gen  5500  acc  0.7865168539325843 actives  7
gen  6000  acc  0.7865168539325843 actives  7
gen  6500  acc  0.7865168539325843 actives  7
gen  7000  acc  0.7865168539325843 actives  7
gen  7500  acc  0.7865168539325843 actives  7
gen  8000  acc  0.7865168539325843 actives  7
gen  8500  acc  0.7940074906367042 actives  10
gen  9000  acc  0.7940074906367042 actives  10
gen  9500  acc  0.8052434456928839 actives  10
gen  10000  acc  0.8052434456928839 actives  10
gen  10500  acc  0.80524344569288

In [None]:
for generation in range(generations):
    # Rank by accuracy, then by generation of creation; the first individual
    # becomes the only parent and is carried over into the next population.
    sorted_population = ea.sort_population(population, [objective, generation_objective])
    parent = sorted_population[0]
    population = [parent]

    # Fill the remaining population_size - 1 slots with mutants of the parent.
    for i in range(population_size - 1):
        new_graph, active_altered = cgp.point_mutation(
            graph = parent.representation,
            percentage = mutation_percentage)
        offspring = ea.Individual(representation = new_graph)
        offspring.update_evaluation(objective = generation_objective, value = generation + 1)

        if not active_altered:
            # The active graph is unchanged, so the parent's score and
            # semantics are copied instead of evaluating again.
            offspring.representation.evaluation_skipped = True
            offspring.update_evaluation(objective = objective, value = parent.evaluations[objective.name])
            offspring.update_semantics_all(semantics_all = parent.semantics_all)
        else:
            evaluate_ind(offspring)

        # Add the offspring to the population.
        population.append(offspring)

    # Progress report every 500 generations: the parent's accuracy and the
    # length of its active genotype.
    if generation % 500 == 0:
        print("gen ", generation, " acc ", str(parent.evaluations[objective.name]),
              "actives ", str(len(parent.representation.active_genotype)))
        #print(parent.representation)
