### Description

Generates analysis tables and plots (computational effort and hypervolume) from the logs of a parameter study.

### Setup

In [None]:
%matplotlib inline

import os, csv, itertools
import numpy as np
import matplotlib.pyplot as plt

# Path to experimental data. Should contain a 'logs' directory.
# Logs are read from exp_path + "logs/" and results are written to
# sub-directories of exp_path ("analysis/", "analysis_relaxed_target_.../").
exp_path = "../../../flexbench-data/3_NSGA-II_varying_goals/3-3_study_epoch_length_gene_length/"

# Target path, in case it is different from the experiment path.
# Its 'logs' directory provides the target task's training hypervolume
# when include_target (below) is False.
# target_path = exp_path
target_path = "../../../flexbench-data/2_baseline_NSGA-II/2-2_tournament/"

# Parameters and values under study.
# Every combination of the listed values is analysed. Names and values are
# joined as "<param>_<value>" (formatted with %d) to form the log directory
# names, so the order of 'params' must match the directory naming.
# params = ["eta_cross", "eta_mut"]
# params = ["pop_size"]
params = ["gens_epoch", "gene_length"]
dict_values = {
    # "eta_cross": [20, 40, 80, 120, 140, 180],
    # "eta_mut": [20, 40, 80, 120, 140, 180],
    # "pop_size": [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    "gens_epoch": [5, 10, 15, 20, 25],
    "gene_length": [2, 3, 5],
}
# Axis labels used in the plots for each parameter.
params_labels = {
    "eta_cross": "Crossover eta",
    "eta_mut": "Mutation eta",
    "pop_size": "Population Size",
    "gens_epoch": "Epoch Length E",
    "gene_length": "Gene Length l",
}

# Materials considered as source and target.
# Tasks whose name contains "_to_" are read from an 'adaption' directory and
# compared against the task named after "_to_"; all other tasks are read from
# a 'training' directory.
# materials_source = ["steel","inconel_718"]
# materials_pairs = ["steel_to_inconel_718"]
materials_source = ["steel_and_tungsten_alloy"]
materials_pairs = ["steel_and_tungsten_alloy_to_inconel_718"]

# Experiment values.
# repetitions: number of runs (CSV rows) considered per setup.
# nGens: generations 0..nGens (CSV columns) are scanned for each run.
# popSize: evaluations per generation, used to convert generations to evaluations.
repetitions = 50
nGens = 50
popSize = 100

# For calculation of Computational Effort (CE).
# Used as z in ceil(log(1 - z) / log(1 - P)).
z = 0.99

# False if using population size above.
# If one of the parameters varied is the population size, set popSize_varied
# to True and popSize_idx to the index of that parameter in 'params'.
popSize_varied = False
popSize_idx = 0

# True if target was included in parameter study.
# If False, the target hypervolume is read from target_path instead.
include_target = False

# If True, additionally calculates computational effort and evaluations
# based on a percentage of the target hypervolume.
# 'percentage' is the fraction by which the target hypervolume is multiplied.
include_relaxed_target = True
percentage = 0.99

### Load CSV

In [None]:
def load_data(path):
    """
    Read the CSV file at 'path' and return a NumPy array holding the largest
    value of every row, limited to the first 'repetitions' rows.
    """
    print("Loading %s..." % path)
    row_maxima = []
    with open(path, "r", newline="") as csv_file:
        for row in csv.reader(csv_file, delimiter=","):
            # Each row is reduced to its best (largest) value.
            row_maxima.append(max([float(cell) for cell in row]))
    return np.array(row_maxima)[:repetitions]

def load_evals_relaxed(path_hypervolume_target, path_hypervolume_current, popSize):
    """
    Return an array with, for each run, the number of evaluations needed to
    reach the (relaxed) target hypervolume, or -1 if it was never reached.

    The threshold of run i is the maximum of row i in 'path_hypervolume_target'
    multiplied by the module-level 'relaxed_target'. Row i of
    'path_hypervolume_current' is scanned over generations 0..nGens, and the
    first generation 'gen' meeting the threshold yields (gen+1)*popSize.
    Only the first 'repetitions' rows of each file are used.
    """
    print("Loading %s..." % path_hypervolume_current)

    # Per-run thresholds derived from the target's best hypervolume.
    with open(path_hypervolume_target, "r", newline="") as target_file:
        best_per_run = [
            max([float(cell) for cell in row])
            for row in csv.reader(target_file, delimiter=",")
        ]
        thresholds = (np.array(best_per_run) * relaxed_target)[:repetitions]

    # Hypervolume per generation for each run of the current setup.
    with open(path_hypervolume_current, "r", newline="") as current_file:
        runs = [
            [float(cell) for cell in row]
            for row in csv.reader(current_file, delimiter=",")
        ][:repetitions]

    evals_relaxed = []
    for run_idx, run in enumerate(runs):
        needed = -1  # Stays -1 when the threshold is never reached.
        for gen in range(nGens + 1):
            if run[gen] >= thresholds[run_idx]:
                needed = (gen + 1) * popSize
                break
        evals_relaxed.append(needed)

    return np.array(evals_relaxed)

### Computational Effort (CE) calculation

In [None]:
def calc_min_effort(evals, nRuns, nGens, popSize, z):
    """
    Return the minimum computational effort over generations 0..nGens.

    evals   -- evaluations needed by each run; -1 marks a run that failed.
    nRuns   -- total number of runs, used to turn counts into probabilities.
    nGens   -- last generation index considered.
    popSize -- evaluations per generation.
    z       -- value used in ceil(log(1 - z) / log(1 - P)).

    For every generation i, P is the fraction of runs that succeeded by
    generation i and the effort is popSize * (i+1) * R, where R is 1 if P is 1
    and ceil(log(1 - z) / log(1 - P)) otherwise. A P of 0 is replaced by 1e-7.
    """
    # Count how many runs succeeded at each generation.
    successes = [0] * (nGens + 1)
    for n_evals in evals:
        if n_evals == -1:
            continue
        successes[int(n_evals / popSize) - 1] += 1

    best = float('+Inf')
    solved_so_far = 0
    for gen, newly_solved in enumerate(successes):
        # Cumulative success probability up to this generation.
        solved_so_far += newly_solved
        prob = solved_so_far / float(nRuns)
        if prob == 0:
            prob = 1e-7

        # Number of independent runs required.
        runs_needed = 1.0 if prob == 1.0 else np.ceil(np.log(1 - z) / np.log(1 - prob))

        effort = popSize * (gen + 1) * runs_needed
        if effort < best:
            best = effort

    return best

### Load data in dictionary.

In [None]:
def load_dict():
    """
    Build and return a dictionary with the results of every setup.

    For each task in materials_source + materials_pairs and each combination
    of the parameter values under study, the entry
    "<param>_<value>_..._<task>" holds:
      "CE"          -- minimum computational effort (see calc_min_effort);
      "hypervolume" -- mean over runs of the best hypervolume.

    When popSize_varied is True, the module-level popSize is overwritten with
    the population size of the combination being processed.
    """
    global popSize

    results = {}

    for task in materials_source + materials_pairs:
        # Tasks named "<source>_to_<target>" are adaptation runs.
        is_transfer = "_to_" in task

        for values in itertools.product(*[dict_values[name] for name in params]):

            if popSize_varied:
                popSize = values[popSize_idx]

            setup = "_".join(["%s_%d" % (name, value) for (name, value) in zip(params, values)])
            key = setup + "_" + task

            if is_transfer:
                path = load_path + key + "/adaption/"
                target_task = task.split("_to_")[-1]
                # The target's training logs come either from this study or
                # from the separate target path.
                if include_target:
                    path_target = load_path + setup + "_" + target_task + "/training/"
                else:
                    path_target = load_path_target + target_task + "/training/"
            else:
                # A source task is compared against its own hypervolume.
                path = load_path + key + "/training/"
                path_target = path

            hypervolume_csv = path + "hypervolume.csv"
            data_cost = load_evals_relaxed(
                path_target + "hypervolume.csv",
                hypervolume_csv,
                popSize,
            )
            data_hypervolume = load_data(hypervolume_csv)

            results[key] = {
                "CE": calc_min_effort(data_cost, repetitions, nGens, popSize, z),
                "hypervolume": np.mean(data_hypervolume),
            }

    return results

### Generate tables

In [None]:
def save_table(table, path):
    """Write the rows of 'table' to 'path' as a comma-separated file."""
    print("Saving %s..." % path)
    with open(path, "w", newline="") as csv_file:
        csv.writer(csv_file, delimiter=",").writerows(table)

def gen_all_tables(data_dict):
    """
    Write the result tables of the study to 'save_path'.

    data_dict -- dictionary as returned by load_dict(): one entry per
                 "<param>_<value>_..._<task>" key with "CE" and "hypervolume".

    Two CSV files are saved:
      table_CE_hypervolume.csv -- one row per task and parameter combination;
      table_best_setups.csv    -- per task, the setup with the lowest CE and
                                  the setup with the highest hypervolume.

    Raises KeyError if 'data_dict' lacks an entry for any combination.
    """
    tasks = materials_source + materials_pairs
    combos = list(itertools.product(*[dict_values[name] for name in params]))

    # Keys of every setup, grouped by task. They are rebuilt from the parameter
    # grid rather than matched with str.endswith, because a source task name
    # can be a suffix of a pair name (e.g. "inconel_718" and
    # "steel_to_inconel_718"), which would mix the setups of both tasks.
    keys_by_task = {
        task: [
            "_".join(["%s_%d" % (name, value) for (name, value) in zip(params, values)]) + "_" + task
            for values in combos
        ]
        for task in tasks
    }

    # Table for CE and hypervolume of every setup.
    table = [["Task"] + params + ["Min. CE", "Hypervolume"]]
    for task in tasks:
        for values, key in zip(combos, keys_by_task[task]):
            entry = data_dict[key]
            table.append([task] + list(values) + [entry["CE"], entry["hypervolume"]])
    save_table(table, save_path + "table_CE_hypervolume.csv")

    # Table for best setups: CE is minimised, hypervolume is maximised.
    table = [["Task", "Measure", "Setup", "Min. CE", "Hypervolume"]]
    for task in tasks:
        keys = keys_by_task[task]
        for measure, pick in (("CE", min), ("hypervolume", max)):
            best_setup = pick(keys, key=lambda k: data_dict[k][measure])
            best = data_dict[best_setup]
            table.append([task, measure, best_setup, best["CE"], best["hypervolume"]])
    save_table(table, save_path + "table_best_setups.csv")

### Generate plots (CE and hypervolume).

In [None]:
def gen_plot(measure, measure_label, data_dict):
    """
    Save a figure with one scatter subplot per task showing 'measure' for
    every parameter combination.

    measure       -- key of the value to plot in each data_dict entry.
    measure_label -- axis label for the measure.
    data_dict     -- dictionary as returned by load_dict().

    One parameter gives 2D subplots, two parameters give 3D subplots; for any
    other number of parameters a message is printed and nothing is plotted.
    The figure is written to save_path + "plots_<measure>.eps".
    """
    tasks = materials_source + materials_pairs
    n_params = len(params)

    # Pick the kind of axes from the number of parameters.
    if n_params == 1:
        extra_kwargs = {}
    elif n_params == 2:
        extra_kwargs = {"subplot_kw": {'projection': '3d'}}
    else:
        print("Can't plot for more than 2 parameters at the moment.")
        return
    fig, axsVec = plt.subplots(nrows=1, ncols=len(tasks), figsize=(16,6), constrained_layout=True, **extra_kwargs)

    # plt.subplots returns a single Axes when there is one column; normalise to a list.
    axsVec = list(np.array(axsVec).flat)

    title_size = 16
    label_size = 15
    combos = list(itertools.product(*[dict_values[name] for name in params]))

    for task, axs in zip(tasks, axsVec):

        # One point per combination: parameter values followed by the measure.
        points = []
        for values in combos:
            key = "_".join(["%s_%d" % (name, value) for (name, value) in zip(params, values)]) + "_" + task
            points.append(list(values) + [data_dict[key][measure]])
        points = np.array(points)

        # Each column of 'points' is one coordinate of the scatter plot.
        columns = [points[:, col] for col in range(len(points[0]))]
        axs.scatter(*columns)

        # Title, ticks and labels.
        axs.set_title(task, fontsize=title_size, fontweight='bold')
        axs.tick_params(axis='x', labelsize=title_size)
        axs.tick_params(axis='y', labelsize=title_size)
        axs.set_xlabel(params_labels[params[0]], fontsize=label_size, fontweight='bold', labelpad=10)
        if n_params == 1:
            axs.set_ylabel(measure_label, fontsize=label_size, fontweight='bold', labelpad=15)
        else:
            axs.set_ylabel(params_labels[params[1]], fontsize=label_size, fontweight='bold', labelpad=10)
            axs.set_zlabel(measure_label, fontsize=label_size, fontweight='bold', labelpad=15)
            axs.tick_params(axis='z', labelsize=title_size)

    fig.savefig(save_path+"plots_"+measure+".eps")
    print("Plot saved in %s." % save_path)

### Run analysis.

In [None]:
load_path = exp_path + "logs/"
load_path_target = target_path + "logs/"

# The first pass uses the full target hypervolume. If include_relaxed_target
# is set, a second pass uses 'percentage' of it and writes to its own directory.
# save_path and relaxed_target are module-level names read by the functions above.
for use_relaxed in ([False, True] if include_relaxed_target else [False]):
    if use_relaxed:
        save_path = exp_path + "analysis_relaxed_target_%.2f/" % percentage
    else:
        save_path = exp_path + "analysis/"

    # Create the output directory unless it already exists.
    try:
        os.mkdir(save_path)
    except FileExistsError:
        pass

    relaxed_target = percentage if use_relaxed else 1.0

    data_dict = load_dict()
    gen_all_tables(data_dict)
    gen_plot("CE", "CE", data_dict)
    gen_plot("hypervolume", "Hypervolume", data_dict)