### Description

Given a flexibility experiment, generates tables and plots for solution quality, learning costs, and adaption costs.

### Setup

In [None]:
%matplotlib inline

import os, csv
import numpy as np
import matplotlib.pyplot as plt

# Path to the experiment directory being analysed. It must contain a 'logs'
# directory (adaption logs are read from there); the analysis output
# directories are created inside this path as well.
exp_path = "../../../flexbench-data/3_NSGA-II_varying_goals/3-2_varying_goals_active_inactive/"

# Source and target paths, in case they are different from the experiment path.
# "logs/" is appended to each of them before any file is loaded.
# Uncomment the alternative assignments below to switch data sets.
source_path = exp_path
# target_path = exp_path
# source_path = "../../../flexbench-data/2_baseline_NSGA-II/2-2_tournament/"
target_path = "../../../flexbench-data/2_baseline_NSGA-II/2-2_tournament/"

# Experiment data.
# Target tasks: each one becomes a table row and a subplot.
tasks_target = ["steel", "tungsten_alloy", "steel_dummy", "inconel_718"]
# Source tasks: each one becomes a table column. A source name is split on
# "_and_"; a source that contains the target itself is reported as "-".
# tasks_source = tasks_target
tasks_source = [
    "steel_and_tungsten_alloy",
    "steel_and_steel_dummy",
    "steel_and_inconel_718",
    "tungsten_alloy_and_steel_dummy",
    "tungsten_alloy_and_inconel_718",
    "steel_dummy_and_inconel_718",
]
# Maximum number of runs (CSV rows) taken from each log file; also used as
# the number of runs when computing the computational effort.
repetitions = 100
# Number of generations; generation indices 0..nGens are considered.
nGens = 50
# Population sizes used to convert generations into evaluations, for the
# from-scratch runs and for the adaption runs respectively.
popSize = 100
popSizeAdaption = 100

# Value for calculating computational effort (passed as 'z' to
# calc_min_effort). NOTE(review): presumably the required probability of
# success -- confirm against the experiment description.
z = 0.99

# If True, additionally calculates computational effort and evaluations
# based on a percentage of the target hypervolume.
include_relaxed_target = True
# Fraction of the target hypervolume used in that additional analysis; it is
# also part of the name of the output directory.
percentage = 0.99

### Load CSV

In [None]:
def load_data(path):
    """
    Read the CSV file at 'path' and return the largest value of every row.

    Only the first 'repetitions' rows are kept in the returned array.
    """
    print("Loading %s..." % path)

    row_maxima = []
    with open(path, "r", newline="") as handle:
        for fields in csv.reader(handle, delimiter=","):
            # Every field of a row is numeric; keep only the row's maximum.
            row_maxima.append(max(map(float, fields)))

    return np.array(row_maxima)[:repetitions]

def load_hypervolume_gens(path, only_increases=True):
    """
    Return the per-generation mean hypervolume over the runs stored in 'path'.

    Each CSV row is one run and each column one generation; only the first
    'repetitions' rows are used.

    With 'only_increases' set, every run is first replaced by its running
    maximum (starting from 0) over generations 0..nGens. Runs with fewer
    values are padded with their last running maximum, longer runs are cut.
    Otherwise the raw values are averaged column by column.
    """
    print("Loading %s..." % path)

    with open(path, "r", newline="") as handle:
        runs = [list(map(float, fields)) for fields in csv.reader(handle, delimiter=",")]
    runs = runs[:repetitions]

    if not only_increases:
        return np.mean(np.array(runs), axis=0)

    n_points = nGens + 1
    best_so_far_runs = []
    for run in runs:
        best = 0
        trace = []
        for value in run[:n_points]:
            if value > best:
                best = value
            trace.append(best)
        # Missing generations keep the best hypervolume found so far.
        trace.extend([best] * (n_points - len(trace)))
        best_so_far_runs.append(trace)

    return np.mean(np.array(best_so_far_runs), axis=0)

def load_evals_relaxed(path_hypervolume_target, path_hypervolume_current, popSize):
    """
    Return an array with the number of evaluations each run needed to reach
    a fraction ('relaxed_target') of its target hypervolume.

    Parameters:
        path_hypervolume_target: CSV whose row maxima, scaled by
            'relaxed_target', are the per-run hypervolume thresholds.
        path_hypervolume_current: CSV with one run per row and one
            hypervolume value per generation.
        popSize: evaluations per generation; reaching the threshold in
            generation 'gen' counts as (gen+1)*popSize evaluations.

    Only the first 'repetitions' rows of both files and the generations
    0..nGens are considered. A run that never reaches its threshold,
    including a run that logged fewer generations, is reported as -1.

    Raises:
        ValueError: if the target file has fewer runs than the current file.
    """
    print("Loading %s..." % path_hypervolume_current)

    with open(path_hypervolume_target, "r", newline="") as f:
        reader = csv.reader(f, delimiter=",")
        thresholds = [max(float(x) for x in row) * relaxed_target for row in reader]
    thresholds = thresholds[:repetitions]

    with open(path_hypervolume_current, "r", newline="") as f:
        reader = csv.reader(f, delimiter=",")
        data = [[float(x) for x in row] for row in reader]
    data = data[:repetitions]

    if len(thresholds) < len(data):
        raise ValueError(
            "%s has %d runs but %s only provides %d target hypervolumes"
            % (path_hypervolume_current, len(data), path_hypervolume_target, len(thresholds))
        )

    evals_relaxed = []
    for row, threshold in zip(data, thresholds):
        evals = -1
        # Slice instead of indexing up to nGens, so that runs with fewer
        # logged generations do not raise an IndexError.
        for gen, hypervolume in enumerate(row[:nGens+1]):
            if hypervolume >= threshold:
                evals = (gen+1)*popSize
                break
        evals_relaxed.append(evals)

    return np.array(evals_relaxed)

### Computational Effort (CE) calculation

In [None]:
def calc_min_effort(evals, nRuns, nGens, popSize, z) :
    """
    Return the minimum computational effort over generations 0..nGens.

    Parameters:
        evals: number of evaluations each run needed; -1 marks a run that
            is not counted as successful.
        nRuns: number of runs used to turn success counts into probabilities.
        nGens: last generation index considered.
        popSize: evaluations per generation.
        z: value used in the effort formula ceil(log(1-z)/log(1-prob)).

    For every generation i the cumulative success probability 'prob' is
    estimated, the number of runs r follows from the formula above, and the
    effort is popSize*(i+1)*r. The smallest effort is returned.
    """
    # Count how many runs finished in each generation.
    finished_in_gen = [0] * (nGens + 1)
    for n_evals in evals:
        if n_evals == -1:
            continue
        finished_in_gen[int(n_evals / popSize) - 1] += 1

    lowest_effort = float('+Inf')
    finished_so_far = 0

    for gen, finished in enumerate(finished_in_gen):
        # Cumulative number of successful runs up to this generation.
        finished_so_far += finished
        success_prob = finished_so_far / float(nRuns)

        # Avoid log(1) == 0 in the denominator when nothing has succeeded yet.
        if success_prob == 0:
            success_prob = 1e-7

        if success_prob == 1.0:
            runs_needed = 1.0
        else:
            runs_needed = np.ceil(np.log(1 - z) / np.log(1 - success_prob))

        effort = popSize * (gen + 1) * runs_needed
        if effort < lowest_effort:
            lowest_effort = effort

    return lowest_effort

### Generate tables

In [None]:
def gen_empty_table():
    """Return a new table that holds only the header row: an empty corner
    cell, "From Scratch", and one column per entry of 'tasks_source'."""
    header_row = ["", "From Scratch"]
    header_row.extend(tasks_source)
    return [header_row]

def aggregated_table(table_original):
    """
    Build a table with aggregated flexibility measures from 'table_original'.

    'table_original' has a header row followed by one row per entry of
    'tasks_target'; column 1 holds the from-scratch value and the following
    columns one value per entry of 'tasks_source' ("-" where not applicable).

    The result has the columns worst (max), average (mean) and best (min)
    case, with one row for the from-scratch values, one row per target for
    its adaption values, and a final row aggregating all adaption rows.
    """
    def summarize(label, numbers):
        return [label, max(numbers), np.mean(numbers), min(numbers)]

    n_targets = len(tasks_target)
    n_sources = len(tasks_source)

    # Data rows of the original table, skipping its header.
    target_rows = [table_original[pos] for pos in range(1, n_targets + 1)]

    summary = [["", "Worst Case", "Average Case", "Best Case"]]
    summary.append(summarize("From Scratch", [row[1] for row in target_rows]))

    # One row per target, ignoring the "-" placeholders.
    per_target = []
    for name, row in zip(tasks_target, target_rows):
        adapted = []
        for col in range(2, n_sources + 2):
            if row[col] != "-":
                adapted.append(row[col])
        per_target.append(summarize("Adaption to %s" % name, adapted))
    summary.extend(per_target)

    # Overall adaption: worst of the worst, mean of the means, best of the best.
    summary.append([
        "Adaption",
        max(entry[1] for entry in per_target),
        np.mean([entry[2] for entry in per_target]),
        min(entry[3] for entry in per_target),
    ])

    return summary

def save_table(table, path):
    """Write the rows of 'table' to 'path' as comma-separated values,
    replacing any existing file."""
    print("Saving %s..." % path)
    with open(path, "w", newline="") as handle:
        csv.writer(handle, delimiter=",").writerows(table)

def gen_all_tables():
    """
    Build all result tables for the current analysis settings and save them
    as CSV files in 'save_path'.

    Every table has one row per entry of 'tasks_target', a "From Scratch"
    column, and one column per entry of 'tasks_source'. A cell is "-" when
    the source contains the target itself. The tables are:

      - minimum computational effort (plus an aggregated version),
      - mean and standard deviation of the evaluations of successful runs
        (plus an aggregated version of the mean),
      - mean and standard deviation of the hypervolume,
      - fraction of successful adaption runs,
      - fraction of adaption runs already successful on initialization,
        i.e. after 'popSizeAdaption' evaluations.

    When 'relaxed_target' is below 1.0 the evaluation counts are derived from
    the hypervolume logs with load_evals_relaxed; otherwise they are read
    from the cost logs.
    """
    table_names = (
        "comp_effort",
        "evals_avg",
        "evals_std",
        "hypervolumes_avg",
        "hypervolumes_std",
        "success",
        "success_init",
    )
    tables = {name: gen_empty_table() for name in table_names}

    # For each material.
    for target in tasks_target:
        rows = {name: [target] for name in table_names}

        # Load data from scratch.
        path_target_hv = load_path_target + target + "/training/hypervolume.csv"
        target_hypervolumes = load_data(path_target_hv)
        if relaxed_target >= 1.0:
            target_costs = load_data(load_path_target + target + "/training/cost.csv")
        else:
            target_costs = load_evals_relaxed(path_target_hv, path_target_hv, popSize)

        # Runs marked with -1 are excluded from the evaluation statistics.
        target_successes = np.array([x for x in target_costs if x != -1])

        rows["comp_effort"].append(
            calc_min_effort(target_costs, repetitions, nGens, popSize, z)
        )
        rows["evals_avg"].append(np.mean(target_successes))
        rows["evals_std"].append(np.std(target_successes))
        rows["hypervolumes_avg"].append(np.mean(target_hypervolumes))
        rows["hypervolumes_std"].append(np.std(target_hypervolumes))

        # Success rates are not reported for the search from scratch.
        rows["success"].append("-")
        rows["success_init"].append("-")

        # For each combination.
        for source in tasks_source:

            # Empty if the target is part of the source.
            if target in source.split("_and_"):
                for name in table_names:
                    rows[name].append("-")
                continue

            # Load adaption data.
            path_adapt = load_path + source + "_to_" + target + "/adaption/"
            adapt_hypervolumes = load_data(path_adapt + "hypervolume.csv")
            if relaxed_target >= 1.0:
                adapt_costs = load_data(path_adapt + "cost.csv")
            else:
                adapt_costs = load_evals_relaxed(
                    path_target_hv,
                    path_adapt + "hypervolume.csv",
                    popSizeAdaption,
                )

            adapt_successes = np.array([x for x in adapt_costs if x != -1])

            rows["comp_effort"].append(
                calc_min_effort(adapt_costs, repetitions, nGens, popSizeAdaption, z)
            )
            rows["evals_avg"].append(np.mean(adapt_successes))
            rows["evals_std"].append(np.std(adapt_successes))
            rows["hypervolumes_avg"].append(np.mean(adapt_hypervolumes))
            rows["hypervolumes_std"].append(np.std(adapt_hypervolumes))

            # Fraction of runs that were successful at all.
            rows["success"].append(len(adapt_successes) / len(adapt_costs))

            # Fraction of runs already successful on initialization: the
            # initial population costs exactly popSizeAdaption evaluations,
            # so compare against that instead of a hard-coded 100.
            rows["success_init"].append(
                len([x for x in adapt_costs if x == popSizeAdaption]) / len(adapt_costs)
            )

        # Add rows to tables.
        for name in table_names:
            tables[name].append(rows[name])

    table_comp_effort_aggr = aggregated_table(tables["comp_effort"])
    table_evals_avg_aggr = aggregated_table(tables["evals_avg"])

    save_table(tables["comp_effort"], save_path+"table_min_comp_effort.csv")
    save_table(tables["evals_avg"], save_path+"table_evals_avg.csv")
    save_table(table_comp_effort_aggr, save_path+"table_min_comp_effort_aggregated.csv")
    save_table(table_evals_avg_aggr, save_path+"table_evals_avg_aggregated.csv")
    save_table(tables["evals_std"], save_path+"table_evals_std.csv")
    save_table(tables["hypervolumes_avg"], save_path+"table_hypervolumes_avg.csv")
    save_table(tables["hypervolumes_std"], save_path+"table_hypervolumes_std.csv")
    save_table(tables["success"], save_path+"table_success.csv")
    save_table(tables["success_init"], save_path+"table_success_init.csv")

### Generate plots (hypervolume across generations).

In [None]:
def gen_plots_hypervolume_all():
    """
    Plot, for every target task, the mean best hypervolume per generation of
    the search from scratch and of the adaption from every applicable source,
    and save the figure as 'plots_hypervolume.eps' in 'save_path'.

    One subplot of a 2x2 grid is used per entry of 'tasks_target'. Sources
    that contain the target itself are skipped.
    """
    # Figure size is given in pixels and converted to inches.
    px = 1/plt.rcParams['figure.dpi']
    fig, axsVec = plt.subplots(nrows=2, ncols=2, figsize=(1000*px,600*px), constrained_layout=True)
    # Alternative size in inches: figsize=(15,10).
    axsVec = list(np.array(axsVec).flat)

    # Markers for the adaption series; 'o' is reserved for "From scratch".
    markers = ['s', '*', 'X', 'D']
    fontsize = 12
    fontsize_legend = 10

    # For each target material.
    for idx, target in enumerate(tasks_target):

        # Select subplot.
        axs = axsVec[idx]

        # Load and plot data for search from scratch.
        data = load_hypervolume_gens(load_path_target+target+"/training/hypervolume.csv")
        axs.scatter(x=list(range(len(data))), y=data, label="From scratch", marker='o')

        # Sources that do not contain the target material.
        applicable_sources = [
            source for source in tasks_source
            if target not in source.split("_and_")
        ]

        for idxMarker, material_source in enumerate(applicable_sources):

            # Load data for adaption.
            data = load_hypervolume_gens(
                load_path+material_source+"_to_"+target+"/adaption/hypervolume.csv"
            )

            # Cycle through the markers so that more sources than markers
            # do not raise an IndexError.
            axs.scatter(
                x=list(range(len(data))),
                y=data,
                label="Source=%s"%material_source,
                marker=markers[idxMarker % len(markers)],
            )

        # Add subplot title, labels, and legends.
        axs.set_xlabel("Generations", fontsize=fontsize, fontweight='bold')
        axs.set_ylabel("Hypervolume", fontsize=fontsize, fontweight='bold')
        axs.set_title("Target=%s" % target, fontsize=fontsize, fontweight='bold')
        axs.tick_params(axis='x', labelsize=fontsize)
        axs.tick_params(axis='y', labelsize=fontsize)
        axs.legend(fontsize=fontsize_legend)

    # Save plot.
    fig.savefig(save_path+"plots_hypervolume.eps")
    print("Plots saved in %s." % save_path)

### Generate plots, only for source.

In [None]:
def gen_plots_hypervolume_source():
    """
    Plot the mean hypervolume per generation of the training runs of every
    source task and save the figure as 'plots_hypervolume_source.eps' in
    'save_path'.

    One subplot of a 2x3 grid is used per entry of 'tasks_source'. The raw
    per-generation means are shown (no running maximum is applied).
    """
    figure, grid = plt.subplots(nrows=2, ncols=3, figsize=(20,10), constrained_layout=True)
    panels = list(np.array(grid).flat)

    for position, source in enumerate(tasks_source):
        panel = panels[position]

        # Mean hypervolume of the training runs for this source.
        means = load_hypervolume_gens(
            load_path_source+source+"/training/hypervolume.csv",
            only_increases=False,
        )
        generations = list(range(len(means)))
        panel.scatter(x=generations, y=means)

        # Title, axis labels and tick labels share one font size.
        fontsize = 20
        bold = dict(fontsize=fontsize, fontweight='bold')
        panel.set_xlabel("Generations", **bold)
        panel.set_ylabel("Hypervolume", **bold)
        panel.set_title(source, **bold)
        for axis in ('x', 'y'):
            panel.tick_params(axis=axis, labelsize=fontsize)

    figure.savefig(save_path+"plots_hypervolume_source.eps")
    print("Plots saved in %s." % save_path)

### Runs analysis.

In [None]:
# Directories the analysis functions read their logs from.
load_path = exp_path + "logs/"
load_path_source = source_path + "logs/"
load_path_target = target_path + "logs/"

# One (output directory, fraction of the target hypervolume) pair per
# analysis pass: always the exact target, optionally the relaxed one.
_analysis_passes = [(exp_path + "analysis/", 1.0)]
if include_relaxed_target:
    _analysis_passes.append(
        (exp_path + "analysis_relaxed_target_%.2f/" % percentage, percentage)
    )

# The loop variables are module-level names on purpose: the functions called
# below read 'save_path' and 'relaxed_target' as globals.
for save_path, relaxed_target in _analysis_passes:
    try:
        os.mkdir(save_path)
    except FileExistsError:
        # The output directory is left over from an earlier run; reuse it.
        pass

    gen_all_tables()
    gen_plots_hypervolume_all()
    gen_plots_hypervolume_source()