In [None]:
import pandas as pd
import os

In [None]:
from utils import load_experiment, get_experiment_run

In [None]:

# Experiment configuration. The values are kept as strings; MIN_REQ / MAX_REQ are
# compared against the `min_req` / `max_req` columns further down
# (presumably those columns are loaded as strings - TODO confirm).
hours = "5"
simulations = "5"
reqs = "50"

# Everything is read from / written to <cwd>/results/<hours>hours_incremental_data.
base_dir = os.path.join(os.getcwd(), "results", hours + "hours_incremental_data")
# Quick sanity check that the directory exists before loading anything from it.
print(os.path.isdir(base_dir))

MIN_REQ = "50"
MAX_REQ = "50"

In [None]:
# Column labels handed to the loader: three objectives, a start/end pair for
# each of the ten robots, and the shared robot speed.
objectives_labels = ['delivery_rate', 'utilization_rate', 'num_risks']
variables_labels = [f'robot_{robot}_{edge}' for robot in range(10) for edge in ('start', 'end')]
variables_labels.append('robot_speed_kmh')

all_individuals, all_rec_individuals, all_results, all_time_df = load_experiment(
    base_dir=f'{base_dir}/results',
    objectives_labels=objectives_labels,
    variables_labels=variables_labels,
)

In [None]:
# Add `total_virtual_time` = simulation_duration * simulations to both
# individual tables (in place, exactly like the column assignments it replaces).
for individuals_table in (all_individuals, all_rec_individuals):
    individuals_table['total_virtual_time'] = (
        individuals_table['simulation_duration'] * individuals_table['simulations']
    )

Filtering the results that have the same number of requests (i.e., they are solutions to the same problem).

In [None]:
# Keep only the rows whose request range equals the configured MIN_REQ / MAX_REQ.
all_results = all_results[
    all_results['min_req'].eq(MIN_REQ) & all_results['max_req'].eq(MAX_REQ)
]
all_individuals = all_individuals[
    all_individuals['min_req'].eq(MIN_REQ) & all_individuals['max_req'].eq(MAX_REQ)
]
all_rec_individuals = all_rec_individuals[
    all_rec_individuals['min_req'].eq(MIN_REQ) & all_rec_individuals['max_req'].eq(MAX_REQ)
]
all_time_df = all_time_df[
    all_time_df['min_req'].eq(MIN_REQ) & all_time_df['max_req'].eq(MAX_REQ)
]

In [None]:
# Drop every row whose num_risks equals 100000.00.
# NOTE(review): presumably a penalty value marking failed/invalid evaluations - confirm.
all_results = all_results[all_results['num_risks'].ne(100000.00)]
all_individuals = all_individuals[all_individuals['num_risks'].ne(100000.00)]
all_rec_individuals = all_rec_individuals[all_rec_individuals['num_risks'].ne(100000.00)]

In [None]:
# Persist the filtered tables next to the experiment data (row index not written).
for csv_name, table in (("all_individuals.csv", all_individuals),
                        ("all_results.csv", all_results)):
    table.to_csv(os.path.join(base_dir, csv_name), index=False)

In [None]:
import numpy as np
from jmetal.core.solution import Solution
from jmetal.util.solution import get_non_dominated_solutions

def get_non_dom_from_numpy(solutions, n_objectives=3):
    """Return the rows of ``solutions`` that belong to the non-dominated front.

    Parameters
    ----------
    solutions : numpy.ndarray
        2-D array with one candidate per row. The first ``n_objectives``
        columns are the objective values handed to jMetal unchanged; any
        further columns are carried along untouched.
    n_objectives : int, optional
        Number of leading objective columns. Defaults to 3, the value that
        was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The selected rows of ``solutions`` (all columns). For every member of
        the front returned by ``get_non_dominated_solutions`` the first row
        with exactly equal objective values is picked.
    """
    # Wrap each row in a jMetal Solution so the library's dominance filter can
    # be reused. Only the objectives are filled in; the variable count (2) is a
    # placeholder because no variables are ever assigned.
    candidates = []
    for row in solutions:
        candidate = Solution(2, n_objectives, 0)
        candidate.objectives = list(row[:n_objectives])
        candidates.append(candidate)

    front = get_non_dominated_solutions(candidates)

    # Map each front member back to a row index by exact equality on the
    # objective columns, so the non-objective columns can be returned as well.
    real_row = []
    for member in front:
        member_objectives = np.array(member.objectives)
        matches = np.flatnonzero((member_objectives == solutions[:, :n_objectives]).all(axis=1))
        if matches.size != 0:
            real_row.append(matches[0])

    return solutions[real_row]

def get_solutions_as_numpy_array(exp_df):
    """Convert an experiment DataFrame into a ``(n_rows, 24)`` array.

    Column layout of the result:
      * 0-1  : ``delivery_rate`` and ``utilization_rate``, each multiplied by -1.0
      * 2    : ``num_risks`` (unchanged)
      * 3-22 : ``robot_<i>_start`` / ``robot_<i>_end`` for i = 0..9
      * 23   : ``robot_speed_kmh``
    """
    series_in_order = [
        exp_df['delivery_rate'] * -1.0,
        exp_df['utilization_rate'] * -1.0,
        exp_df['num_risks'],
    ]
    for robot in range(10):
        series_in_order.append(exp_df[f'robot_{robot}_start'])
        series_in_order.append(exp_df[f'robot_{robot}_end'])
    series_in_order.append(exp_df['robot_speed_kmh'])

    # Stacking yields one row per column; transpose to get one row per solution.
    return np.array(series_in_order).transpose()

def get_need_PF(df):
    """Collect the non-dominated solutions of every (problem, approach, run) in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Individuals table with at least the ``problem``, ``approach`` and
        ``run`` columns plus the columns read by
        ``get_solutions_as_numpy_array``.

    Returns
    -------
    data : numpy.ndarray
        The non-dominated rows of all runs stacked vertically, in the order
        the runs first appear in ``df``. When ``df`` contains no run at all an
        empty ``(0, 24)`` array is returned (previously this case raised
        ``UnboundLocalError``).
    list_row : list of dict
        One ``{'problem', 'simulator', 'run'}`` label per row of ``data``.
    """
    fronts = []
    list_row = []
    run_keys = df[['problem', 'approach', 'run']].drop_duplicates()
    for _, (problem, approach, run) in run_keys.iterrows():
        exp_df = get_experiment_run(df=df, problem=problem, approach=approach, run=run)
        solutions = get_solutions_as_numpy_array(exp_df)
        non_dominated_solutions = get_non_dom_from_numpy(solutions)
        fronts.append(non_dominated_solutions)
        # The same label dict is repeated once per selected row of this run.
        label = {'problem': problem, 'simulator': approach, 'run': run}
        list_row.extend([label] * non_dominated_solutions.shape[0])

    if not fronts:
        # 24 = width of the arrays built by get_solutions_as_numpy_array
        # (3 objectives + 20 working-period columns + speed), so callers can
        # still concatenate this result with non-empty ones.
        return np.empty((0, 24)), list_row
    return np.concatenate(fronts), list_row

Collect the non-dominated solutions that need re-evaluation and save them to `need_reeval.csv`.

In [None]:
generations = ['17', '34', '51', '68']

# Two passes over the same (generations, M) grid, in this order:
#   1. cumulative  - individuals with generation <= M
#   2. per-step    - individuals with generation == M
# The fronts found in each setting are stacked into `data`; `df_row` holds the
# matching run labels, one per stacked row.
collected_fronts = []
df_row = []
for cumulative in (True, False):
    for generation in generations:
        for using_M in range(1, int(generation)):
            if cumulative:
                step_mask = all_individuals['generation'] <= using_M
            else:
                step_mask = all_individuals['generation'] == using_M
            individuals = all_individuals[
                (all_individuals['generations'] == generation)
                & step_mask
                & (all_individuals['approach'] == 'incremental-data_lab28_special_utilization_model12.pth')
            ]
            non_dominated_solutions, list_row = get_need_PF(df=individuals)
            collected_fronts.append(non_dominated_solutions)
            df_row += list_row
data = np.concatenate(collected_fronts)

In [None]:
# Only the index of the first occurrence of each distinct row is needed here;
# the unique rows themselves (`u`) are not used afterwards in this cell.
u, indices = np.unique(data, axis=0, return_index=True)

need_reeval_columns = ['delivery_rate', 'utilization_rate', 'num_risk']
for robot in range(10):
    need_reeval_columns += [f'robot_{robot}_start', f'robot_{robot}_end']
need_reeval_columns.append('speed_kmh')

# Put the run labels next to the solution values, then keep one row per
# distinct solution and write the result to disk.
df_row = pd.DataFrame(df_row)
data_df = pd.concat((pd.DataFrame(data, columns=need_reeval_columns), df_row), axis=1)
data_df = data_df.iloc[indices]
data_df.to_csv(os.path.join(base_dir, "need_reeval.csv"), index=False)

# Quality Indicators

In [None]:
from jmetal.core.quality_indicator import InvertedGenerationalDistance

Defining the reference point and the reference front based on all available data

In [None]:
# Reference front made of a single point: the best value observed for each
# objective (the two rates are negated, matching get_solutions_as_numpy_array).
def _best_point(individuals_table):
    """Return [-max delivery_rate, -max utilization_rate, min num_risks] for the table."""
    return [
        individuals_table['delivery_rate'].max() * -1.0,
        individuals_table['utilization_rate'].max() * -1.0,
        individuals_table['num_risks'].min(),
    ]

reference_front_orig = [_best_point(all_individuals)]
reference_front_rec = [_best_point(all_rec_individuals)]
print(f"reference_front_orig: {reference_front_orig}  reference_front_rec: {reference_front_rec}")
reference_front = reference_front_rec

In [None]:
# Reference front built from all non-dominated solutions; this rebinds the
# `reference_front*` names assigned in the single-point cell.
solutions_orig = get_solutions_as_numpy_array(all_individuals)
solutions_rec = get_solutions_as_numpy_array(all_rec_individuals)
reference_front_orig = get_non_dom_from_numpy(solutions_orig)
reference_front_rec = get_non_dom_from_numpy(solutions_rec)
print(f"reference_front_orig.shape: {reference_front_orig.shape}  reference_front_rec.shape: {reference_front_rec.shape}")
# Only the three objective columns of the reconciled front are used as reference.
reference_front = reference_front_rec[:, :3]

In [None]:
# Store the reference front that is actually used. The path is relative, so the
# file is written to the current working directory rather than to base_dir.
np.save(file="used_reference_front.npy", arr=reference_front)

In [None]:
# The only indicator in use: inverted generational distance against the reference front.
igd_indicator = InvertedGenerationalDistance(reference_front)
quality_indicators = [igd_indicator]

In [None]:
# Look-up table read by reeval() below.
lut_path = os.path.join(base_dir, "look_up_table_var_robot.csv")
LUT = pd.read_csv(lut_path)

def reeval(robots_working_period, speed):
    """Look up the stored objective values of one configuration in ``LUT``.

    Parameters
    ----------
    robots_working_period : numpy.ndarray
        Flat array ``[robot_0_start, robot_0_end, robot_1_start, ...]``; entry
        ``2*i`` is matched against ``robot_<i>_start`` and entry ``2*i + 1``
        against ``robot_<i>_end``.
    speed :
        Value matched against the ``robot_speed_kmh`` column.

    Returns
    -------
    tuple
        ``(-num_delivery_rate, -utilization_rate, num_risk)`` of the matching
        LUT row (the two rates are multiplied by -1.0).

    Raises
    ------
    ValueError
        If, after dropping exact duplicate rows, the LUT does not contain
        exactly one row for the requested configuration.
    """
    # Build a single boolean mask instead of re-filtering the table 21 times;
    # all comparisons are exact equality, as before.
    mask = np.ones(len(LUT), dtype=bool)
    for idx in range(robots_working_period.shape[0] // 2):
        mask &= (LUT[f'robot_{idx}_start'] == robots_working_period[2 * idx]).to_numpy()
        mask &= (LUT[f'robot_{idx}_end'] == robots_working_period[2 * idx + 1]).to_numpy()
    mask &= (LUT['robot_speed_kmh'] == speed).to_numpy()
    used_row = LUT[mask].drop_duplicates()

    if len(used_row) != 1:
        # Same exception type that `.item()` would raise, but with enough
        # context to find the offending configuration.
        raise ValueError(
            f"expected exactly one LUT row for working periods "
            f"{list(robots_working_period)} and speed {speed}, found {len(used_row)}"
        )

    return (used_row['num_delivery_rate'].item() * -1.0,
            used_row['utilization_rate'].item() * -1.0,
            used_row['num_risk'].item())

Computing the quality indicators on all the runs 

In [None]:
def compute_quality_indicators(df, quality_indicator, using_H, using_M):
    """Compute the quality indicator(s) for every (problem, approach, run) in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Individuals table; must contain ``problem``, ``approach`` and ``run``
        plus the columns read by ``get_solutions_as_numpy_array``.
    quality_indicator :
        A single indicator or a list/tuple of indicators. Each must provide
        ``get_short_name()`` and ``compute(front)``. The argument is now
        honoured; previously the function ignored it and read the global
        ``quality_indicators`` instead.
    using_H, using_M :
        Only used to build the approach label
        ``f'{approach}_H_{using_H}_M_{using_M}'``.

    Returns
    -------
    pandas.DataFrame
        One row per run with ``problem``, ``approach``, ``run`` and one column
        per indicator short name.
    """
    if isinstance(quality_indicator, (list, tuple)):
        indicators = list(quality_indicator)
    else:
        indicators = [quality_indicator]

    data = []
    run_keys = df[['problem', 'approach', 'run']].drop_duplicates()
    for _, (problem, approach, run) in run_keys.iterrows():
        row = {'problem': problem, 'approach': f'{approach}_H_{using_H}_M_{using_M}', 'run': run}

        # The front does not depend on the indicator, so build it once per run.
        exp_df = get_experiment_run(df=df, problem=problem, approach=approach, run=run)
        solutions = get_solutions_as_numpy_array(exp_df)
        front = get_non_dom_from_numpy(solutions)

        if approach != 'standard':
            # For every approach other than 'standard', replace the objective
            # values by the ones stored in the look-up table, then filter the
            # re-evaluated points for non-dominance again.
            reeval_front = np.empty((front.shape[0], 3))
            for i, solution in enumerate(front):
                robots_working_period = solution[3:23]
                robot_speed_kmh = solution[23]
                reeval_front[i] = reeval(robots_working_period, robot_speed_kmh)
            front = get_non_dom_from_numpy(reeval_front)

        objectives = front[:, :3]
        for indicator in indicators:
            row[indicator.get_short_name()] = indicator.compute(objectives)
        data.append(row)

    qi_df = pd.DataFrame(data)
    return qi_df

In [None]:
# Indicator values per single generation step: for every budget in
# `generations` (paired with its H value) and every M below that budget, only
# the individuals created exactly at generation M are evaluated.
generations = ['17', '34', '51', '68']
H = [1, 2, 3, 4]

# Baseline: the 'standard' approach, labelled with H=0 and M=0.
individuals = all_individuals[(all_individuals['approach'] == 'standard')]
indicator_frames = [
    compute_quality_indicators(df=individuals, quality_indicator=quality_indicators, using_H=0, using_M=0)
]

for using_H, generation in zip(H, generations):
    for using_M in range(1, int(generation)):
        print(f'now each_gen using_generationH : {generation}, using_M: {using_M}')
        individuals = all_individuals[(all_individuals['generations']==generation) & (all_individuals['generation']==using_M) & (all_individuals['approach'] == 'incremental-data_lab28_special_utilization_model12.pth')]
        indicator_frames.append(
            compute_quality_indicators(df=individuals, quality_indicator=quality_indicators, using_H=using_H, using_M=using_M)
        )

# Concatenate once at the end instead of growing a DataFrame inside the loop.
data = pd.concat(indicator_frames)

quality_indicators_df = data
quality_indicators_df.to_csv(os.path.join(base_dir, 're_eval_each_gen_quality_indicators.csv'), index=False)
quality_indicators_df




# Cumulative indicator values: for every budget in `generations` (paired with
# its H value) and every M below that budget, all individuals created up to
# and including generation M are evaluated.
generations = ['17', '34', '51', '68']
H = [1, 2, 3, 4]

# Baseline: the 'standard' approach, labelled with H=0 and M=0.
individuals = all_individuals[(all_individuals['approach'] == 'standard')]
indicator_frames = [
    compute_quality_indicators(df=individuals, quality_indicator=quality_indicators, using_H=0, using_M=0)
]

for using_H, generation in zip(H, generations):
    for using_M in range(1, int(generation)):
        print(f'now using_generationH : {generation}, using_M: {using_M}')
        individuals = all_individuals[(all_individuals['generations']==generation) & (all_individuals['generation']<=using_M) & (all_individuals['approach'] == 'incremental-data_lab28_special_utilization_model12.pth')]
        indicator_frames.append(
            compute_quality_indicators(df=individuals, quality_indicator=quality_indicators, using_H=using_H, using_M=using_M)
        )

# Concatenate once at the end instead of growing a DataFrame inside the loop.
data = pd.concat(indicator_frames)

quality_indicators_df = data
quality_indicators_df.to_csv(os.path.join(base_dir, 're_eval_quality_indicators.csv'), index=False)
quality_indicators_df
            

This can be done on all the individuals that were evaluated in an experiment. 

Or on all individuals that belong to the final result. 

Statistical comparison among the indicators 

In [None]:
# NOTE(review): `reconciled_results` is not defined in the visible part of this
# notebook - confirm where it comes from before running this cell.
# The call now matches the signature of compute_quality_indicators
# (df, quality_indicator, using_H, using_M). H=0 / M=0 mirror the labels used
# for the 'standard' baseline - TODO confirm these are the intended labels.
generation_frames = []
for generation in ["20"]:
    quality_indicators_df = compute_quality_indicators(
        df=reconciled_results[reconciled_results.generations == generation],
        quality_indicator=quality_indicators,
        using_H=0,
        using_M=0,
    )
    quality_indicators_df['generations'] = generation
    generation_frames.append(quality_indicators_df)

# DataFrame.append was removed in pandas 2.0; concatenate the collected frames instead.
all_quality_indicators_df = pd.concat(generation_frames, ignore_index=True)

In [None]:
# Display the indicator table assembled in the previous cell.
all_quality_indicators_df

Read the quality indicators

In [None]:
# Load the stored indicators; this replaces any table built in memory above.
# NOTE(review): the cells above write 're_eval_quality_indicators.csv' and
# 're_eval_each_gen_quality_indicators.csv'; 'quality_indicators.csv' is not
# written in the visible part of the notebook - confirm it is the intended file.
quality_indicators_csv = os.path.join(base_dir, 'quality_indicators.csv')
all_quality_indicators_df = pd.read_csv(quality_indicators_csv)
all_quality_indicators_df

In [None]:
from itertools import product

In [None]:
from a12 import a12
from scipy.stats import mannwhitneyu

In [None]:
# Inputs of the pairwise statistical comparison.
indicators = ['IGD']
generations = np.array([20])
# Distinct approach labels, in order of first appearance in the table.
approaches = list(all_quality_indicators_df.loc[:, 'approach'].unique())

In [None]:
# Pairwise comparison of the approaches: for each indicator and generation,
# every unordered pair (approach_1 listed before approach_2 in `approaches`)
# gets an A12 effect size and a Mann-Whitney U test.
statistical_comparison_data = []
for indicator in indicators:
    for generation in generations:
        for position, approach_1 in enumerate(approaches):
            indicator_1 = all_quality_indicators_df[(all_quality_indicators_df.approach == approach_1)][indicator]
            # Only pair with approaches that come later in the list, so each
            # pair is compared exactly once.
            for approach_2 in approaches[position + 1:]:
                indicator_2 = all_quality_indicators_df[(all_quality_indicators_df.approach == approach_2)][indicator]
                try:
                    a12_stats = a12(indicator_1, indicator_2)
                except ZeroDivisionError:
                    # No effect size can be computed for this pair; skip it.
                    continue
                try:
                    utest_stats, utest_pvalue = mannwhitneyu(indicator_1, indicator_2)
                except ValueError:
                    utest_stats, utest_pvalue = np.nan, np.nan

                if np.isnan(utest_pvalue) or utest_pvalue > 0.05:
                    # No significant difference. A NaN p-value (test could not
                    # be run) must not be reported as a win for either side.
                    better = "SAME"
                elif indicator == "HV":
                    # "HV" is treated as larger-is-better.
                    better = approach_1 if a12_stats > 0.5 else approach_2
                else:
                    # Every other indicator (e.g. IGD) is treated as smaller-is-better.
                    better = approach_1 if a12_stats < 0.5 else approach_2

                statistical_comparison_data.append({
                    'max_generation_1': generation, 'approach_1': approach_1,
                    'max_generation_2': generation, 'approach_2': approach_2,
                    'indicator': indicator,
                    'a12_stats': a12_stats, 'utest_stats': utest_stats,
                    'utest_pvalue': utest_pvalue, 'better': better,
                })

In [None]:
# Turn the list of per-pair comparison records into a table (one row per pair).
stats_df = pd.DataFrame(statistical_comparison_data)

In [None]:
# Show only the comparisons whose Mann-Whitney U p-value is below 0.05.
stats_df[(stats_df.utest_pvalue < 0.05)]

In [None]:
# Show only the comparisons made on the IGD indicator.
stats_df[(stats_df.indicator == 'IGD')]

In [None]:
# Save the full comparison table (row index not written) and display it.
a12_csv_path = os.path.join(base_dir, 'A12.csv')
stats_df.to_csv(a12_csv_path, index=False)
stats_df

In [None]:
# Significant IGD comparisons only. `.copy()` makes this an independent frame,
# so adding the 'A12cat' column below does not write into a slice of stats_df.
stats_df_IGD = stats_df[(stats_df.utest_pvalue < 0.05) & (stats_df.indicator == 'IGD')].copy()

# Effect-size bands on A12. Above 0.5 the cut-offs are 0.556 / 0.638 / 0.714;
# below 0.5 they are mirrored around 0.5: 0.444 / 0.362 / 0.286.
# The lower negligible bound used to be 0.494, which was not the mirror of
# 0.556 and put values in (0.444, 0.494] into "betterSmall".
a12_values = stats_df_IGD.a12_stats
conditions = [
    (stats_df_IGD.better == "SAME"),
    ((a12_values > 0.5) & (a12_values < 0.556)),
    ((a12_values >= 0.556) & (a12_values < 0.638)),
    ((a12_values >= 0.638) & (a12_values < 0.714)),
    (a12_values >= 0.714),
    ((a12_values > 0.444) & (a12_values < 0.5)),
    ((a12_values > 0.362) & (a12_values <= 0.444)),
    ((a12_values > 0.286) & (a12_values <= 0.362)),
    (a12_values <= 0.286),
]
choices = ["same", "worseNegl", "worseSmall", "worseMedium", "worseLarge", "betterNegl", "betterSmall", "betterMedium", "betterLarge"]
# Rows matching none of the bands (e.g. a12 exactly 0.5) are labelled "ERROR!".
stats_df_IGD['A12cat'] = np.select(conditions, choices, "ERROR!")
stats_df_IGD

In [None]:
# The `robot_<i>_end` variables hold how many hours a robot works, not a clock
# time: turn them into end times (start + hours worked), capped at 14.
# This overwrites the columns in place, so the cell must not be run twice.
for i in range(10):
    start_column = f'robot_{i}_start'
    end_column = f'robot_{i}_end'
    end_time = all_results[start_column] + all_results[end_column]
    all_results[end_column] = end_time.clip(upper=14)

In [None]:
# Remove the limit on how many columns pandas shows when displaying a DataFrame.
pd.set_option('display.max_columns', None)