In [None]:
import os
import sys
# Append the absolute path of the parent directory to the import search path
# (presumably so the project packages imported in the next cell resolve).
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import scipy as sp

from experiment_storage.file_paths import FilePaths
from experiment_storage.storage import EvaluationStorage

# Alternative experiment set, kept for reference; uncomment to replace `all_exp_ids` below.
# all_exp_ids = ['C101200_varinstsizeTSP', 'R101200_varinstsizeTSP', 'O101200_varinstsizeTSP',
#               'C101200_varinstsizeCVRP', 'R101200_varinstsizeCVRP', 'O101200_varinstsizeCVRP',
#               'C101200_varinstsizeVRPTW', 'R101200_varinstsizeVRPTW', 'O101200_varinstsizeVRPTW',
#               'C101200_varinstsizeHVRP', 'R101200_varinstsizeHVRP', 'O101200_varinstsizeHVRP',
#               'C101200_varinstsizeLRP', 'R101200_varinstsizeLRP', 'O101200_varinstsizeLRP']


# experiment result analysis 2023-7-24:
all_exp_ids = ['O101200_diffinitcwnoiseTSP', 'O101200_diffinitcwnoiseCVRP', 'O101200_diffinitcwnoiseHVRP', 'O101200_diffinitcwnoiseVRPTW', 'O101200_diffinitcwnoiseLRP',
               'O101200_diffinitrandTSP', 'O101200_diffinitrandCVRP', 'O101200_diffinitrandHVRP', 'O101200_diffinitrandVRPTW', 'O101200_diffinitrandLRP']


# Directory handed to FilePaths together with each experiment id.
# Set the ATES_EXPERIMENT_DATA_DIR environment variable before starting the kernel to
# point the notebook at a different location; when it is unset the original hard-coded
# path is used. (`os` is imported in the first cell of this notebook.)
# Previously used locations:
# parent_dir = "/Users/vdarvariu/experiment_data/ates"
# parent_dir = '/Users/shunee/github/ATES2022/experiment_data'
parent_dir = os.environ.get(
    'ATES_EXPERIMENT_DATA_DIR',
    '/Users/shunee/github/ATES2022/all_variants_proportional_tours',
)

# FilePaths instance for the 'aggregate' id; only referenced by the commented-out
# plotting cells visible in this notebook (via `fp_out.figures_dir`).
fp_out = FilePaths(parent_dir, 'aggregate', setup_directories=False)


# Final test set results

In [None]:
def compute_ci(data, confidence=0.95):
    """Return the half-width of a two-sided Student-t confidence interval for the mean.

    Args:
        data: Array-like of numeric samples (e.g. a list or a pandas Series).
        confidence: Confidence level of the interval; defaults to 0.95.

    Returns:
        The value ``h`` computed as ``sem(data) * t.ppf((1 + confidence) / 2, n - 1)``,
        so the interval is ``mean ± h``. NaN is returned when fewer than two samples
        are supplied.
    """
    # Import the submodule explicitly: `import scipy as sp` alone does not guarantee
    # that `sp.stats` has been loaded on every SciPy version.
    from scipy import stats

    a = np.asarray(data)
    n = len(a)
    if n < 2:
        # With 0 or 1 samples the standard error is undefined; return NaN directly
        # rather than letting SciPy/NumPy emit degrees-of-freedom warnings on the way.
        return np.nan

    se = stats.sem(a)
    return se * stats.t.ppf((1 + confidence) / 2., n - 1)

def get_results_table(storage, exp_id):
    """Summarise the best-hyperparameter evaluation rows of one experiment.

    Args:
        storage: Object providing ``get_evaluation_data(exp_id)`` and
            ``get_experiment_details(exp_id)``.
        exp_id: Identifier of the experiment to summarise.

    Returns:
        A pivot table of 'cummulative_reward' with one column per algorithm, plus one
        ``<agent>_ci`` column per agent listed in the experiment details holding the
        value returned by ``compute_ci`` for that agent's rewards.
    """
    # Keep only the rows flagged as coming from the best hyperparameter setting.
    best_rows = [row for row in storage.get_evaluation_data(exp_id) if row["is_best_hyps"]]
    rows_df = pd.DataFrame(best_rows)
    pivot = pd.pivot_table(rows_df, values='cummulative_reward', columns=["algorithm"])

    agent_names = storage.get_experiment_details(exp_id)["agents"]
    for agent_name in agent_names:
        print(agent_name)
        belongs_to_agent = rows_df["algorithm"] == agent_name
        agent_rewards = rows_df.loc[belongs_to_agent, "cummulative_reward"]
        pivot[agent_name + "_ci"] = compute_ci(agent_rewards)

    return pivot

for exp_id in all_exp_ids:
    # Three-line banner so each experiment's table is easy to find in the output.
    print("=" * 20, exp_id, "=" * 20, sep="\n")

    # Open this experiment's stored data and build its summary table.
    fp_in = FilePaths(parent_dir, exp_id, setup_directories=False)
    storage = EvaluationStorage(fp_in)
    results_table = get_results_table(storage, exp_id)

    # Lift pandas' column limit and widen the text output before printing.
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 1000)
    print(results_table, end="\n\n")

In [None]:
def compute_ci(data, confidence=0.95):
    """Return the half-width of a two-sided Student-t confidence interval for the mean.

    Args:
        data: Array-like of numeric samples (e.g. a list or a pandas Series).
        confidence: Confidence level of the interval; defaults to 0.95.

    Returns:
        The value ``h`` computed as ``sem(data) * t.ppf((1 + confidence) / 2, n - 1)``,
        so the interval is ``mean ± h``. NaN is returned when fewer than two samples
        are supplied.
    """
    # Import the submodule explicitly: `import scipy as sp` alone does not guarantee
    # that `sp.stats` has been loaded on every SciPy version.
    from scipy import stats

    a = np.asarray(data)
    n = len(a)
    if n < 2:
        # With 0 or 1 samples the standard error is undefined; return NaN directly
        # rather than letting SciPy/NumPy emit degrees-of-freedom warnings on the way.
        return np.nan

    se = stats.sem(a)
    return se * stats.t.ppf((1 + confidence) / 2., n - 1)

def get_results_table(storage, exp_id):
    """Build a per-``cust_number`` results table with confidence-interval columns.

    Only evaluation rows flagged ``is_best_hyps`` are used. The table has one row per
    ``cust_number`` present in the data and one column per algorithm holding the
    pivot-table aggregate (pandas' default, the mean) of 'cummulative_reward'.

    For every agent listed in the experiment details, except those whose name starts
    with "dqnprob" or "classicrw", an ``<agent>_ci`` column is added containing
    ``compute_ci`` of that agent's rewards for the row's ``cust_number``. The values
    are computed row by row from the table's own ``cust_number`` column, so they stay
    aligned with the rows regardless of the order in which customer counts are listed
    in the experiment configuration.

    A boolean ``win_dqn`` column (``dqn > vrwmdp``) is added when both of those
    algorithm columns exist.

    Args:
        storage: Object providing ``get_evaluation_data(exp_id)`` and
            ``get_experiment_details(exp_id)``.
        exp_id: Identifier of the experiment to summarise.

    Returns:
        The table as a DataFrame with its columns sorted by label.
    """
    best_rows = [r for r in storage.get_evaluation_data(exp_id) if r["is_best_hyps"]]
    rows_df = pd.DataFrame(best_rows)
    pivot = pd.pivot_table(
        rows_df, values='cummulative_reward', columns=["algorithm"], index=["cust_number"]
    ).reset_index()

    exp_details = storage.get_experiment_details(exp_id)

    for agent_name in exp_details["agents"]:
        if agent_name.startswith(("dqnprob", "classicrw")):
            continue

        agent_rows = rows_df.loc[rows_df["algorithm"] == agent_name]
        # One CI per table row, looked up by that row's cust_number. Assigning a
        # plain list is positional, and the list is built in the table's row order.
        pivot[agent_name + "_ci"] = [
            compute_ci(agent_rows.loc[agent_rows["cust_number"] == cust_number, "cummulative_reward"])
            for cust_number in pivot["cust_number"]
        ]

    # Computed once, after the loop; skipped when either algorithm is absent so an
    # experiment without both agents still yields a table.
    if "dqn" in pivot.columns and "vrwmdp" in pivot.columns:
        pivot["win_dqn"] = pivot["dqn"] > pivot["vrwmdp"]

    return pivot.sort_index(axis=1)


############################################################

for exp_id in all_exp_ids:
    # Three-line banner so each experiment's table is easy to find in the output.
    print("=" * 20, exp_id, "=" * 20, sep="\n")

    # Open this experiment's stored data and build its summary table.
    fp_in = FilePaths(parent_dir, exp_id, setup_directories=False)
    storage = EvaluationStorage(fp_in)
    results_table = get_results_table(storage, exp_id)

    # Lift pandas' column limit and widen the text output before printing.
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 1000)
    print(results_table, end="\n\n")

# Learning curves

In [None]:
# from state.visualization import plot_eval_histories
# from alns.rl.dqn.dqn_agent import DQNAgent
#
# separate_seeds = False
# only_optimal = True
#
# lc_exp_ids = ['C101200_allopsTSP', 'R101200_allopsTSP', 'O101200_allopsTSP',
#               'C101200_allopsCVRP', 'R101200_allopsCVRP', 'O101200_allopsCVRP',
#               'C101200_allopsVRPTW', 'R101200_allopsVRPTW', 'O101200_allopsVRPTW',
#               'C101200_allopsHVRP', 'R101200_allopsHVRP', 'O101200_allopsHVRP',
#               'C101200_allopsLRP', 'R101200_allopsLRP', 'O101200_allopsLRP']
#
# for lc_exp_id in lc_exp_ids:
#     fp_in = FilePaths(parent_dir, lc_exp_id, setup_directories=False)
#     storage = EvaluationStorage(fp_in)
#
#
#     experiment_details = storage.get_experiment_details(lc_exp_id)
#     network_generators = experiment_details["network_generators"]
#     all_objectives = experiment_details["objective_functions"]
#     agent_name = DQNAgent.algorithm_name
#     # agent_name = RouletteWheelMDPAgent.algorithm_name
#
#     optimal_hyps = storage.retrieve_optimal_hyperparams(lc_exp_id, False, fp_in=fp_in)
#     # print(latest_experiment)
#
#     all_hyp_data = storage.get_grouped_hyp_data(lc_exp_id, False, fp_in=fp_in)[1]
#
#     for network_generator_name in network_generators:
#         for obj_fun_name in all_objectives:
#             hyp_data = all_hyp_data[obj_fun_name]
#             opt_hyps_setting = optimal_hyps[(network_generator_name, obj_fun_name, agent_name)]
#             optimal_hyps_id = int(opt_hyps_setting[1])
#
#             all_hyp_ids = [int(hid) for hid in hyp_data[agent_name].keys()]
#             for hyp_id in all_hyp_ids:
#
#                 if only_optimal and optimal_hyps_id != hyp_id:
#                     continue
#
#                 experiment_conditions = experiment_details['experiment_conditions']
#                 steps_used = experiment_conditions['agent_budgets'][obj_fun_name][agent_name]
#                 model_seeds = experiment_conditions['experiment_params']['model_seeds']
#
#                 data_df = storage.fetch_all_eval_curves(agent_name, hyp_id, fp_in, [obj_fun_name],
#                                                                     [network_generator_name],
#                                                                     model_seeds,
#                                                                     train_individually=False,
#                                                                 )
#                 eval_plot_filename = f'{lc_exp_id}-eval_curves_{agent_name}-{hyp_id}{"_OPT" if hyp_id == optimal_hyps_id else ""}.pdf'
#                 plot_eval_histories(data_df, fp_out.figures_dir / eval_plot_filename, separate_seeds=separate_seeds)

# Hyperparameter optimization data

In [None]:
# non_hyp_cols = {"avg_perf", "network_generator", "objective_function"}
# agent_hyperparam_dfs = {}
#
# for lc_exp_id in lc_exp_ids:
#     fp_in = FilePaths(parent_dir, lc_exp_id, setup_directories=False)
#     storage = EvaluationStorage(fp_in)
#
#     param_spaces, df = storage.get_hyperparameter_optimisation_data(lc_exp_id, train_individually=False, fp_in=fp_in)
#     # print(df)
#
#     for network_generator_name in network_generators:
#         for obj_fun_name in all_objectives:
#             expanded_data = []
#
#             subset = df[(df['agent_name'] == agent_name)]
#             subset.drop(columns=['agent_name'])
#
#             for idx, row in subset.iterrows():
#                 row_copy = dict(row)
#                 hyperparams_id = row['hyperparams_id']
#                 hyperparams = param_spaces[obj_fun_name][agent_name][hyperparams_id]
#                 row_copy.update(hyperparams)
#                 expanded_data.append(row_copy)
#
#             hyp_df = pd.DataFrame(expanded_data).drop(columns=['hyperparams_id'])
#             agent_hyperparam_dfs[agent_name] = hyp_df
#
#
#     hyperparams_df = agent_hyperparam_dfs[agent_name]
#     hyperparams_df.replace({False: 0, True: 1}, inplace=True)
#
#     for network_generator_name in network_generators:
#         for obj_fun_name in all_objectives:
#
#             hyperparam_cols = list(set(hyperparams_df.columns) - non_hyp_cols)
#             for hyperparam_name in hyperparam_cols:
#                 if len(set(hyperparams_df[hyperparam_name])) < 2:
#                     continue
#
#                 plt.figure()
#                 title = f"{agent_name}-{network_generator_name}-{obj_fun_name}-{hyperparam_name}-all"
#                 filename = f"{lc_exp_id}-hyperparams-{title}.pdf"
#                 plt.title(title)
#                 sns.lineplot(data=hyperparams_df, x=hyperparam_name, y="avg_perf")
#                 plt.savefig(fp_out.figures_dir / filename, bbox_inches='tight')
#                 plt.close()