In [1]:
import optuna
import json
import pprint

# Third Party Library
import pandas as pd
from ex_utils.config.paths import get_dataset_path
from ex_utils.config.quality_metrics import qm_names
from ex_utils.share import (
    ex_path,
    generate_seed_median_df,
    generate_sscalers,
)
from ex_utils.utils.graph import (
    load_nx_graph,
    nx_graph_preprocessing,
)

# Standard Library
import argparse
from time import perf_counter

# Third Party Library
import matplotlib.pyplot as plt
import networkx as nx
import optuna
import pandas as pd
from egraph import Drawing, all_sources_bfs
from ex_utils.config.dataset import dataset_names
from ex_utils.config.paths import get_dataset_path
from ex_utils.config.quality_metrics import qm_names
from ex_utils.share import draw, ex_path, generate_base_df_data, rate2pivots
from ex_utils.utils.graph import (
    egraph_graph,
    load_nx_graph,
    nx_graph_preprocessing,
)
from tqdm import tqdm
import math

# --- Experiment configuration ------------------------------------------------
n_bins = 50
EDGE_WEIGHT = 30
threshold = 0.025

# Datasets processed by this notebook, in code-point sort order.
d_names = sorted(["1138_bus", "USpowerGrid", "dwt_1005", "poli", "qh882"])

# One grid-search pickle is read per seed value.
seeds = list(range(15))

n_split = 10

# Raw (un-normalised) preference weights; each vector is zipped with
# `qm_names`, so it carries one weight per quality metric.
base_prefs = [
    # [1.0, 1.0, 1.0, 5.0, 1.0, 0.5, 4.0, 0.5, 4.0, 0.5],
    [0.5, 0.5, 0.5, 5.0, 2.0, 0.5, 4.0, 0.5, 4.0, 0.5]
]


def _normalize_weights(weights):
    """Scale ``weights`` so that they sum to 1."""
    total = sum(weights)
    return [weight / total for weight in weights]


# Normalised counterpart of `base_prefs` (same order, same length).
prefs = [_normalize_weights(raw_weights) for raw_weights in base_prefs]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import random

# Draw an integer seed from the inclusive range [1, 10000].
# NOTE(review): the value is neither fixed nor logged, so anything seeded
# with it cannot be reproduced after a kernel restart.
seed = random.randrange(1, 10001)

In [3]:
# Show the first normalised preference vector keyed by quality-metric name.
{metric: weight for metric, weight in zip(qm_names, prefs[0])}

{'angular_resolution': 0.027777777777777776,
 'aspect_ratio': 0.027777777777777776,
 'crossing_angle': 0.027777777777777776,
 'crossing_number': 0.2777777777777778,
 'gabriel_graph_property': 0.1111111111111111,
 'ideal_edge_length': 0.027777777777777776,
 'neighborhood_preservation': 0.2222222222222222,
 'node_resolution': 0.027777777777777776,
 'stress': 0.2222222222222222,
 'time_complexity': 0.027777777777777776}

In [4]:
# Cache: dataset name -> result of `all_sources_bfs` for that dataset's graph.
eg_distance_matrices = dict()

In [5]:
# Compute the all-sources BFS distance matrix once per dataset and cache it
# under the dataset name.
for d_name in d_names:
    dataset_path = get_dataset_path(d_name)
    nx_graph = nx_graph_preprocessing(
        load_nx_graph(dataset_path=dataset_path), EDGE_WEIGHT
    )
    # `egraph_graph` returns a pair; only the graph is needed in this cell
    # (the second element, `eg_indices`, is not read here).
    eg_graph, eg_indices = egraph_graph(nx_graph=nx_graph)
    eg_distance_matrices[d_name] = all_sources_bfs(eg_graph, EDGE_WEIGHT)

In [6]:
# Display the cache to check that one entry was stored per dataset name.
eg_distance_matrices

{'1138_bus': <DistanceMatrix at 0x127981fb0>,
 'USpowerGrid': <DistanceMatrix at 0x1272b1d70>,
 'dwt_1005': <DistanceMatrix at 0x127923b10>,
 'poli': <DistanceMatrix at 0x1266e5ad0>,
 'qh882': <DistanceMatrix at 0x127013f90>}

In [7]:
# Layout cache keyed by dataset name; currently only referenced from the
# commented-out drawing code in the multi-objective cell.
poss = dict()

In [18]:
multi_n_trials = 300


def _best_weighted_pareto_trial(pareto_front, scalers, pref):
    """Return ``(score, params)`` of the best Pareto-optimal trial for ``pref``.

    For every trial, the values stored in its ``median_quality_metrics``
    user attribute are scaled one metric at a time with
    ``scalers[qm_name].transform`` and combined into a weighted sum using
    ``pref[qm_name]``.  The trial with the largest sum wins; on ties the
    earliest trial in ``pareto_front`` is kept.

    Args:
        pareto_front: iterable of trials exposing ``user_attrs`` and ``params``.
        scalers: mapping from quality-metric name to an object with a
            ``transform`` method that accepts a 2-D ``[[value]]`` input.
        pref: mapping from quality-metric name to its weight.

    Returns:
        ``(best_score, best_params)``; ``(-inf, None)`` if ``pareto_front``
        is empty.
    """
    best_score = -float("inf")
    best_params = None
    for trial in pareto_front:
        medians = trial.user_attrs["median_quality_metrics"]
        score = sum(
            scalers[qm_name].transform([[medians[qm_name]]])[0][0]
            * pref[qm_name]
            for qm_name in qm_names
        )
        if best_score < score:
            best_score = score
            best_params = trial.params
    return best_score, best_params


# One row per preference vector, one column per dataset.  The axes are only
# drawn on by the disabled plotting code below.
fig, axes = plt.subplots(
    figsize=(8 * (len(d_names)), 8),
    dpi=300,
    facecolor="white",
    nrows=len(prefs),
    ncols=len(d_names),
    squeeze=False,
)
ps = []
# Best preference-weighted score found on each dataset's Pareto front.
mm = []

for d_name, ax in zip(d_names, axes.flatten()):
    # Grid-search results for every seed, reduced by `generate_seed_median_df`,
    # are used to build the per-metric scalers.
    df_paths = [
        ex_path.joinpath(
            f"data/grid/{d_name}/n_split={n_split}/seed={grid_seed}.pkl"
        )
        for grid_seed in seeds
    ]
    df = pd.concat([pd.read_pickle(path) for path in df_paths])
    df = generate_seed_median_df(df)

    scalers = generate_sscalers(df)

    # The graph objects are only consumed by the disabled drawing code below.
    dataset_path = get_dataset_path(d_name)
    nx_graph = nx_graph_preprocessing(
        load_nx_graph(dataset_path=dataset_path), EDGE_WEIGHT
    )
    eg_graph, eg_indices = egraph_graph(nx_graph=nx_graph)

    multi_obj_study_name = f"multi-obj-{multi_n_trials}"
    multi_db_uri = f'sqlite:///{ex_path.joinpath(f"data/optimization/{d_name}-multi-obj.db")}'
    multi_obj_study = optuna.load_study(
        storage=multi_db_uri, study_name=multi_obj_study_name
    )
    pareto_front = list(multi_obj_study.best_trials)

    for p in prefs:
        pref = dict(zip(qm_names, p))

        multi, best_params = _best_weighted_pareto_trial(
            pareto_front, scalers, pref
        )
        # Keep the original shape of `params`: the "multi" key is only
        # present when at least one Pareto-optimal trial exists.
        params = {} if best_params is None else {"multi": best_params}

        eg_drawing = Drawing.initial_placement(eg_graph)
        mm.append(multi)

        # if d_name not in poss:
        #     pos = draw(
        #         pivots=params["multi"]["pivots"],
        #         iterations=params["multi"]["iterations"],
        #         eps=params["multi"]["eps"],
        #         eg_graph=eg_graph,
        #         eg_indices=eg_indices,
        #         eg_drawing=eg_drawing,
        #         edge_weight=EDGE_WEIGHT,
        #         seed=seed,
        #     )
        #     poss[d_name] = pos
        # else:
        #     pos = poss[d_name]

        # ax.set_title(f"{d_name}", fontsize=20, y=1)
        # ax.set_aspect("equal")

        # ax.set_title(
        #     f"""{d_name} pivots={params["multi"]["pivots"]},iter={params["multi"]["iterations"]},eps={round(params["multi"]["eps"], 4)}""",
        #     fontsize=5,
        # )

        # ps.append(params["multi"])

        # nx.draw(
        #     nx_graph,
        #     pos=pos,
        #     node_size=3,
        #     # node_color="#AB47BC",
        #     width=1,
        #     edge_color="#CFD8DC",
        #     ax=ax,
        # )
# pprint.pprint(ps)
# # plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
# plt.tight_layout()
# plt.show()

# Close the shared figure exactly once, after every dataset has been handled,
# and by explicit handle rather than through the implicit "current figure".
plt.close(fig)
pprint.pprint(mm)

[0.6812593404894949,
 0.6409512301679431,
 0.46911960565540106,
 0.578505875565283,
 0.6787284313882489]


In [19]:
# Best parameters (`ps`) and best objective value (`ss`) of the
# single-objective study, one entry per (dataset, preference vector).
ps = []
ss = []
for d_name in d_names:
    # One figure per dataset; it is only drawn on by the disabled plotting
    # code below and is closed at the end of the iteration.
    fig, axes = plt.subplots(
        figsize=(20, 20),
        dpi=300,
        facecolor="white",
        # nrows=2,
        # ncols=3,
        nrows=1,
        ncols=1,
        squeeze=False,
    )
    ax = axes[0][0]

    # The graph objects are only consumed by the disabled drawing code below.
    dataset_path = get_dataset_path(d_name)
    nx_graph = nx_graph_preprocessing(
        load_nx_graph(dataset_path=dataset_path), EDGE_WEIGHT
    )
    eg_graph, eg_indices = egraph_graph(nx_graph=nx_graph)

    for p in base_prefs:
        # The study name embeds the raw (un-normalised) preference vector.
        single_obj_study_name = f'single-obj_{",".join(map(str, p))}'
        single_db_uri = f'sqlite:///{ex_path.joinpath(f"data/optimization/{d_name}-user-experiment.db")}'
        single_obj_study = optuna.load_study(
            storage=single_db_uri, study_name=single_obj_study_name
        )

        params = single_obj_study.best_trial.params

        eg_drawing = Drawing.initial_placement(eg_graph)

        ss.append(single_obj_study.best_value)

        # pos = draw(
        #     pivots=params["pivots"],
        #     iterations=params["iterations"],
        #     eps=params["eps"],
        #     eg_graph=eg_graph,
        #     eg_indices=eg_indices,
        #     eg_drawing=eg_drawing,
        #     edge_weight=EDGE_WEIGHT,
        #     seed=seed,
        # )
        # ax.set_aspect("equal")

        # ax.set_title(
        #     f"""{d_name.replace('_','-')}""",
        #     fontsize=20,
        # )

        # nx.draw(
        #     nx_graph,
        #     pos=pos,
        #     node_size=5,
        #     # node_color="#AB47BC",
        #     width=2,
        #     edge_color="#CFD8DC",
        #     ax=ax,
        #     margins=0,
        # )
        ps.append(params)
    # pprint.pprint(ps)
    # plt.tight_layout(pad=-1)
    # plt.savefig(
    #     ex_path.parent.parent.parent.joinpath(
    #         f"thesis/thesis-fuga/figs/{d_name.replace('_','-')}.png"
    #     )
    # )

    # Release this dataset's figure by explicit handle.
    plt.close(fig)
pprint.pprint(ss)

[0.6817167536338749,
 0.6918972882901694,
 0.4819022534992888,
 0.6205897285889468,
 0.7137686759414597]


In [22]:
# Relative gap, in percent of the single-objective value, between each
# entry of `mm` and the entry of `ss` at the same position.
for multi_score, single_score in zip(mm, ss):
    gap_percent = 100 * abs(multi_score - single_score) / single_score
    print(gap_percent)

0.06709724265125365
7.363240033520761
2.652539545326402
6.781268056007162
4.909187771093027


In [10]:
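# DISABLED CELL: drew every dataset with fixed pivots=1, iterations=100, eps=0.1 and saved it as "<dataset>_p=1.png".
# for d_name in d_names: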
#     fig, axes = plt.subplots(
#         figsize=(20, 20),
#         dpi=300,
#         facecolor="white",
#         # nrows=2,
#         # ncols=3,
#         nrows=1,
#         ncols=1,
#         squeeze=False,
#     )
#     ax = axes[0][0]
#     ps = []
#     df_paths = [
#         ex_path.joinpath(
#             f"data/grid/{d_name}/n_split={n_split}/seed={seed}.pkl"
#         )
#         for seed in seeds
#     ]
#     df = pd.concat([pd.read_pickle(path) for path in df_paths])
#     df = generate_seed_median_df(df)

#     scalers = generate_sscalers(df)

#     dataset_path = get_dataset_path(d_name)
#     nx_graph = nx_graph_preprocessing(
#         load_nx_graph(dataset_path=dataset_path), EDGE_WEIGHT
#     )
#     p_max = max(1, int(len(nx_graph.nodes) * 0.25))
#     eg_graph, eg_indices = egraph_graph(nx_graph=nx_graph)
#     eg_distance_matrix = eg_distance_matrices[d_name]

#     for p in base_prefs:
#         single_obj_study_name = f'single-obj_{",".join(map(str, p))}'
#         single_db_uri = f'sqlite:///{ex_path.joinpath(f"data/optimization/{d_name}-user-experiment.db")}'
#         sinigle_obj_study = optuna.load_study(
#             storage=single_db_uri, study_name=single_obj_study_name
#         )

#         params = sinigle_obj_study.best_trial.params

#         eg_drawing = Drawing.initial_placement(eg_graph)

#         pos = draw(
#             pivots=1,
#             iterations=100,
#             eps=0.1,
#             eg_graph=eg_graph,
#             eg_indices=eg_indices,
#             eg_drawing=eg_drawing,
#             edge_weight=EDGE_WEIGHT,
#             seed=seed,
#         )
#         ax.set_aspect("equal")

#         # ax.set_title(
#         #     f"""{d_name.replace('_','-')}""",
#         #     fontsize=20,
#         # )

#         nx.draw(
#             nx_graph,
#             pos=pos,
#             node_size=5,
#             # node_color="#AB47BC",
#             width=2,
#             edge_color="#CFD8DC",
#             ax=ax,
#             margins=0,
#         )
#         ps.append(params)
#     pprint.pprint(ps)
#     plt.tight_layout(pad=-1)
#     plt.savefig(
#         ex_path.parent.parent.parent.joinpath(
#             f"thesis/thesis-fuga/figs/{d_name.replace('_','-')}_p=1.png"
#         )
#     )

In [11]:
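# DISABLED CELL: drew every dataset with pivots=p_max // 2, iterations=1, eps=0.1 and saved it as "<dataset>_i=1.png".
# for d_name in d_names: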
#     fig, axes = plt.subplots(
#         figsize=(20, 20),
#         dpi=300,
#         facecolor="white",
#         # nrows=2,
#         # ncols=3,
#         nrows=1,
#         ncols=1,
#         squeeze=False,
#     )
#     ax = axes[0][0]
#     ps = []
#     df_paths = [
#         ex_path.joinpath(
#             f"data/grid/{d_name}/n_split={n_split}/seed={seed}.pkl"
#         )
#         for seed in seeds
#     ]
#     df = pd.concat([pd.read_pickle(path) for path in df_paths])
#     df = generate_seed_median_df(df)

#     scalers = generate_sscalers(df)

#     dataset_path = get_dataset_path(d_name)
#     nx_graph = nx_graph_preprocessing(
#         load_nx_graph(dataset_path=dataset_path), EDGE_WEIGHT
#     )
#     p_max = max(1, int(len(nx_graph.nodes) * 0.25))
#     eg_graph, eg_indices = egraph_graph(nx_graph=nx_graph)
#     eg_distance_matrix = eg_distance_matrices[d_name]

#     for p in base_prefs:
#         single_obj_study_name = f'single-obj_{",".join(map(str, p))}'
#         single_db_uri = f'sqlite:///{ex_path.joinpath(f"data/optimization/{d_name}-user-experiment.db")}'
#         sinigle_obj_study = optuna.load_study(
#             storage=single_db_uri, study_name=single_obj_study_name
#         )

#         params = sinigle_obj_study.best_trial.params

#         eg_drawing = Drawing.initial_placement(eg_graph)

#         pos = draw(
#             pivots=p_max // 2,
#             iterations=1,
#             eps=0.1,
#             eg_graph=eg_graph,
#             eg_indices=eg_indices,
#             eg_drawing=eg_drawing,
#             edge_weight=EDGE_WEIGHT,
#             seed=seed,
#         )
#         ax.set_aspect("equal")

#         # ax.set_title(
#         #     f"""{d_name.replace('_','-')}""",
#         #     fontsize=20,
#         # )

#         nx.draw(
#             nx_graph,
#             pos=pos,
#             node_size=5,
#             # node_color="#AB47BC",
#             width=2,
#             edge_color="#CFD8DC",
#             ax=ax,
#             margins=0,
#         )
#         ps.append(params)
#     pprint.pprint(ps)
#     plt.tight_layout(pad=-1)
#     plt.savefig(
#         ex_path.parent.parent.parent.joinpath(
#             f"thesis/thesis-fuga/figs/{d_name.replace('_','-')}_i=1.png"
#         )
#     )

In [12]:
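# DISABLED CELL: drew "dwt_1005" and "poli" with the best single-objective parameters on a 2x1 grid.
# NOTE: re-enabling it rebinds the notebook-level `d_names` to those two datasets.
# d_names = sorted(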
#     [
#         "dwt_1005",
#         "poli",
#     ]
# )
# fig, axes = plt.subplots(
#     figsize=(20 * len(d_names), 20),
#     dpi=300,
#     facecolor="white",
#     # nrows=2,
#     # ncols=3,
#     nrows=2,
#     ncols=1,
#     squeeze=False,
# )
# ps = []


# for d_name, ax in zip(d_names, axes.flatten()):
#     df_paths = [
#         ex_path.joinpath(
#             f"data/grid/{d_name}/n_split={n_split}/seed={seed}.pkl"
#         )
#         for seed in seeds
#     ]
#     df = pd.concat([pd.read_pickle(path) for path in df_paths])
#     df = generate_seed_median_df(df)

#     scalers = generate_sscalers(df)

#     dataset_path = get_dataset_path(d_name)
#     nx_graph = nx_graph_preprocessing(
#         load_nx_graph(dataset_path=dataset_path), EDGE_WEIGHT
#     )
#     p_max = max(1, int(len(nx_graph.nodes) * 0.25))
#     eg_graph, eg_indices = egraph_graph(nx_graph=nx_graph)
#     eg_distance_matrix = eg_distance_matrices[d_name]

#     for p in base_prefs:
#         single_obj_study_name = f'single-obj_{",".join(map(str, p))}'
#         single_db_uri = f'sqlite:///{ex_path.joinpath(f"data/optimization/{d_name}-user-experiment.db")}'
#         sinigle_obj_study = optuna.load_study(
#             storage=single_db_uri, study_name=single_obj_study_name
#         )

#         params = sinigle_obj_study.best_trial.params

#         eg_drawing = Drawing.initial_placement(eg_graph)

#         pos = draw(
#             pivots=params["pivots"],
#             iterations=params["iterations"],
#             eps=params["eps"],
#             eg_graph=eg_graph,
#             eg_indices=eg_indices,
#             eg_drawing=eg_drawing,
#             edge_weight=EDGE_WEIGHT,
#             seed=seed,
#         )
#         ax.set_aspect("equal")

#         ax.set_title(
#             f"""{d_name}""",
#             fontsize=20,
#         )

#         nx.draw(
#             nx_graph,
#             pos=pos,
#             node_size=5,
#             # node_color="#AB47BC",
#             width=2,
#             edge_color="#CFD8DC",
#             ax=ax,
#             margins=0,
#         )
#         ps.append(params)
# plt.tight_layout(pad=-1)
# pprint.pprint(ps)
# plt.show()