In [None]:
# Notebook-level configuration constant.
# NOTE(review): not referenced by any of the visible cells; presumably the
# uniform edge weight used when drawing the graph — confirm or remove.
EDGE_WEIGHT = 30

In [None]:
# Standard Library
import argparse
from statistics import median
import json

# Third Party Library
import optuna
import pandas as pd
from egraph import Drawing, all_sources_bfs
from ex_utils.config.dataset import dataset_names
from ex_utils.config.paths import get_dataset_path
from ex_utils.config.quality_metrics import qm_names
from ex_utils.share import (
    draw_and_measure,
    ex_path,
    generate_seed_median_df,
    generate_sscalers,
    pivots2rate,
)
from ex_utils.utils.graph import (
    egraph_graph,
    load_nx_graph,
    nx_graph_preprocessing,
)

In [None]:
# Read the preference vectors from the experiment's data directory.
# Each entry of `random_data` is consumed later as one list of raw
# preference values, zipped against `qm_names`.
random_json_path = ex_path.joinpath("data/random.json")
with random_json_path.open() as f:
    random_data = json.loads(f.read())

In [None]:
# --- Experiment configuration -------------------------------------------
seeds = list(range(15))  # one baseline pickle is read per seed
n_split = 10  # selects the `n_split=...` data directories
n_samples = [50, 100, 500]  # selects the `n_sample=...` directories to evaluate
d_name = "USpowerGrid"

# --- Baseline (independent of the preference vector) --------------------
# Loading and scaling the baseline does not depend on `random_pref`, so it
# is done once here instead of once per preference.
baseline_df_paths = [
    ex_path.joinpath(
        f"data/grid/{d_name}/n_split={n_split}/seed={data_seed}.pkl"
    )
    for data_seed in seeds
]
baseline_df = generate_seed_median_df(
    pd.concat([pd.read_pickle(df_path) for df_path in baseline_df_paths])
)
baseline_sscalers = generate_sscalers(baseline_df)

# Scale every quality metric of the baseline with its own scaler.
# (The original scaled only whichever `qm_name` had leaked from an earlier
# loop, leaving the other `sscaled_*` columns undefined.)
for qm_name in qm_names:
    baseline_df[f"sscaled_{qm_name}"] = baseline_sscalers[qm_name].transform(
        baseline_df[f"values_{qm_name}"].values.reshape(-1, 1)
    )

# --- Compare sampled points against the baseline, per preference --------
for random_pref in random_data:
    # Normalise the raw preference values so the weights sum to 1.
    pref_sum = sum(random_pref)
    pref = {
        qm_name: p / pref_sum for qm_name, p in zip(qm_names, random_pref)
    }

    print(pref)

    # Preference-weighted score of every baseline row. This must be built
    # from `baseline_df`; the original read `target_df` here, which is
    # undefined on a fresh kernel and stale otherwise.
    baseline_df["weighted_sscaled_sum"] = sum(
        baseline_df[f"sscaled_{qm_name}"] * pref[qm_name]
        for qm_name in qm_names
    )
    baseline_max = baseline_df.loc[
        baseline_df["weighted_sscaled_sum"].idxmax()
    ]

    # NOTE(review): `result` is rebuilt for every preference and never
    # stored or displayed, and "all" is reset for every `n_sample`, so after
    # the loop it only holds the file count of the last sample size.
    # Behaviour kept as in the original — confirm the intended bookkeeping.
    result = {}

    for n_sample in n_samples:
        result["all"] = 0
        result[n_sample] = 0
        points_dir = ex_path.joinpath(
            f"data/sampled_points/{d_name}/n_split={n_split}/n_sample={n_sample}/"
        )
        for path in points_dir.iterdir():
            result["all"] += 1
            target_df = pd.read_pickle(path)

            # Scale this run with scalers fitted on the run itself, then
            # pick the row that maximises the preference-weighted sum.
            sscalers = generate_sscalers(target_df)
            for qm_name in qm_names:
                target_df[f"sscaled_{qm_name}"] = sscalers[qm_name].transform(
                    target_df[f"values_{qm_name}"].values.reshape(-1, 1)
                )

            target_df["weighted_sscaled_sum"] = sum(
                target_df[f"sscaled_{qm_name}"] * pref[qm_name]
                for qm_name in qm_names
            )
            max_row = target_df.loc[target_df["weighted_sscaled_sum"].idxmax()]

            # Re-express the chosen row's raw metric values on the baseline
            # scale so the two weighted sums are comparable.
            scaled_qm = {
                qm_name: baseline_sscalers[qm_name].transform(
                    [[max_row[f"values_{qm_name}"]]]
                )[0][0]
                for qm_name in qm_names
            }
            weighted_sacled_qm_sum = sum(
                scaled_qm[qm_name] * pref[qm_name] for qm_name in qm_names
            )

            # Count the runs whose best point beats the best baseline point.
            if baseline_max["weighted_sscaled_sum"] < weighted_sacled_qm_sum:
                result[n_sample] += 1

In [None]:
# Inspect the frame left over from the evaluation loop. `target_df` is
# rebound for every file read there, so this shows whichever file was read last.
target_df

In [None]:
# Spot-check a single sampled-points pickle (dataset 1138_bus, n_sample=50).
# The path is built from `ex_path` instead of an absolute path under one
# user's home directory so the cell runs on other machines.
# NOTE(review): assumes `ex_path` is the `experiments/japan-vis` directory
# that the original absolute path pointed into — confirm.
pd.read_pickle(
    ex_path.joinpath(
        "data/sampled_points/1138_bus/n_split=10/n_sample=50/"
        "0aeab76d-55a8-48a3-9978-f0b1017f6c0c.pkl"
    )
)