In [1]:
EDGE_WEIGHT = 30

In [2]:
# Standard Library
import argparse
from statistics import median
import json

# Third Party Library
import optuna
import pandas as pd
from egraph import Drawing, all_sources_bfs
from ex_utils.config.dataset import dataset_names
from ex_utils.config.paths import get_dataset_path
from ex_utils.config.quality_metrics import qm_names
from ex_utils.share import (
    draw_and_measure,
    ex_path,
    generate_seed_median_df,
    generate_sscalers,
    pivots2rate,
)
from ex_utils.utils.graph import (
    egraph_graph,
    load_nx_graph,
    nx_graph_preprocessing,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
with open(ex_path.joinpath("data/random.json")) as f:
    random_data = json.load(f)

In [4]:
seeds = list(range(15))
n_split = 10
n_samples = [50, 100, 500]
d_name = "USpowerGrid"

results = []

d_names = [
    "1138_bus",
    "USpowerGrid",
    "dwt_1005",
    "poli",
    "qh882",
]
for d_name in d_names:
    for random_pref in random_data:
        pref = {}
        pref_sum = sum(random_pref)
        for qm_name, p in zip(qm_names, random_pref):
            pref[qm_name] = p / pref_sum

        print(pref)

        baseline_df_paths = [
            ex_path.joinpath(
                f"data/grid/{d_name}/n_split={n_split}/seed={data_seed}.pkl"
            )
            for data_seed in seeds
        ]
        baseline_df = generate_seed_median_df(
            pd.concat([pd.read_pickle(df_path) for df_path in baseline_df_paths])
        )
        baseline_sscalers = generate_sscalers(baseline_df)

        for qm_name in qm_names:
            baseline_df[f"sscaled_{qm_name}"] = baseline_sscalers[
                qm_name
        ].transform(baseline_df[f"values_{qm_name}"].values.reshape(-1, 1))

        baseline_df["weighted_sscaled_sum"] = sum(
            [
                baseline_df[f"sscaled_{qm_name}"] * pref[qm_name]
                for qm_name in qm_names
            ]
        )
        baseline_max = baseline_df.loc[
            baseline_df["weighted_sscaled_sum"].idxmax()
        ]

        result = {}
        result['dataset'] = d_name
        result["baseline"] = baseline_max["weighted_sscaled_sum"]

        for n_sample in n_samples:
            result[n_sample] = 0
            points_dir = ex_path.joinpath(
                f"data/sampled_points/{d_name}/n_split={n_split}/n_sample={n_sample}/"
            )
            for path in points_dir.iterdir():
                target_df = pd.read_pickle(path)
                sscalers = generate_sscalers(target_df)

                for qm_name in qm_names:
                    target_df[f"sscaled_{qm_name}"] = sscalers[qm_name].transform(
                        target_df[f"values_{qm_name}"].values.reshape(-1, 1)
                    )

                target_df["weighted_sscaled_sum"] = sum(
                    [
                        target_df[f"sscaled_{qm_name}"] * pref[qm_name]
                        for qm_name in qm_names
                    ]
                )
                max_row = target_df.loc[target_df["weighted_sscaled_sum"].idxmax()]

                scaled_qm = dict(
                    [
                        (
                            qm_name,
                            baseline_sscalers[qm_name].transform(
                                [[max_row[f"values_{qm_name}"]]]
                            )[0][0],
                        )
                        for qm_name in qm_names
                    ]
                )
                weighted_sacled_qm_sum = sum(
                    [scaled_qm[qm_name] * pref[qm_name] for qm_name in qm_names]
                )

                result[n_sample] += weighted_sacled_qm_sum
                if baseline_max["weighted_sscaled_sum"] < weighted_sacled_qm_sum:
                    result[n_sample] += 1
            result[n_sample] /= len(list(points_dir.iterdir()))
        results.append(result)

result_df = pd.DataFrame(results)

{'angular_resolution': 0.07005193856395217, 'aspect_ratio': 0.048683671444146585, 'crossing_angle': 0.02259706452600273, 'crossing_number': 0.0778221706908293, 'gabriel_graph_property': 0.043494554380670986, 'ideal_edge_length': 0.1296386455302672, 'neighborhood_preservation': 0.0428713607961284, 'node_resolution': 0.24723874592393258, 'stress': 0.09198066416663564, 'time_complexity': 0.2256211839774344}
{'angular_resolution': 0.210120511010825, 'aspect_ratio': 0.0318498334264708, 'crossing_angle': 0.07500762307765559, 'crossing_number': 0.1178732812942978, 'gabriel_graph_property': 0.007920478812051391, 'ideal_edge_length': 0.1450618816549619, 'neighborhood_preservation': 0.1767546141989604, 'node_resolution': 0.054046556013498614, 'stress': 0.1508674723033216, 'time_complexity': 0.030497748207956872}
{'angular_resolution': 0.18334969631524767, 'aspect_ratio': 0.07113049004723765, 'crossing_angle': 0.04144779948756015, 'crossing_number': 0.12259203208280932, 'gabriel_graph_property': 

In [18]:
result_df

Unnamed: 0,dataset,baseline,50,100,500
0,1138_bus,0.662441,0.607539,0.63845,0.658164
1,1138_bus,0.729241,0.693519,0.703918,0.724256
2,1138_bus,0.507308,0.446898,0.472175,0.504712
3,1138_bus,0.637631,0.514937,0.556889,0.621034
4,1138_bus,0.470196,0.438034,0.452905,0.467827
5,1138_bus,0.371121,0.310958,0.325562,0.359851
6,1138_bus,0.533065,0.477913,0.496181,0.529735
7,1138_bus,0.506925,0.443675,0.476939,0.501361
8,1138_bus,0.477104,0.373305,0.414178,0.458514
9,1138_bus,0.683671,0.62429,0.657451,0.680453


In [19]:
d_names = [
    "1138_bus",
    "USpowerGrid",
    "dwt_1005",
    "poli",
    "qh882",
]
n_samples = [50, 100, 500]
for d_name in d_names:
    for n_sample in n_samples:
        df = result_df.query(f'dataset == "{d_name}"')
        df["d"] = df[n_sample] / df["baseline"]
        print(d_name, n_sample, df['d'].mean())

1138_bus 50 0.8795923051699536
1138_bus 100 0.9268231547830866
1138_bus 500 0.9861371296003183
USpowerGrid 50 0.8541417974300294
USpowerGrid 100 0.9049680430981151
USpowerGrid 500 0.9822296220608124
dwt_1005 50 0.9449128185425227
dwt_1005 100 0.9709004588950486
dwt_1005 500 0.9950767666256367
poli 50 0.7441635753493889
poli 100 0.8316400392311065
poli 500 0.9740582356999902
qh882 50 0.9083178096460081
qh882 100 0.9461462091427967
qh882 500 0.9923390032614761


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["d"] = df[n_sample] / df["baseline"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["d"] = df[n_sample] / df["baseline"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["d"] = df[n_sample] / df["baseline"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row

In [17]:
pd.set_option('display.max_rows', None)

In [16]:
result_df.query('dataset == "1138_bus"').sort_values(by=[100])

Unnamed: 0,dataset,baseline,50,100,500
84,1138_bus,0.327223,0.285026,0.301165,0.324583
53,1138_bus,0.363168,0.305249,0.320847,0.353623
5,1138_bus,0.371121,0.310958,0.325562,0.359851
93,1138_bus,0.354972,0.324895,0.334409,0.351485
16,1138_bus,0.362163,0.320566,0.335318,0.359396
...,...,...,...,...,...
21,1138_bus,0.781105,0.628263,0.686104,0.761069
1,1138_bus,0.729241,0.693519,0.703918,0.724256
60,1138_bus,0.745338,0.721390,0.726693,0.741016
95,1138_bus,0.946868,0.862752,0.902989,0.936731


In [5]:
results

[{'dataset': '1138_bus',
  'baseline': 0.6624414497233113,
  50: 0.6075391497474523,
  100: 0.6384497601925769,
  500: 0.6581638332009188},
 {'dataset': '1138_bus',
  'baseline': 0.7292409744901287,
  50: 0.6935186772219204,
  100: 0.703918034438292,
  500: 0.7242557086564413},
 {'dataset': '1138_bus',
  'baseline': 0.5073078435767051,
  50: 0.4468975268717215,
  100: 0.4721752928728818,
  500: 0.5047123199259197},
 {'dataset': '1138_bus',
  'baseline': 0.6376310725571079,
  50: 0.5149366066274872,
  100: 0.5568886357109096,
  500: 0.6210342971539078},
 {'dataset': '1138_bus',
  'baseline': 0.4701957587553287,
  50: 0.4380343071111678,
  100: 0.4529054717260586,
  500: 0.46782724822571653},
 {'dataset': '1138_bus',
  'baseline': 0.37112119155659407,
  50: 0.31095816632921147,
  100: 0.32556163916126174,
  500: 0.359850674140576},
 {'dataset': '1138_bus',
  'baseline': 0.5330650803402742,
  50: 0.4779134435362152,
  100: 0.4961811581158382,
  500: 0.529735087255754},
 {'dataset': '1138_

In [6]:
target_df

Unnamed: 0,edge_weight,params_eps,params_iterations,params_pivots,values_angular_resolution,values_aspect_ratio,values_crossing_angle,values_crossing_number,values_gabriel_graph_property,values_ideal_edge_length,...,sscaled_aspect_ratio,sscaled_crossing_angle,sscaled_crossing_number,sscaled_gabriel_graph_property,sscaled_ideal_edge_length,sscaled_neighborhood_preservation,sscaled_node_resolution,sscaled_stress,sscaled_time_complexity,weighted_sscaled_sum
883,30,0.046416,177,121,-1687.185547,0.903166,-0.437483,-4051.0,-2.088480e+05,-217.968857,...,0.933742,0.318374,0.410907,0.315693,0.341627,1.289012,-0.695745,0.672303,-1.918614,0.356487
9,30,1.000000,1,1,-2065.652344,0.942934,-0.361538,-64710.0,-2.687071e+08,-16058.925781,...,1.365429,2.499801,-4.318804,-0.799354,-0.612422,-1.671356,2.329745,-2.285289,0.625625,-0.742550
183,30,0.046416,177,2,-1767.371460,0.666000,-0.499384,-6600.0,-9.792001e+05,-524.368225,...,-1.640698,-1.459696,0.212156,0.312494,0.323174,-0.935652,0.744960,-0.529829,0.551798,-0.392885
200,30,0.010000,1,4,-2119.644531,0.887258,-0.362388,-53077.0,-7.649896e+08,-52560.191406,...,0.761059,2.475392,-3.411754,-2.860365,-2.810774,-1.570430,1.993211,-2.850606,0.621488,-1.201672
475,30,0.129155,155,11,-1791.121216,0.839192,-0.466410,-4740.0,-2.786869e+05,-197.820862,...,0.239305,-0.512527,0.357184,0.315403,0.342841,-0.052042,-0.633130,0.540478,0.395820,0.156213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8,30,0.599484,1,1,-2065.652344,0.942934,-0.361538,-64710.0,-2.687071e+08,-16058.925781,...,1.365429,2.499801,-4.318804,-0.799354,-0.612422,-1.671356,2.329745,-2.285289,0.625625,-0.742550
343,30,0.046416,89,7,-1796.222168,0.775323,-0.469603,-4867.0,-3.159235e+05,-295.963318,...,-0.453991,-0.604243,0.347282,0.315248,0.336930,-0.270034,-0.598850,0.410828,0.532675,0.041561
372,30,0.027826,155,7,-1794.021362,0.761406,-0.457807,-4957.0,-3.187016e+05,-253.378815,...,-0.605063,-0.265419,0.340264,0.315237,0.339495,-0.335524,-0.561546,0.442061,0.469168,0.080472
27,30,0.359381,45,1,-1661.528442,0.642741,-0.420646,-8624.0,-4.761008e+05,-89.617065,...,-1.893171,0.801976,0.054341,0.314583,0.349358,-1.206417,2.065890,-0.998151,0.612358,-0.069088


In [7]:
pd.read_pickle(
    "/Users/fuga_takata/dev/vdslab-project/hyperparameter-in-graph-drawing/experiments/japan-vis/data/sampled_points/1138_bus/n_split=10/n_sample=50/0aeab76d-55a8-48a3-9978-f0b1017f6c0c.pkl"
)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/fuga_takata/dev/vdslab-project/hyperparameter-in-graph-drawing/experiments/japan-vis/data/sampled_points/1138_bus/n_split=10/n_sample=50/0aeab76d-55a8-48a3-9978-f0b1017f6c0c.pkl'