In [1]:
# Name of the dataset to load; also used as the prefix of the exported file names.
DATASET_NAME = "USpowerGrid"

# Uniform edge length: passed to `load_nx_graph` and returned by the constant
# edge-length callbacks given to `warshall_floyd` and `SparseSgd`.
EDGE_WEIGHT = 30

# Baseline layout settings; these match the values held in `empirical_params`.
NUM_OF_ITERATIONS = 100
NUM_OF_PIVOTS = 50
EPS = 0.1

# Number of independently seeded layout runs per candidate parameter value.
N_SEED = 10


In [2]:
import pandas as pd
from tqdm import tqdm


In [3]:
from uuid import uuid4


def generate_data_id():
    """Return a fresh random identifier (UUID4) in its string form."""
    identifier = uuid4()
    return str(identifier)


In [4]:
from random import randint


def generate_seed():
    """Return a random seed in the unsigned 32-bit range ``[0, 2**32 - 1]``.

    ``random.randint`` includes both endpoints, so the upper bound must be
    ``2**32 - 1``; an upper bound of ``2**32`` could yield a value one past
    the 32-bit range.
    NOTE(review): assumes a 32-bit seed is what was intended -- confirm
    against the accepted range of ``Rng.seed_from``.
    """
    return randint(0, 2**32 - 1)


In [5]:
def generate_data_object(data_id, pos, quality_metrics, params):
    """Build one flat record describing a single drawing.

    The record starts with ``"id"`` and ``"pos"``, followed by every entry of
    ``quality_metrics`` and then every entry of ``params``. On a key clash the
    later mapping wins (``params`` over ``quality_metrics`` over the two
    leading fields).
    """
    record = {"id": data_id, "pos": pos}
    for extra_fields in (quality_metrics, params):
        record.update(extra_fields)
    return record


In [6]:
from config import paths

# Output directory for this experiment:
# <project root>/data/experiments/regression_analysis (created if missing).
EXPERIMENT_DATA_DIR = paths.get_project_root_path().joinpath(
    "data", "experiments", "regression_analysis"
)
EXPERIMENT_DATA_DIR.mkdir(parents=True, exist_ok=True)

# Short labels keyed by the long metric names used in this notebook.
NAME_ABBREVIATIONS = dict(
    angular_resolution="ANR",
    aspect_ratio="AR",
    crossing_angle="CA",
    crossing_number="CN",
    gabriel_graph_property="GB",
    ideal_edge_lengths="IE",
    node_resolution="NR",
    run_time="RT",
    neighborhood_preservation="NP",
    stress="ST",
)


In [7]:
from utils.graph import load_nx_graph

# Look up the path for the configured dataset (presumably its on-disk
# location; not referenced again in the cells shown here).
dataset_path = paths.get_dataset_path(dataset_name=DATASET_NAME)
# Load the dataset as a graph, passing EDGE_WEIGHT as the edge weight.
# NOTE(review): the helper name suggests a NetworkX graph -- confirm in utils.graph.
nx_graph = load_nx_graph(dataset_name=DATASET_NAME, edge_weight=EDGE_WEIGHT)


In [8]:
from generators.graph import egraph_graph
from egraph import warshall_floyd

# Convert the loaded graph to its egraph counterpart. `eg_indices` is a mapping
# from each node to its egraph index; it is used later to read the coordinates.
eg_graph, eg_indices = egraph_graph(nx_graph=nx_graph)
# Distance matrix computed with every edge assigned the same length EDGE_WEIGHT
# (presumably all-pairs shortest paths, going by the function name -- verify
# against the egraph documentation).
eg_distance_matrix = warshall_floyd(eg_graph, lambda _: EDGE_WEIGHT)


In [9]:
from itertools import product
from config.parameters import domain_ss

# Step size per swept parameter: the k-th candidate value is k * step.
params_steps = {
    "number_of_pivots": 5,
    "number_of_iterations": 10,
    "eps": 0.05,
}

# Baseline values held fixed while one other parameter is swept. They are taken
# from the notebook's config constants so the baseline is defined in one place
# only (the values are unchanged: 50 / 100 / 0.1).
empirical_params = {
    "number_of_pivots": NUM_OF_PIVOTS,
    "number_of_iterations": NUM_OF_ITERATIONS,
    "eps": EPS,
}

# Number of candidate values generated for each swept parameter.
NUM_OF_CANDIDATES = 20

params_names = ["number_of_pivots", "number_of_iterations", "eps"]

# For every parameter, the candidates are step, 2 * step, ..., 20 * step.
# NOTE(review): an earlier version also read domain_ss[name]["l"] / ["u"] into
# locals that were never used, so the domain bounds have never constrained the
# candidates. If clamping to the domain is wanted, it has to be added here.
params_candidates = {
    params_name: [
        multiplier * params_steps[params_name]
        for multiplier in range(1, NUM_OF_CANDIDATES + 1)
    ]
    for params_name in params_names
}



In [10]:
from egraph import Coordinates, Rng, SparseSgd, crossing_edges
from utils.quality_metrics import measure_qualities
from config.quality_metrics import ALL_QM_NAMES

# Sweep each parameter on its own: for every candidate value, run the layout
# N_SEED times with fresh random seeds, measure every quality metric, and export
# one pickled DataFrame per swept parameter.
for params_name in params_names:
    # One record per (candidate value, seed) pair of this sweep.
    data = []
    for params_candidate in tqdm(params_candidates[params_name]):
        # Only the swept parameter deviates from the baseline values.
        params = {**empirical_params, params_name: params_candidate}

        for _ in range(N_SEED):
            eg_drawing = Coordinates.initial_placement(eg_graph)
            seed = generate_seed()
            rng = Rng.seed_from(seed)
            sparse_sgd = SparseSgd(
                eg_graph,
                lambda _: EDGE_WEIGHT,  # every edge has the same length
                params["number_of_pivots"],
                rng,
            )
            scheduler = sparse_sgd.scheduler(
                params["number_of_iterations"], params["eps"]
            )

            def step(eta):
                # Called by the scheduler with the learning rate `eta`.
                sparse_sgd.shuffle(rng)
                sparse_sgd.apply(eg_drawing, eta)

            scheduler.run(step)

            eg_crossings = crossing_edges(eg_graph, eg_drawing)
            quality_metrics = measure_qualities(
                target_qm_names=ALL_QM_NAMES,
                eg_graph=eg_graph,
                eg_drawing=eg_drawing,
                eg_crossings=eg_crossings,
                eg_distance_matrix=eg_distance_matrix,
            )
            # These two metrics are stored with their sign flipped.
            # NOTE(review): presumably so that all metrics share one
            # optimisation direction -- confirm with the downstream analysis.
            quality_metrics["aspect_ratio"] *= -1
            quality_metrics["neighborhood_preservation"] *= -1

            # Final coordinates keyed by the original node identifiers.
            pos = {
                u: (eg_drawing.x(i), eg_drawing.y(i))
                for u, i in eg_indices.items()
            }
            data_id = generate_data_id()
            data_object = {
                "data_id": data_id,
                "pos": pos,
                **quality_metrics,
                **params,
                # Record the seed so that any single run can be reproduced.
                "seed": seed,
            }
            data.append(data_object)

    data_export_path = EXPERIMENT_DATA_DIR.joinpath("params").joinpath(
        f"{DATASET_NAME}-{params_name}.pkl"
    )
    data_export_path.parent.mkdir(parents=True, exist_ok=True)

    data_df = pd.DataFrame(data)
    data_df.to_pickle(data_export_path)


100%|██████████| 20/20 [04:12<00:00, 12.64s/it]
100%|██████████| 20/20 [03:59<00:00, 11.97s/it]
100%|██████████| 20/20 [03:53<00:00, 11.69s/it]


In [11]:
# `data_df` still holds the frame of the last sweep above ("eps"), in which
# `number_of_pivots` is kept at its baseline value for every row.
list(data_df['number_of_pivots'])

[50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50,
 50]