In [1]:
# Both name lists are sorted, so anything iterating over them sees a stable,
# alphabetical order regardless of how the literals are written here.
DATASET_NAMES = sorted(("les_miserables", "1138_bus", "USpowerGrid"))
PARAMS_NAMES = sorted(("number_of_pivots", "number_of_iterations", "eps"))

# Colour associated with each dataset, paired in sorted-name order.
COLOR_MAP = dict(zip(DATASET_NAMES, ("red", "green", "blue")))

print(DATASET_NAMES, PARAMS_NAMES)

['1138_bus', 'USpowerGrid', 'les_miserables'] ['eps', 'number_of_iterations', 'number_of_pivots']


In [2]:
# Third Party Library
import pandas as pd

# First Party Library
from config.paths import get_project_root_path


def generate_data_df_dict(dataset_names, experiment_data_dir=None):
    """Load the pickled data of every dataset into a dict keyed by dataset name.

    For each name, the file
    ``<experiment_data_dir>/grid/<dataset_name>-without-pos.pkl`` is read with
    ``pandas.read_pickle``.

    Args:
        dataset_names: Iterable of dataset names to load.
        experiment_data_dir: Directory that contains the ``grid``
            sub-directory (``str`` or path-like). When ``None`` (the default)
            ``<project root>/data/experiments/regression_analysis`` is used,
            which is the location the function always used before this
            parameter existed.

    Returns:
        dict: ``{dataset_name: object returned by pandas.read_pickle}``, with
        keys in the iteration order of ``dataset_names``.

    Errors raised by ``pandas.read_pickle`` (for example for a missing file)
    propagate unchanged.

    Note:
        ``read_pickle`` unpickles arbitrary objects; only point this function
        at files you trust.
    """
    # Local import: the notebook does not import pathlib at the top level.
    from pathlib import Path

    if experiment_data_dir is None:
        experiment_data_dir = (
            get_project_root_path()
            .joinpath("data")
            .joinpath("experiments")
            .joinpath("regression_analysis")
        )
    else:
        experiment_data_dir = Path(experiment_data_dir)

    grid_dir = experiment_data_dir.joinpath("grid")

    return {
        dataset_name: pd.read_pickle(
            grid_dir.joinpath(f"{dataset_name}-without-pos.pkl")
        )
        for dataset_name in dataset_names
    }


def generate_params_candidates():
    """Build the list of candidate values for each parameter.

    Every parameter gets 20 values: its step multiplied by 1, 2, ..., 20.

    Returns:
        dict: parameter name -> list of 20 candidate values, with keys in the
        order ``number_of_pivots``, ``number_of_iterations``, ``eps``.
    """
    step_by_param = {
        "number_of_pivots": 5,
        "number_of_iterations": 10,
        "eps": 0.05,
    }
    multipliers = range(1, 20 + 1)

    # The eps values are the raw float products (e.g. 3 * 0.05); they are
    # deliberately not rounded here.
    return {
        name: [k * step for k in multipliers]
        for name, step in step_by_param.items()
    }


# Read one pickled object per dataset from disk, keyed by dataset name.
data_df_dict = generate_data_df_dict(DATASET_NAMES)
# Candidate values for each parameter, keyed by parameter name.
params_candidates = generate_params_candidates()

In [3]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt
from itertools import product
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import numpy as np
from config.paths import get_project_root_path
from config.quality_metrics import ALL_QM_NAMES
import pickle

# Scaler applied to the quality-metric (target) values before regression.
sscaler = StandardScaler()

# Degree of the polynomial features; also part of the export directory name.
deg = 5

# Whole column of every quality metric per dataset as an (n, 1) array.
# These full columns are what the scaler is fitted on.
data_y_dict = {}
for dataset_name in DATASET_NAMES:
    data_y_dict[dataset_name] = {}
    for qm_name in ALL_QM_NAMES:
        data_y_dict[dataset_name][qm_name] = np.array(
            data_df_dict[dataset_name][qm_name]
        ).reshape(-1, 1)

# Directory that receives the pickled models. The name is derived from `deg`
# so that changing the degree cannot write into a mislabelled folder.
# It is built and created once instead of on every iteration.
export_dir = (
    get_project_root_path()
    .joinpath("data")
    .joinpath("experiments")
    .joinpath("regression_analysis")
    .joinpath("models")
    .joinpath(f"deg={deg}-ss")
)
export_dir.mkdir(parents=True, exist_ok=True)

for params_name in PARAMS_NAMES:
    # Build every combination of the parameters other than params_name.
    params_t = [name for name in PARAMS_NAMES if name != params_name]
    comb = list(
        product(params_candidates[params_t[0]], params_candidates[params_t[1]])
    )
    for qm_name in ALL_QM_NAMES:
        for dataset_name in DATASET_NAMES:
            dataset_df = data_df_dict[dataset_name]

            # The scaler depends only on (dataset, metric), so fit it once
            # here rather than once per parameter combination.
            # NOTE(review): the fitted scaler is not saved with the model;
            # anyone loading a model must re-fit the same scaler to map
            # predictions back to the original scale. Confirm this is intended.
            sscaler.fit(data_y_dict[dataset_name][qm_name])

            for c in comb:
                # Pair each fixed parameter name with its value for this run.
                fixed_params = list(zip(params_t, c))

                # Keep only the rows where the other two parameters equal the
                # current combination; params_name is the free variable.
                df = dataset_df.query(
                    " & ".join(
                        f"{name} == {value}" for name, value in fixed_params
                    )
                )
                x = np.array(df[params_name]).reshape(-1, 1)
                y = np.array(df[qm_name]).reshape(-1, 1)
                yss = sscaler.transform(y)

                regr = Pipeline(
                    [
                        ("poly", PolynomialFeatures(degree=deg)),
                        ("linear", LinearRegression()),
                    ]
                )
                regr.fit(x, yss)

                params_info = "-".join(
                    f"{name}={value}" for name, value in fixed_params
                )
                export_path = export_dir.joinpath(
                    f"{dataset_name}-{qm_name}-{params_name}-{params_info}.pickle"
                )
                # Pickle protocol 2 is kept from the original code; presumably
                # chosen for compatibility with older readers. TODO confirm.
                with open(export_path, mode="wb") as f:
                    pickle.dump(regr, f, protocol=2)