In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so edits made to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Read the notebook settings from "setting.yaml" in the working directory.
# The file handle must be named and passed to the parser; the context manager
# closes it once the settings are parsed.
# NOTE(review): safe_load only builds plain YAML types (dicts, lists, scalars).
# If setting.yaml relies on Python-specific tags, switch to
# yaml.load(setting_file, Loader=yaml.FullLoader) — confirm against the file.
with open("setting.yaml") as setting_file:

    SETTING = yaml.safe_load(setting_file)

# Build the path lookup used by the later cells from the loaded settings.
PATH = make_path_dict(SETTING)

In [None]:
# Read the W and H matrices from their tab-separated files, using the first
# column of each file as the row index, and name the column axis of each.
w = pd.read_csv(PATH["w.tsv"], sep="\t", index_col=0).rename_axis(columns="Factor")

# The column axis of H is named after the configured sample alias.
h = pd.read_csv(PATH["h.tsv"], sep="\t", index_col=0).rename_axis(
    columns=SETTING["sample_alias"]
)

In [None]:
# For W and then H: when the matching "fraction to drop" setting is set, compute
# an entropy per element and drop the selected elements from the matrix in place.
# Entropy is computed per row of W (axis=1) and per column of H (axis=0); the
# selected labels are then dropped from W's rows (axis=0) and H's columns (axis=1).
for feature_type, w_or_h, dataframe, apply_axis, drop_axis in (
    ("feature", "w", w, 1, 0),
    ("sample", "h", h, 0, 1),
):

    fraction_to_drop = SETTING[f"gps_map_caotic_{w_or_h}_element_fraction_to_drop"]

    # A None setting means "keep every element of this matrix".
    if fraction_to_drop is None:

        continue

    matrix_name = w_or_h.title()

    print(f"{matrix_name} shape before: {dataframe.shape}")

    element_entropy = dataframe.apply(kraft.compute_entropy, axis=apply_axis)

    # NOTE(review): presumably returns the labels whose entropy falls in the top
    # `fraction` and plots the ranking using the title/axis arguments — confirm
    # against kraft.select_series_indices.
    elements_to_drop = kraft.select_series_indices(
        element_entropy,
        ">",
        fraction=fraction_to_drop,
        title={"text": SETTING[f"{feature_type}_alias"]},
        xaxis={"title": "Rank"},
        yaxis={"title": f"Entropy in {matrix_name}"},
    )

    # In-place drop so that the notebook-level `w` / `h` objects are updated.
    dataframe.drop(elements_to_drop, axis=drop_axis, inplace=True)

    print(f"{matrix_name} shape after: {dataframe.shape}")

In [None]:
# Collect the pull-related constructor arguments from the settings; each
# keyword maps to the setting of the same name prefixed with "gps_map_".
pull_keyword_arguments = {
    name: SETTING[f"gps_map_{name}"]
    for name in ("w_n_pull", "w_pull_power", "h_n_pull", "h_pull_power")
}

# Build the GPS map from the transposed W and from H, without plotting here.
# NOTE(review): W is passed transposed, presumably so both matrices share the
# same row axis — confirm against kraft.GPSMap.
gps_map = kraft.GPSMap(w=w.T, h=h, **pull_keyword_arguments, plot=False)

In [None]:
# Plot the GPS map once for the W elements and once for the H elements. When a
# cluster-by-element table exists on disk, label the elements with their
# cluster and plot again into the "hcc" directory.
for element_type, w_or_h, elements in (
    ("feature", "w", gps_map.w_elements),
    ("sample", "h", gps_map.h_elements),
):

    # Nothing to plot for a side that has no elements.
    if elements is None:

        continue

    title = f"{SETTING[f'{element_type}_alias']} on GPS Map"

    # Keyword arguments shared by the unlabeled and the labeled plot.
    plot_keyword_arguments = {
        "element_marker_size": SETTING[f"gps_map_{w_or_h}_element_marker_size"],
        "title": title,
    }

    gps_map.plot(
        w_or_h,
        html_file_path=os.path.join(PATH[f"{w_or_h}|gps_map/"], "gps_map.html"),
        **plot_keyword_arguments,
    )

    cluster_x_element_file_path = PATH[f"{w_or_h}|cluster_x_element.tsv"]

    if not os.path.isfile(cluster_x_element_file_path):

        continue

    cluster_x_element = pd.read_csv(
        cluster_x_element_file_path, sep="\t", index_col=0
    )

    # For every column take the row label holding the maximum, strip the
    # leading "Cluster" from that label, and convert the remainder to int.
    element_cluster = (
        cluster_x_element.apply(pd.Series.idxmax).str[len("Cluster") :].astype(int)
    )

    gps_map.set_element_label(
        w_or_h,
        element_cluster[elements],
        bandwidth_factor=SETTING[f"gps_map_{w_or_h}_bandwidth_factor"],
    )

    gps_map.plot(
        w_or_h,
        html_file_path=os.path.join(PATH[f"{w_or_h}|hcc|gps_map/"], "gps_map.html"),
        **plot_keyword_arguments,
    )

In [None]:
# Persist the GPS map object to the configured "gps_map.pickle.gz" path.
gps_map_file_path = PATH["gps_map.pickle.gz"]

kraft.write_gps_map(gps_map, gps_map_file_path)