In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all modules before
# executing code, so edits to imported modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Load the project settings from the YAML file in the working directory.
# `yaml.safe_load` is used rather than a bare `yaml.load(...)`: calling
# `yaml.load` without an explicit `Loader` is deprecated since PyYAML 5.1,
# raises a TypeError in PyYAML 6, and on older versions can construct
# arbitrary Python objects from the file.
# NOTE(review): `yaml` and `make_path_dict` are presumably provided by
# `from environment import *` -- confirm.
with open("setting.yaml") as yaml_file:

    SETTING = yaml.safe_load(yaml_file)

# Path lookup table built from the settings; later cells index it by key.
PATH = make_path_dict(SETTING)

In [None]:
# Both matrices are tab-separated tables whose first column is the row index.
tsv_read_options = {"sep": "\t", "index_col": 0}

# W matrix: its column axis is named "Factor".
w = pd.read_csv(PATH["w.tsv"], **tsv_read_options)

w.columns.name = "Factor"

# H matrix: its column axis is named with the configured sample alias.
h = pd.read_csv(PATH["h.tsv"], **tsv_read_options)

h.columns.name = SETTING["sample_alias"]

In [None]:
# Optionally prune elements from W and H based on their entropy.
# For W the entropy is computed per row and rows are dropped; for H it is
# computed per column and columns are dropped.
for feature_type, w_or_h, df, apply_axis, drop_axis in (
    ("feature", "w", w, 1, 0),
    ("sample", "h", h, 0, 1),
):

    fraction_key = "gps_map_caotic_{}_element_fraction_to_drop".format(w_or_h)

    fraction_to_drop = SETTING[fraction_key]

    # A null setting disables pruning for this matrix.
    if fraction_to_drop is None:

        continue

    matrix_name = w_or_h.title()

    print("{} shape before: {}".format(matrix_name, df.shape))

    element_entropy = df.apply(ccal.compute_entropy, axis=apply_axis)

    ranking_title = {
        "text": "{} Ranking by Entropy in {}".format(
            SETTING["{}_alias".format(feature_type)], matrix_name
        )
    }

    # NOTE(review): presumably selects the configured top fraction of elements
    # by entropy (">") and plots the ranking -- confirm against ccal.
    indices_to_drop = ccal.select_series_indices(
        element_entropy,
        ">",
        fraction=fraction_to_drop,
        title=ranking_title,
        xaxis={"title": "Rank"},
        yaxis={"title": "Entropy"},
    )

    # Dropped in place so the `w` / `h` frames themselves are pruned.
    df.drop(indices_to_drop, axis=drop_axis, inplace=True)

    print("{} shape after: {}".format(matrix_name, df.shape))

In [None]:
# Pull-related GPSMap parameters, read from the settings.
gps_map_pull_kwargs = {
    "w_n_pull": SETTING["gps_map_w_n_pull"],
    "w_pull_power": SETTING["gps_map_w_pull_power"],
    "h_n_pull": SETTING["gps_map_h_n_pull"],
    "h_pull_power": SETTING["gps_map_h_pull_power"],
}

# W is handed over transposed; plot=False is passed here and the maps are
# plotted explicitly in the following cell.
gps_map = ccal.GPSMap(w=w.T, h=h, plot=False, **gps_map_pull_kwargs)

In [None]:
# Plot the GPS map for the W elements and for the H elements. When a
# cluster-by-element table exists on disk, label the elements with their
# cluster and plot the map a second time.
for element_type, w_or_h, elements in (
    ("feature", "w", gps_map.w_elements),
    ("sample", "h", gps_map.h_elements),
):

    # Nothing to plot for this side of the map.
    if elements is None:

        continue

    title = "{} GPS Map".format(SETTING["{}_alias".format(element_type)])

    marker_size = SETTING["gps_map_{}_element_marker_size".format(w_or_h)]

    gps_map.plot(
        w_or_h,
        element_marker_size=marker_size,
        title=title,
        html_file_path=os.path.join(
            PATH["{}|gps_map/".format(w_or_h)], "gps_map.html"
        ),
    )

    cluster_file_path = PATH["{}|cluster_x_element.tsv".format(w_or_h)]

    # Without a clustering result there are no labels to add.
    if not os.path.isfile(cluster_file_path):

        continue

    cluster_x_element = pd.read_csv(cluster_file_path, sep="\t", index_col=0)

    # For each element (column), take the row label holding the maximum value.
    top_cluster_names = cluster_x_element.apply(
        lambda memberships: memberships.idxmax()
    )

    # Strip the leading "Cluster" from the row label and parse the rest as int.
    cluster_numbers = top_cluster_names.str[len("Cluster") :].astype(int)

    element_labels = cluster_numbers[elements]

    gps_map.set_element_label(
        w_or_h,
        element_labels,
        bandwidth_factor=SETTING["gps_map_{}_bandwidth_factor".format(w_or_h)],
    )

    gps_map.plot(
        w_or_h,
        element_marker_size=marker_size,
        title=title,
        html_file_path=os.path.join(
            PATH["{}|hcc|gps_map/".format(w_or_h)], "gps_map.html"
        ),
    )

In [None]:
# Write the GPS map object to the path registered under "gps_map.pickle.gz".
gps_map_file_path = PATH["gps_map.pickle.gz"]

ccal.write_gps_map(gps_map, gps_map_file_path)