In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Load the notebook settings from setting.yaml.
# safe_load is used instead of yaml.load: calling yaml.load without an explicit
# Loader is deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6, and
# safe_load refuses to construct arbitrary Python objects from the file.
with open("setting.yaml") as yaml_file:

    SETTING = yaml.safe_load(yaml_file)

# PATH is indexed below by file/directory keys such as "w.tsv" and "h|gps_map/".
PATH = make_path_dict(SETTING)

In [None]:
# Read the W table and name its column axis "Factor".
w = pd.read_table(PATH["w.tsv"], index_col=0).rename_axis("Factor", axis="columns")

# Plain attribute on the frame; it must be set after rename_axis, which returns
# a new object.
w.name = "NMF W"

# Read the H table and name its column axis with the configured sample alias.
h = pd.read_table(PATH["h.tsv"], index_col=0).rename_axis(
    SETTING["sample_alias"], axis="columns"
)

h.name = "NMF H"

In [None]:
# Optional entropy filter: a quantile of 1 (or more) disables it.
entropy_quantile = SETTING["element_entropy_quantile"]

if entropy_quantile < 1:

    # One entropy value per row of w; keep the rows strictly below the cutoff.
    feature_entropy = w.apply(ccal.compute_entropy, axis=1)

    feature_entropy_cutoff = feature_entropy.quantile(q=entropy_quantile)

    kept_features = feature_entropy.index[feature_entropy < feature_entropy_cutoff]

    w = w.loc[kept_features]

    print(w.shape)

    # One entropy value per column of h; keep the columns strictly below the cutoff.
    sample_entropy = h.apply(ccal.compute_entropy)

    sample_entropy_cutoff = sample_entropy.quantile(q=entropy_quantile)

    kept_samples = sample_entropy.index[sample_entropy < sample_entropy_cutoff]

    h = h[kept_samples]

    print(h.shape)

In [None]:
# Build the GPS map from W (transposed) and H without plotting it yet.
# The pull parameters are read from the settings keys "gps_map_<parameter>".
gps_map = ccal.GPSMap(
    w=w.T,
    h=h,
    plot=False,
    **{
        parameter: SETTING["gps_map_{}".format(parameter)]
        for parameter in ("w_n_pull", "w_pull_power", "h_n_pull", "h_pull_power")
    }
)

In [None]:
# Plot the GPS map once for the W elements ("feature") and once for the
# H elements ("sample"). When a cluster table exists for that side, label the
# elements with their cluster and plot again into the "hcc" directory.
# NOTE: `elements` is bound by the loop but not used inside its body.
for element_type, w_or_h, elements in (
    ("feature", "w", gps_map.w_elements),
    ("sample", "h", gps_map.h_elements),
):

    title = "{}<br>{} GPS Map".format(
        SETTING["feature_x_sample_alias"], SETTING["{}_alias".format(element_type)]
    )

    # Same marker size is used for the unlabelled and the labelled plot.
    element_marker_size = SETTING["gps_map_{}_element_marker_size".format(w_or_h)]

    gps_map.plot_gps_map(
        w_or_h,
        element_marker_size=element_marker_size,
        title=title,
        html_file_path="{}/gps_map.html".format(PATH["{}|gps_map/".format(w_or_h)]),
    )

    cluster_x_column_file_path = PATH["{}|cluster_x_column.tsv".format(w_or_h)]

    # The labelled plot is only made when the cluster table has been written.
    if os.path.isfile(cluster_x_column_file_path):

        bandwidth_factor = SETTING["gps_map_{}_bandwidth_factor".format(w_or_h)]

        cluster_x_column = pd.read_table(cluster_x_column_file_path, index_col=0)

        # For every column, take the row label holding the largest value and
        # turn it into an int by stripping the leading "C" (e.g. "C3" -> 3).
        # idxmax is required here: Series.argmax returns an integer position
        # (not the label) since pandas 1.0, which would break .lstrip.
        element_labels = cluster_x_column.apply(
            lambda column: int(column.idxmax().lstrip("C"))
        )

        gps_map.set_element_labels(
            w_or_h, element_labels, bandwidth_factor=bandwidth_factor
        )

        # Only export to the plotly directory when one is configured.
        if SETTING["plotly_directory_path"] is None:

            plotly_html_file_path = None

        else:

            plotly_html_file_path = "{}/{} GPS Map.html".format(
                SETTING["plotly_directory_path"], element_type.title()
            )

        gps_map.plot_gps_map(
            w_or_h,
            element_marker_size=element_marker_size,
            title=title,
            html_file_path="{}/gps_map.html".format(
                PATH["{}|hcc|gps_map/".format(w_or_h)]
            ),
            plotly_html_file_path=plotly_html_file_path,
        )

In [None]:
# Save the GPS map object to the path registered under "gps_map.pickle.gz".
ccal.dump_gps_map(gps_map, PATH["gps_map.pickle.gz"])