In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Read the notebook configuration from setting.yaml and derive the path
# dictionary used by every later cell.
with open("setting.yaml") as yaml_file:

    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError in PyYAML 6; safe_load parses plain YAML settings
    # without allowing arbitrary Python object construction.
    SETTING = yaml.safe_load(yaml_file)

PATH = make_path_dict(SETTING)

In [None]:
# Load the NMF w and h tables from their tab-separated files, using the first
# column of each file as the index.
w, h = (
    pd.read_table(PATH[file_name], index_col=0) for file_name in ("w.tsv", "h.tsv")
)

In [None]:
# Draw one heat map per NMF matrix (w, then h) and write it as
# heat_map.html inside the directory registered in PATH for that matrix.
heat_map_specs = (
    ("feature", "w", w, 1, 0, "Factor", SETTING["feature_alias"]),
    ("sample", "h", h, 0, 1, SETTING["sample_alias"], "Factor"),
)

for heat_map_spec in heat_map_specs:

    (
        element_type,
        w_or_h,
        df,
        normalization_axis,
        cluster_axis,
        xaxis_title,
        yaxis_title,
    ) = heat_map_spec

    # Turn clustering off when the axis to cluster has more entries than the
    # configured maximum.
    cluster_axis_size = df.shape[cluster_axis]

    if SETTING["plot_cluster_max_size"] < cluster_axis_size:

        cluster_axis = None

    heat_map_title = "NMF K={} {}".format(SETTING["nmf_k"], w_or_h.title())

    heat_map_directory_path = PATH["{}/".format(w_or_h)]

    ccal.plot_heat_map(
        df,
        normalization_axis=normalization_axis,
        normalization_method="-0-",
        cluster_axis=cluster_axis,
        title=heat_map_title,
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
        html_file_path="{}/heat_map.html".format(heat_map_directory_path),
    )

In [None]:
# Plot a bubble map of the w rows (features) and of the h columns (samples)
# whose names contain any of the substrings configured under
# "features_to_peek" / "samples_to_peek". A matrix whose setting is None is
# skipped.
for (element_type, w_or_h, xaxis_title, yaxis_title) in (
    ("feature", "w", "Factor", SETTING["feature_alias"]),
    ("sample", "h", SETTING["sample_alias"], "Factor"),
):

    # Read the setting once so both branches use the same, defined name
    # (the sample branch previously referenced an undefined variable).
    elements_to_peek = SETTING["{}s_to_peek".format(element_type)]

    if elements_to_peek is None:

        continue

    if element_type == "feature":

        # Boolean mask over the rows of w: True where the row name contains
        # at least one requested substring.
        is_element_to_peek = [
            any(element_to_peek in element for element_to_peek in elements_to_peek)
            for element in w.index
        ]

        df = w.loc[is_element_to_peek, :]

    elif element_type == "sample":

        # Boolean mask over the columns of h, built the same way.
        is_element_to_peek = [
            any(element_to_peek in element for element_to_peek in elements_to_peek)
            for element in h.columns
        ]

        # h is 2-dimensional, so .loc takes exactly two indexers (rows, columns).
        df = h.loc[:, is_element_to_peek]

    ccal.plot_bubble_map(
        df,
        title="{} to Peek".format(SETTING["{}_alias".format(element_type)]),
        xaxis_title=xaxis_title,
        yaxis_title=yaxis_title,
        html_file_path="{}/bubble_map.html".format(PATH["{}/".format(w_or_h)]),
    )

In [None]:
# For each factor (a row of w.T and of h), select its low and high elements
# with ccal and record them as "Low" / "High" in a table shaped like the
# input; the table is written as a tab-separated file per matrix.
for w_or_h, df in (("w", w.T), ("h", h)):

    df_signature = pd.DataFrame(index=df.index, columns=df.columns)

    for factor_name, factor_values in df.iterrows():

        signature_title = "NMF K={} {} {} Signature".format(
            SETTING["nmf_k"], w_or_h.title(), factor_name
        )

        low_elements, high_elements = ccal.select_series_low_and_high_index(
            factor_values, title=signature_title
        )

        # "Low" is assigned before "High", matching the original order.
        for label, elements in (("Low", low_elements), ("High", high_elements)):

            df_signature.loc[factor_name, elements] = label

    # w was processed transposed; flip it back before saving.
    if w_or_h == "w":

        df_signature = df_signature.T

    signature_file_path = PATH["{}|signature.tsv".format(w_or_h)]

    df_signature.to_csv(signature_file_path, sep="\t")

In [None]:
# Load the table at the configured "feature_x_sample_file_path", using its
# first column as the index.
raw_file_path = SETTING["feature_x_sample_file_path"]

raw = pd.read_table(raw_file_path, index_col=0)

In [None]:
# Load the signal table (first column as the index) and replace missing
# values with 0.
signal = pd.read_table(PATH["feature_x_sample.signal.tsv"], index_col=0).fillna(0)

In [None]:
# Build match panels for the factors of w (transposed) and of h against the
# raw and signal tables; results go to the "<w|h>|match/" directory in PATH.
match_panel_setting_keys = (
    "n_required_for_match_function",
    "extreme_feature_threshold",
    "plot_std",
)

for w_or_h, targets, df_raw, df_signal in (
    ("w", w.T, raw.T, signal.T),
    ("h", h, raw, signal),
):

    data_dicts = {
        "raw": {"df": df_raw, "data_type": "continuous"},
        "signal": {"df": df_signal, "data_type": "continuous"},
    }

    # Forward the configured options as keyword arguments of the same name.
    match_panel_settings = {key: SETTING[key] for key in match_panel_setting_keys}

    ccal.make_match_panels(
        targets,
        data_dicts,
        directory_path=PATH["{}|match/".format(w_or_h)],
        **match_panel_settings,
    )