In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Read the notebook configuration from setting.yaml (resolved against the
# current working directory) and derive the path lookup used by later cells.
with open("setting.yaml") as yaml_file:

    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1 and
    # raises TypeError on PyYAML >= 6; safe_load parses plain YAML and refuses
    # to construct arbitrary Python objects.
    # NOTE(review): assumes setting.yaml holds only plain scalars/lists/maps
    # (no python-specific tags) — confirm.
    SETTING = yaml.safe_load(yaml_file)

# make_path_dict comes from the `environment` star-import; it maps the
# settings to the PATH dictionary indexed by the cells below.
PATH = make_path_dict(SETTING)

In [None]:
# Load the two NMF factor matrices from their TSV files (first column becomes
# the index) and label the column axis of each.
w = pd.read_table(PATH["w.tsv"], index_col=0).rename_axis("Factor", axis="columns")

h = pd.read_table(PATH["h.tsv"], index_col=0).rename_axis(
    SETTING["sample_alias"], axis="columns"
)

In [None]:
# Draw one heat map per factor matrix. Each job is
# (matrix label, matrix, axis to normalize along, axis to cluster along).
heat_map_jobs = (("w", w, 1, 0), ("h", h, 0, 1))

for w_or_h, df, normalization_axis, cluster_axis in heat_map_jobs:

    # Turn clustering off when the clustered axis is longer than the
    # configured maximum.
    exceeds_cluster_limit = df.shape[cluster_axis] > SETTING["plot_cluster_max_size"]

    if exceeds_cluster_limit:

        cluster_axis = None

    output_directory_path = PATH["{}/".format(w_or_h)]

    ccal.plot_heat_map(
        df,
        normalization_axis=normalization_axis,
        normalization_method="-0-",
        cluster_axis=cluster_axis,
        title="NMF {}".format(w_or_h.title()),
        xaxis_title=df.columns.name,
        yaxis_title=df.index.name,
        html_file_path="{}/heat_map.html".format(output_directory_path),
    )

In [None]:
# For features (rows of w) and samples (columns of h), plot a bubble map of
# the elements listed under "<element_type>s_to_peek" in the settings.
for element_type, w_or_h in (("feature", "w"), ("sample", "h")):

    requested_elements = SETTING["{}s_to_peek".format(element_type)]

    # Nothing was requested for this element type.
    if requested_elements is None:

        continue

    # Each requested name is looked up as-is and with a "(-) " / "(+) " prefix.
    # NOTE(review): presumably the prefixed forms are signed-split element
    # names produced upstream — confirm.
    elements_to_peek = [
        template.format(element)
        for element in requested_elements
        for template in ("{}", "(-) {}", "(+) {}")
    ]

    # Index.intersection replaces the `index & other` operator form, which is
    # deprecated as a set operation in pandas 1.x and no longer performs an
    # intersection in pandas 2.x. The candidate Index carries the same name as
    # the axis it is matched against, so the axis name is kept on the result.
    if element_type == "feature":

        df = w.loc[
            w.index.intersection(pd.Index(elements_to_peek, name=w.index.name))
        ]

    elif element_type == "sample":

        df = h[
            h.columns.intersection(pd.Index(elements_to_peek, name=h.columns.name))
        ]

    ccal.plot_bubble_map(
        df,
        title="{} to Peek".format(SETTING["{}_alias".format(element_type)]),
        xaxis_title=df.columns.name,
        yaxis_title=df.index.name,
        html_file_path="{}/bubble_map.html".format(PATH["{}/".format(w_or_h)]),
    )

In [None]:
# Build and save a "Low"/"High" signature table for each factor matrix.
# w is transposed on the way in so that both matrices are iterated row by row.
for w_or_h, df in (("w", w.T), ("h", h)):

    # Start from an empty frame with the same labels; cells that are never
    # marked stay missing.
    df_signature = pd.DataFrame(index=df.index, columns=df.columns)

    for factor_name, factor_values in df.iterrows():

        signature_title = "NMF K={} {} {} Signature".format(
            SETTING["nmf_k"], w_or_h.title(), factor_name
        )

        low_elements, high_elements = ccal.select_series_low_and_high_index(
            factor_values, title=signature_title
        )

        # "Low" is written first, then "High", so "High" wins on any overlap.
        for label, elements in (("Low", low_elements), ("High", high_elements)):

            df_signature.loc[factor_name, elements] = label

    # Undo the transpose applied to w before writing it out.
    if w_or_h == "w":

        df_signature = df_signature.T

    signature_file_path = PATH["{}|signature.tsv".format(w_or_h)]

    df_signature.to_csv(signature_file_path, sep="\t")

In [None]:
# Load the feature-by-sample signal table (first column becomes the index),
# replace missing values with 0, and label the column axis.
signal = pd.read_table(PATH["feature_x_sample.signal.tsv"], index_col=0).fillna(
    value=0
)

signal.columns.name = SETTING["sample_alias"]

In [None]:
# Make match panels of each factor matrix against the signal table. For w,
# both the targets and the signal are transposed.
match_jobs = (("w", w.T, signal.T), ("h", h, signal))

for w_or_h, targets, df in match_jobs:

    signal_data = {"Signal": {"df": df, "data_type": "continuous"}}

    match_directory_path = PATH["{}|match/".format(w_or_h)]

    ccal.make_match_panels(
        targets,
        signal_data,
        n_required_for_match_function=SETTING["n_required_for_match_function"],
        extreme_feature_threshold=SETTING["extreme_feature_threshold"],
        plot_std=SETTING["plot_std"],
        directory_path=match_directory_path,
    )