In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Read the run configuration from setting.yaml.
# `yaml.safe_load` is used instead of `yaml.load(stream)`: calling `yaml.load`
# without an explicit Loader is deprecated since PyYAML 5.1 and raises a
# TypeError in PyYAML 6. `safe_load` only builds plain Python objects
# (dict, list, str, numbers, ...).
# NOTE(review): if setting.yaml ever relies on custom/python-specific YAML
# tags, pass an explicit Loader to `yaml.load` instead.
with open("setting.yaml") as yaml_file:

    setting = yaml.safe_load(yaml_file)

# Build the dict of paths used by the cells below from the loaded setting.
path = make_path_dict(setting)

In [None]:
# Read the W and H tables (first column of each file becomes the index) and
# name their column axes; those names are later used as plot axis titles.
w = pd.read_table(path["w.tsv"], index_col=0).rename_axis("Factor", axis="columns")

h = pd.read_table(path["h.tsv"], index_col=0).rename_axis(
    setting["sample_alias"], axis="columns"
)

In [None]:
# Plot one heat map per matrix.
# Each entry is (label, matrix, normalization_axis, cluster_axis).
for w_or_h, df, normalization_axis, cluster_axis in (
    ("w", w, 1, 0),
    ("h", h, 0, 1),
):

    # Only plot when the size along the cluster axis is below the configured
    # limit; otherwise move on to the next matrix.
    if not df.shape[cluster_axis] < setting["plot_cluster_max_size"]:

        continue

    ccal.plot_heat_map(
        df,
        normalization_axis=normalization_axis,
        normalization_method="-0-",
        cluster_axis=cluster_axis,
        title="NMF K={} {}".format(setting["nmf_k"], w_or_h.title()),
        xaxis_title=df.columns.name,
        yaxis_title=df.index.name,
    )

In [None]:
# Build one comparison panel per matrix, passing the same frame as both
# inputs. W is transposed so that, like H, its rows are the factors.
for w_or_h, df in (("w", w.T), ("h", h)):

    ccal.make_comparison_panel(
        df,
        df,
        axis=1,
        title="NMF K={} {}".format(setting["nmf_k"], w_or_h.title()),
        name_0=df.index.name,
        name_1=df.index.name,
    )

In [None]:
# For features (rows of W) and samples (rows of H transposed), plot a bubble
# map of the rows whose label contains any of the configured substrings
# (setting["features_to_peek"] / setting["samples_to_peek"]).
for element_type in ("feature", "sample"):

    if element_type == "feature":

        df = w

    elif element_type == "sample":

        df = h.T

    elements_to_peek = setting["{}s_to_peek".format(element_type)]

    # A null entry in the setting means "nothing to peek" for this type.
    if elements_to_peek is None:

        continue

    # Expand the configured substrings into the matching index labels.
    elements_to_peek = tuple(
        i for i in df.index if any(element in i for element in elements_to_peek)
    )

    # Nothing matched: skip instead of handing an empty frame to the plotter.
    if not elements_to_peek:

        print("No {} matched the {}s_to_peek setting.".format(element_type, element_type))

        continue

    element_alias = setting["{}_alias".format(element_type)]

    # Select rows with a boolean mask. The previous `df.index & elements_to_peek`
    # relied on `&` meaning set intersection on an Index, which pandas
    # deprecated in 1.x and turned into an element-wise logical operation in 2.0.
    ccal.plot_bubble_map(
        df.loc[df.index.isin(elements_to_peek)],
        title="{} to Peek".format(element_alias),
        xaxis_title="Factor",
        yaxis_title=element_alias,
    )

In [None]:
# Feature-by-sample tables used as match data: the raw input and the context
# signal, with missing signal values replaced by 0.
raw = pd.read_table(setting["feature_x_sample_file_path"], index_col=0)

signal = pd.read_table(
    path["feature_x_sample.context_signal.tsv"], index_col=0
).fillna(0)

# Each entry is (label, targets, raw data, signal data). For W the targets
# and both data tables are transposed; for H they are used as loaded.
for w_or_h, targets, df_raw, df_signal in (
    ("w", w.T, raw.T, signal.T),
    ("h", h, raw, signal),
):

    match_directory_path = path["{}|match/".format(w_or_h)]

    ccal.make_match_panels(
        targets,
        {
            "signal": {"df": df_signal, "data_type": "continuous"},
            "raw": {"df": df_raw, "data_type": "continuous"},
        },
        n_job=setting["max_n_job"],
        n_required_for_match_function=0.64,
        extreme_feature_threshold=setting["extreme_feature_threshold"],
        plot_std=setting["plot_std"],
        directory_path=match_directory_path,
    )

In [None]:
# For every factor (a row of W transposed, or a row of H), call
# ccal.select_series_low_and_high_index on that factor's values, passing a
# file prefix inside the matrix's signature directory.
for w_or_h, df in (("w", w.T), ("h", h)):

    signature_directory_path = path["{}|signature/".format(w_or_h)]

    for factor_name, factor_values in df.iterrows():

        signature_title = "NMF K={} {} {} Signature".format(
            setting["nmf_k"], w_or_h.title(), factor_name
        )

        ccal.select_series_low_and_high_index(
            factor_values,
            title=signature_title,
            file_path_prefix="{}/{}".format(signature_directory_path, factor_name),
        )