In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Read the notebook settings from setting.yaml and derive the lookup of file
# paths (`path`) that the cells below index by file name.
with open("setting.yaml") as yaml_file:

    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6; safe_load parses plain YAML
    # (mappings, sequences, scalars) without constructing arbitrary Python
    # objects.
    # NOTE(review): if setting.yaml relies on Python-specific YAML tags, switch
    # to yaml.load(yaml_file, Loader=yaml.FullLoader) instead.
    setting = yaml.safe_load(yaml_file)

# make_path_dict comes from the star import of `environment`; its result is
# subscripted with file names such as "feature_x_sample.processed.tsv" below.
path = make_path_dict(setting)

In [None]:
# Load the processed feature-by-sample table (tab-separated), using its first
# column as the row index.
feature_x_sample_processed = pd.read_csv(
    path["feature_x_sample.processed.tsv"], sep="\t", index_col=0
)

In [None]:
# For each orientation of the processed matrix (features as rows, then samples
# as rows):
#   1. build the context matrix from the per-element fit parameters,
#   2. summarize it,
#   3. rank the elements by their negative / positive context sums and plot
#      the ranking (highlighting the configured "peek" elements),
#   4. plot the context of the few most extreme elements.
for element_type in ("feature", "sample"):

    # Rows of `df` are the elements being analyzed; transpose for samples.
    if element_type == "feature":

        df = feature_x_sample_processed

    elif element_type == "sample":

        df = feature_x_sample_processed.T

    element_x_fit_parameter_file_path = path[
        "{}_x_fit_parameter.tsv".format(element_type)
    ]

    # One row of fit parameters per element (columns used below: "N Data",
    # "Location", "Scale", "Degree of Freedom", "Shape").
    element_x_fit_parameter = pd.read_table(
        element_x_fit_parameter_file_path, index_col=0
    )

    context_file_path = path["feature_x_sample.{}_context.tsv".format(element_type)]

    # The return value is intentionally not kept: the matrix is re-read from
    # the output file right below (presumably written by this call via
    # `output_file_path` -- confirm against ccal).
    ccal.make_context_matrix(
        df,
        n_job=setting["max_n_job"],
        skew_t_pdf_fit_parameter=element_x_fit_parameter,
        output_file_path=context_file_path,
    )

    context = pd.read_table(context_file_path, index_col=0)

    element_alias = setting["{}_alias".format(element_type)]

    ccal.summarize_feature_x_sample(
        context,
        feature_x_sample_alias="{} Context Matrix".format(element_alias),
        feature_alias=setting["feature_alias"],
        sample_alias=setting["sample_alias"],
        feature_x_sample_value_name="Context",
        plot_heat_map_max_size=setting["plot_heat_map_max_size"],
        plot_distributions_max_size=setting["plot_distributions_max_size"],
        plot_rug_max_size=setting["plot_rug_max_size"],
    )

    # Elements to highlight in the ranking plots; None disables highlighting.
    # Does not depend on the context sign, so it is looked up once per
    # element type.
    elements_to_peek = setting["{}s_to_peek".format(element_type)]

    for context_type in ("negative", "positive"):

        # Clipping at 0 keeps only the values of one sign, so the row sum is
        # the total negative (or positive) context of each element.
        if context_type == "negative":

            clip_keyword_argument = {"upper": 0}

        elif context_type == "positive":

            clip_keyword_argument = {"lower": 0}

        # Per-element context sum, sorted ascending: position 0 is the most
        # negative sum and position -1 is the most positive one.
        element_context = (
            context.clip(**clip_keyword_argument).sum(axis=1).sort_values()
        )

        # Integer positions (ranks) of the peeked elements in the sorted sums.
        if elements_to_peek is None:

            peek_ranks = []

        else:

            peek_ranks = np.flatnonzero(element_context.index.isin(elements_to_peek))

        ccal.plot_points(
            (tuple(range(element_context.size)), peek_ranks),
            # .iloc: peek_ranks are positions, not index labels. Plain
            # Series[...] would treat them as labels on an integer index and
            # relies on a deprecated positional fallback otherwise.
            (element_context, element_context.iloc[peek_ranks]),
            names=("All", "Peek"),
            texts=(element_context.index, element_context.index[peek_ranks]),
            modes=("markers", "markers+text"),
            title="{} Ranking by {} Context Sum".format(
                element_alias, context_type.title()
            ),
            xaxis_title="Rank",
            yaxis_title="{} Context Sum".format(context_type.title()),
        )

        # Skip the per-element plots when every element has the same sum
        # (there is no meaningful "most extreme" element).
        if 1 < element_context.unique().size:

            # Plot up to 3 extreme elements; bounded by the number of elements
            # so positional lookup below cannot run past either end.
            n_extreme = min(3, element_context.size)

            if context_type == "negative":

                # Lowest sums sit at the front of the ascending ranking.
                extreme_ranks = range(n_extreme)

            elif context_type == "positive":

                # Highest sums sit at the back; negative positions count from
                # the end.
                extreme_ranks = range(-n_extreme, 0)

            for rank in extreme_ranks:

                ranked_element = element_context.index[rank]

                ranked_element_values = df.loc[ranked_element]

                (
                    n_data,
                    location,
                    scale,
                    degree_of_freedom,
                    shape,
                ) = element_x_fit_parameter.loc[
                    ranked_element,
                    ["N Data", "Location", "Scale", "Degree of Freedom", "Shape"],
                ]

                ccal.plot_context(
                    ranked_element_values,
                    n_data=n_data,
                    location=location,
                    scale=scale,
                    degree_of_freedom=degree_of_freedom,
                    shape=shape,
                    # Only draw the rug when the number of non-missing values
                    # is below the configured limit.
                    plot_rug=ranked_element_values.dropna().size
                    < setting["plot_rug_max_size"],
                    title="{} {} Context Sum Rank {}: {}".format(
                        element_alias, context_type.title(), rank, ranked_element
                    ),
                    xaxis_title=setting["feature_x_sample_value_name"],
                )