In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Parse the notebook settings from setting.yaml.
#
# yaml.safe_load replaces the bare yaml.load(yaml_file): without an explicit
# Loader, yaml.load may construct arbitrary Python objects from the file
# (PyYAML >= 5.1 warns about it) and PyYAML >= 6 raises TypeError because the
# Loader argument became mandatory. safe_load only builds plain YAML types
# (dict, list, str, numbers, None, ...), which is all the cells below read.
with open("setting.yaml") as yaml_file:

    SETTING = yaml.safe_load(yaml_file)

# make_path_dict is presumably provided by the star import of `environment`;
# the following cells index its result by file/directory key.
PATH = make_path_dict(SETTING)

In [None]:
# Load the previously saved GPS map object; its w_*/h_* element attributes and
# plot_gps_map method drive the plotting cell at the end of the notebook.
# NOTE(review): the ".pickle.gz" key suggests this unpickles a file — only load
# files from a trusted source (confirm against ccal.load_gps_map).
gps_map = ccal.load_gps_map(PATH["gps_map.pickle.gz"])

In [None]:
# Build the feature annotation collection: {data_name: {..., "df": DataFrame}}.
#
# Each entry of SETTING["feature_data"] is shallow-copied and extended with a
# "df" table read from its "file_path" (first column used as the row index).
# Working on copies, instead of writing into SETTING itself, keeps this cell
# idempotent: a later cell adds a generated "Test" entry (which has no
# "file_path") to feature_data, and if feature_data were the same dict object
# as SETTING["feature_data"], re-running this cell would fail with
# KeyError: 'file_path' on that entry.
feature_data_setting = SETTING["feature_data"]

# A missing (None) setting simply means there are no feature annotations.
feature_data = {}

if feature_data_setting is not None:

    for data_name, data_dict in feature_data_setting.items():

        feature_data[data_name] = dict(
            data_dict, df=pd.read_table(data_dict["file_path"], index_col=0)
        )

    # Show which annotation sets were loaded.
    print(feature_data.keys())

In [None]:
# Build the sample annotation collection: {data_name: {..., "df": DataFrame}}.
#
# Each entry of SETTING["sample_data"] is shallow-copied and extended with a
# "df" table read from its "file_path" (first column used as the row index).
# Working on copies, instead of writing into SETTING itself, keeps this cell
# idempotent: a later cell adds a generated "Test" entry (which has no
# "file_path") to sample_data, and if sample_data were the same dict object as
# SETTING["sample_data"], re-running this cell would fail with
# KeyError: 'file_path' on that entry.
sample_data_setting = SETTING["sample_data"]

# A missing (None) setting simply means there are no sample annotations.
sample_data = {}

if sample_data_setting is not None:

    for data_name, data_dict in sample_data_setting.items():

        sample_data[data_name] = dict(
            data_dict, df=pd.read_table(data_dict["file_path"], index_col=0)
        )

    # Show which annotation sets were loaded.
    print(sample_data.keys())

In [None]:
# Read the signal table (first column becomes the row index) and replace every
# missing value with 0 in one chained expression.
signal = pd.read_table(
    PATH["feature_x_sample.signal.tsv"], index_col=0
).fillna(value=0)

In [None]:
# Add a generated "Test" annotation set to the feature and the sample
# collections: summary statistics of `signal`, reduced along axis 1 for
# features and along axis 0 for samples.
for element_type, element_data, axis in (
    ("feature", feature_data, 1),
    ("sample", sample_data, 0),
):

    if element_data is not None:

        # One row per statistic, in the same order as the row labels below.
        summary_rows = [
            signal.mean(axis=axis),
            signal.std(axis=axis),
            (signal == 0).sum(axis=axis),
        ]

        df = pd.DataFrame(
            summary_rows, index=("Mean", "Standard Deviation", "Number of 0")
        )

        element_data["Test"] = dict(df=df, data_type="continuous")

In [None]:
# For each side of the GPS map ("w" paired with features, "h" paired with
# samples), plot every annotation row on the map and, when element labels are
# available, also plot a per-cluster comparison of that annotation.
for element_type, w_or_h, elements, element_labels, data_dicts in (
    ("feature", "w", gps_map.w_elements, gps_map.w_element_labels, feature_data),
    ("sample", "h", gps_map.h_elements, gps_map.h_element_labels, sample_data),
):

    if data_dicts is None:

        continue

    for data_name, data_dict in data_dicts.items():

        df = data_dict["df"]

        # An optional "indices" entry restricts which rows of the table are used.
        if "indices" in data_dict:

            df = df.loc[data_dict["indices"]]

        # Keep only the columns named by the map's elements; each remaining row
        # is one annotation to plot.
        for index, element_value in df[elements].iterrows():

            # Categorical annotations get a colorscale sized to the number of
            # distinct non-missing values; every other type passes None.
            if data_dict["data_type"] == "categorical":

                colorscale = ccal.make_colorscale(
                    n_category=element_value.dropna().unique().size, plot=False
                )

            else:

                colorscale = None

            title = "{} GPS Map<br>{}: {}".format(
                SETTING["{}_alias".format(element_type)], data_name, index
            )

            file_name = ccal.make_file_name_from_str(
                "{}_{}.html".format(data_name, index)
            )

            # Output directory depends on whether element labels exist: the
            # "hcc" directory when they do, the plain gps_map directory otherwise.
            if element_labels is None:

                gps_map_directory_path = PATH["{}|gps_map/".format(w_or_h)]

            else:

                gps_map_directory_path = PATH["{}|hcc|gps_map/".format(w_or_h)]

            html_file_path = "{}/{}".format(gps_map_directory_path, file_name)

            # No plotly HTML path when no plotly directory is configured, nor
            # for the generated "Test" annotations.
            if SETTING["plotly_directory_path"] is None or data_name == "Test":

                plotly_html_file_path = None

            else:

                plotly_html_file_path = "{}/{}".format(
                    PATH["plotly|{}_gps_map/".format(w_or_h)], file_name
                )

            # The row is passed as a one-row DataFrame (Series -> frame -> transpose).
            gps_map.plot_gps_map(
                w_or_h,
                annotation_x_element=element_value.to_frame().T,
                annotation_std_maxs=(SETTING["plot_std"],),
                annotation_types=(data_dict["data_type"],),
                annotation_colorscale=colorscale,
                element_marker_size=SETTING[
                    "gps_map_{}_element_marker_size".format(w_or_h)
                ],
                title=title,
                html_file_path=html_file_path,
                plotly_html_file_path=plotly_html_file_path,
            )

            # Without element labels there are no clusters to compare.
            if element_labels is None:

                continue

            # Group this row's values by the element labels.
            # NOTE(review): assumes element_labels lines up with `elements` —
            # TODO confirm against the gps_map object.
            label_element_value = element_value.groupby(by=element_labels)

            # One display name per group label.
            names = tuple(
                "{} Cluster {}".format(SETTING["{}_alias".format(element_type)], label)
                for label in label_element_value.groups.keys()
            )

            # Same file name as the map, written under the comparison directory.
            html_file_path = "{}/{}".format(
                PATH["{}|hcc|comparison/".format(w_or_h)], file_name
            )

            # Continuous and categorical annotations: one box per group.
            # Any data_type other than the three handled here gets no
            # comparison plot.
            if data_dict["data_type"] in ("continuous", "categorical"):

                ccal.plot_violin_or_box(
                    (element_value_ for label, element_value_ in label_element_value),
                    names=names,
                    violin_or_box="box",
                    title=title,
                    xaxis_title="{} Cluster".format(
                        SETTING["{}_alias".format(element_type)]
                    ),
                    yaxis_title="Value",
                    html_file_path=html_file_path,
                )

            # Binary annotations: one bar per group showing sum / group size
            # (the fraction of 1s if values are 0/1 — assumed, TODO confirm).
            elif data_dict["data_type"] == "binary":

                ccal.plot_bar(
                    ((name,) for name in names),
                    (
                        (label_density,)
                        for label_density in label_element_value.sum()
                        / label_element_value.size()
                    ),
                    names=names,
                    title=title,
                    xaxis_title="{} Cluster".format(
                        SETTING["{}_alias".format(element_type)]
                    ),
                    yaxis_title="Density",
                    html_file_path=html_file_path,
                )