In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Read the notebook settings from setting.yaml and build the path lookup (PATH)
# that the later cells index by file / directory key.
with open("setting.yaml") as yaml_file:

    # safe_load instead of yaml.load: calling yaml.load without an explicit
    # Loader is deprecated since PyYAML 5.1 and raises TypeError in PyYAML 6.
    # The safe loader also refuses to construct arbitrary Python objects.
    SETTING = yaml.safe_load(yaml_file)

PATH = make_path_dict(SETTING)

In [None]:
# Load the GPS map object used by the plotting cell below; its path is looked up
# in PATH under the "gps_map.pickle.gz" key.
# NOTE(review): the key suggests a gzipped pickle — presumably unpickled by
# ccal.read_gps_map, so only load files from a trusted source; confirm.
gps_map = ccal.read_gps_map(PATH["gps_map.pickle.gz"])

In [None]:
# Feature annotation datasets declared in the settings; a missing (None) entry
# is treated as "no datasets".
feature_data = {} if SETTING["feature_data"] is None else SETTING["feature_data"]

# Attach each dataset's table to its settings entry under the "dataframe" key.
for data_name in feature_data:
    data_dict = feature_data[data_name]
    print(data_name)
    # Tab-separated file; the first column becomes the row index.
    data_dict["dataframe"] = pd.read_csv(
        data_dict["file_path"],
        sep="\t",
        index_col=0,
    )

In [None]:
# Sample annotation datasets declared in the settings; a missing (None) entry
# is treated as "no datasets".
sample_data = {} if SETTING["sample_data"] is None else SETTING["sample_data"]

# Attach each dataset's table to its settings entry under the "dataframe" key.
for data_name in sample_data:
    data_dict = sample_data[data_name]
    print(data_name)
    # Tab-separated file; the first column becomes the row index.
    data_dict["dataframe"] = pd.read_csv(
        data_dict["file_path"],
        sep="\t",
        index_col=0,
    )

In [None]:
# Add every index referenced by the "w_summary" / "h_summary" settings to the
# matching dataset's "peek" list, unless that dataset already peeks at "all".
for w_or_h, data_dicts in (("w", feature_data), ("h", sample_data)):

    summary = SETTING["{}_summary".format(w_or_h)]

    if summary is None:

        continue

    for factor_name, data_name_indices in summary.items():

        for data_name, indices in data_name_indices.items():

            peek = data_dicts[data_name]["peek"]

            if peek == "all":

                continue

            # Only append indices that are not peeked yet. A plain
            # `peek += indices` grows the list on every re-run of this cell and
            # whenever several factors share an index, which later yields
            # duplicated rows and redundant plots written to the same file.
            new_indices = []

            for index in indices:

                if index not in peek and index not in new_indices:

                    new_indices.append(index)

            data_dicts[data_name]["peek"] += new_indices

In [None]:
# For both the feature ("w") and the sample ("h") side of the GPS map, plot every
# peeked annotation row:
#   1. as an annotation on the GPS map, and
#   2. when the elements carry cluster labels, as a per-cluster comparison
#      (bar chart for binary values, violin plot for categorical / continuous).
# Each plot is written as an HTML file into a directory looked up in PATH.
for element_type, w_or_h, elements, element_label, label_color, data_dicts in (
    (
        "feature",
        "w",
        gps_map.w_elements,
        gps_map.w_element_label,
        gps_map.w_label_colors,
        feature_data,
    ),
    (
        "sample",
        "h",
        gps_map.h_elements,
        gps_map.h_element_label,
        gps_map.h_label_colors,
        sample_data,
    ),
):

    for data_name, data_dict in data_dicts.items():

        dataframe = data_dict["dataframe"]

        # "all" means every row of the table; otherwise "peek" lists the rows.
        if data_dict["peek"] == "all":

            indices = dataframe.index

        else:

            indices = data_dict["peek"]

        # Keep only the columns that are also elements of the GPS map.
        # Index.intersection is used instead of `columns & elements`: the `&`
        # operator on an Index is deprecated as a set operation and is an
        # element-wise logical operation in pandas 2.0.
        shared_elements = dataframe.columns.intersection(elements)

        for index, element_value in dataframe.loc[indices, shared_elements].iterrows():

            print(index)

            # Nothing to plot when the row has no value for any shared element.
            if element_value.isna().all():

                continue

            # Labeled (clustered) maps are saved under the "hcc" directory.
            if element_label is None:

                gps_map_directory_path = PATH["{}|gps_map/".format(w_or_h)]

            else:

                gps_map_directory_path = PATH["{}|hcc|gps_map/".format(w_or_h)]

            file_name = ccal.normalize_file_name("{}_{}.html".format(data_name, index))

            gps_map.plot(
                w_or_h,
                annotation_x_element=element_value.to_frame().T,
                annotation_std_maxs=(SETTING["plot_std"],),
                element_marker_size=SETTING[
                    "gps_map_{}_element_marker_size".format(w_or_h)
                ],
                title="{} on GPS Map<br>{}: {}".format(
                    SETTING["{}_alias".format(element_type)], data_name, index
                ),
                html_file_path=os.path.join(gps_map_directory_path, file_name),
            )

            # The per-cluster comparison needs element labels.
            if element_label is None:

                continue

            label_element_value = element_value.groupby(by=element_label)

            html_file_path = os.path.join(
                PATH["{}|hcc|comparison/".format(w_or_h)], file_name
            )

            layout = {
                "title": {"text": "{}: {}".format(data_name, index)},
                "xaxis": {
                    "title": "{} Cluster".format(
                        SETTING["{}_alias".format(element_type)]
                    ),
                    "dtick": 1,
                },
                "yaxis": {"title": "Value"},
            }

            data_type = ccal.get_data_type(element_value)

            # NOTE(review): groups and colors are paired positionally by zip; if
            # a cluster has no element among the shared columns the pairing may
            # shift — confirm against how the label colors are ordered.
            if data_type == "binary":

                # One bar per cluster: sum of the values over the group size.
                data = [
                    {
                        "type": "bar",
                        "name": label,
                        "x": (label,),
                        "y": (element_value_.sum() / element_value_.size,),
                        "marker": {"color": color},
                    }
                    for (label, element_value_), color in zip(
                        label_element_value, label_color
                    )
                ]

            elif data_type in ("categorical", "continuous"):

                # One violin per cluster showing every point and the mean line.
                data = [
                    {
                        "type": "violin",
                        "name": label,
                        "y": element_value_,
                        "marker": {"color": color},
                        "scalemode": "count",
                        "meanline": {"visible": True},
                        "points": "all",
                    }
                    for (label, element_value_), color in zip(
                        label_element_value, label_color
                    )
                ]

            else:

                # Any other data type gets no comparison plot.
                continue

            ccal.plot_and_save({"layout": layout, "data": data}, html_file_path)