In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *

# Load the notebook settings from "setting.yaml" and derive the path
# dictionary from them.
# NOTE(review): the original `with` statement had no target name and
# `yaml.load()` was called without a stream, so the cell could not run; the
# handle name and the loader used here are a reconstruction — confirm.
with open("setting.yaml") as setting_file:

    # safe_load builds only plain Python objects, which is sufficient for a
    # settings file and avoids executing arbitrary YAML tags.
    SETTING = yaml.safe_load(setting_file)

PATH = make_path_dict(SETTING)

In [None]:
# Read the GPS map object from the path registered under "gps_map.pickle.gz".
# NOTE(review): the file name suggests a gzipped pickle — only load files from
# a trusted source; confirm what kraft.read_gps_map does internally.
gps_map = kraft.read_gps_map(PATH["gps_map.pickle.gz"])

In [None]:
# Collect the feature data entries that define a "peek" key; a "feature_data"
# setting of None is treated as "no entries".
feature_data_dicts = SETTING["feature_data"]

feature_data_dicts = (
    {}
    if feature_data_dicts is None
    else {
        name: entry for name, entry in feature_data_dicts.items() if "peek" in entry
    }
)

# Read each kept entry's tab-separated file (first column as the index) and
# store the table on the entry itself under "dataframe".
for data_name in feature_data_dicts:

    print(data_name)

    data_dict = feature_data_dicts[data_name]

    data_dict["dataframe"] = pd.read_csv(
        data_dict["file_path"],
        sep="\t",
        index_col=0,
    )

In [None]:
# Collect the sample data entries that define a "peek" key; a "sample_data"
# setting of None is treated as "no entries".
sample_data_dicts = SETTING["sample_data"]

sample_data_dicts = (
    {}
    if sample_data_dicts is None
    else {
        name: entry for name, entry in sample_data_dicts.items() if "peek" in entry
    }
)

# Read each kept entry's tab-separated file (first column as the index) and
# store the table on the entry itself under "dataframe".
for data_name in sample_data_dicts:

    print(data_name)

    data_dict = sample_data_dicts[data_name]

    data_dict["dataframe"] = pd.read_csv(
        data_dict["file_path"],
        sep="\t",
        index_col=0,
    )

In [None]:
# Make sure every index referenced by the "w_summary" / "h_summary" settings is
# also part of the corresponding data entry's "peek" list.
#
# Indices are appended only when they are not already present, so re-running
# this cell does not keep growing the "peek" lists (they are mutated in place).
# NOTE(review): a summary that names a data entry missing from the filtered
# feature/sample dicts still raises KeyError — confirm that this is intended.
for w_or_h, data_dicts in (("w", feature_data_dicts), ("h", sample_data_dicts)):

    summary = SETTING[f"{w_or_h}_summary"]

    if summary is None:

        continue

    for data_name_indices in summary.values():

        for data_name, indices in data_name_indices.items():

            peek = data_dicts[data_name]["peek"]

            # "all" already covers every index; nothing to add.
            if peek == "all":

                continue

            for index in indices:

                if index not in peek:

                    peek.append(index)

In [None]:
# For every peeked row of every feature ("w") and sample ("h") data table:
#   1. plot the row's values as an annotation on the GPS map, and
#   2. when element labels are available, plot the row's values grouped by
#      label (bars for binary data, violins for categorical/continuous data).
for element_type, w_or_h, elements, element_label, label_color, data_dicts in (
    (
        "feature",
        "w",
        gps_map.w_elements,
        gps_map.w_element_label,
        gps_map.w_label_colors,
        feature_data_dicts,
    ),
    (
        "sample",
        "h",
        gps_map.h_elements,
        gps_map.h_element_label,
        gps_map.h_label_colors,
        sample_data_dicts,
    ),
):

    for data_name, data_dict in data_dicts.items():

        dataframe = data_dict["dataframe"]

        if data_dict["peek"] == "all":

            indices = dataframe.index

        else:

            indices = data_dict["peek"]

        # Use Index.intersection rather than `&`: the operator form of the set
        # intersection is deprecated in pandas 1.x and is no longer a set
        # operation in pandas 2.x.
        peeked_rows = dataframe.loc[
            dataframe.index.intersection(indices),
            dataframe.columns.intersection(elements),
        ]

        for index, element_value in peeked_rows.iterrows():

            print(data_name, index)

            if element_label is None:

                gps_map_directory_path = PATH[f"{w_or_h}|gps_map/"]

            else:

                gps_map_directory_path = PATH[f"{w_or_h}|hcc|gps_map/"]

            file_name = kraft.normalize_file_name(f"{data_name}_{index}.html")

            gps_map.plot(
                w_or_h,
                annotation_x_element=element_value.to_frame().T,
                annotation_std_maxs=(SETTING["plot_std"],),
                element_marker_size=SETTING[f"gps_map_{w_or_h}_element_marker_size"],
                title=f"{SETTING[f'{element_type}_alias']} on GPS Map<br>{data_name}: {index}",
                html_file_path=os.path.join(gps_map_directory_path, file_name),
            )

            # Without element labels there is nothing to compare across.
            if element_label is None:

                continue

            label_element_value = element_value.groupby(by=element_label)

            html_file_path = os.path.join(PATH[f"{w_or_h}|hcc|comparison/"], file_name)

            layout = {
                "title": {"text": f"{data_name}: {index}"},
                "xaxis": {
                    "title": f"{SETTING[f'{element_type}_alias']} Cluster",
                    "dtick": 1,
                },
                "yaxis": {"title": "Value"},
            }

            data_type = kraft.get_data_type(element_value)

            # NOTE(review): groups and colors are paired positionally by zip;
            # if a label has no element in this row the pairing may shift —
            # confirm against the structure of the label colors.
            if data_type == "binary":

                # One bar per label: sum of the values divided by the group size.
                traces = [
                    {
                        "type": "bar",
                        "name": label,
                        "x": (label,),
                        "y": (element_value_.sum() / element_value_.size,),
                        "marker": {"color": color},
                    }
                    for (label, element_value_), color in zip(
                        label_element_value, label_color
                    )
                ]

            elif data_type in ("categorical", "continuous"):

                # One violin per label showing all points and the mean line.
                traces = [
                    {
                        "type": "violin",
                        "name": label,
                        "y": element_value_,
                        "marker": {"color": color},
                        "scalemode": "count",
                        "meanline": {"visible": True},
                        "points": "all",
                    }
                    for (label, element_value_), color in zip(
                        label_element_value, label_color
                    )
                ]

            else:

                # Any other data type gets no comparison plot.
                continue

            kraft.plot_and_save({"layout": layout, "data": traces}, html_file_path)