In [51]:
import pandas as pd 
import numpy as np 
from dash_website.utils.graphs.add_line_and_annotation import add_line_and_annotation
from dash_website import CUSTOM_ORDER
from dash_website.age_prediction_performances import SCORES


# Notebook parameters: they pick the precomputed score table and drive the plot below.
sample_definition = "average_per_participant"
# Alternatives noted for this setting: "all_dimensions", "without_ensemble_models".
dimensions_selection = "custom_dimensions"
# "all" plots every dimension; any other value restricts the plot to that one dimension.
selected_dimension = "all"
# Name of the score column to plot; "r2" is given smaller annotation margins later on.
metric = "rmse"

# The table is kept as a plain dict here and turned back into a DataFrame by the plotting code.
# NOTE(review): presumably this mirrors the serialized form used by the dashboard - confirm.
scores_path = f"../../all_data/age_prediction_performances/scores_{sample_definition}_{dimensions_selection}.feather"
data_scores = pd.read_feather(scores_path).to_dict()

In [54]:
import plotly.graph_objs as go

# Rebuild the score table from its dict form and key each row by its
# (dimension, subdimension, sub_subdimension) triple.
scores = pd.DataFrame(data_scores).set_index(["dimension", "subdimension", "sub_subdimension"])

if selected_dimension == "all":
    if dimensions_selection == "without_ensemble_models":
        # The "*" entries of CUSTOM_ORDER are dropped for this selection.
        dimension_order = pd.Index(CUSTOM_ORDER).drop(["*", "*instances01", "*instances1.5x", "*instances23"])
    else:
        dimension_order = CUSTOM_ORDER
    # Selecting by label list returns the rows in the order of that list.
    sorted_dimensions = scores.loc[dimension_order].index.drop_duplicates()
else:
    # Keep only the requested dimension; the filtered table is what gets plotted.
    scores = scores.loc[[selected_dimension]]
    sorted_dimensions = scores.index.drop_duplicates()

# One slot per unique triple, centred at 5, 15, 25, ...
slot_centres = np.arange(5, 10 * len(sorted_dimensions) + 5, 10)
x_positions = pd.DataFrame({"x_position": slot_centres}, index=sorted_dimensions)

# Bar chart with one trace per algorithm; each bar sits on the slot of its dimension triple.
fig = go.Figure()
fig.update_layout(
    xaxis={
        # Reuse the slot centres stored in x_positions so ticks and bars share one source.
        "tickvals": x_positions["x_position"].values,
        "ticktext": [" - ".join(elem) for elem in sorted_dimensions.values],
    },
)

algorithms = scores["algorithm"].drop_duplicates()

hovertemplate = "%{x}, score: %{y:.3f} +- %{customdata[0]:.3f}, sample size: %{customdata[1]} <extra>%{customdata[2]}</extra>"

# Capped at 0 so the margins derived from it later never sit above the zero baseline.
min_score = min(scores[metric].min(), 0)

for algorithm in algorithms:
    scores_algorithm = scores[scores["algorithm"] == algorithm]
    metric_std = scores_algorithm[f"{metric}_std"]

    # dstack yields shape (1, n_bars, 3); [0] leaves one (std, sample size, algorithm) row per bar,
    # matching the customdata[0..2] slots referenced by the hover template.
    customdata = np.dstack((metric_std.values.flatten(), scores_algorithm["sample_size"].values.flatten(), [algorithm] * len(scores_algorithm.index)))[0]
    fig.add_bar(
        x=x_positions.loc[scores_algorithm.index].values.flatten(),
        y=scores_algorithm[metric],
        error_y={"array": metric_std, "type": "data"},
        name=algorithm,
        hovertemplate=hovertemplate,
        customdata=customdata
    )

# Table mapping every (dimension, subdimension, sub_subdimension) triple to its tick position.
dimensions = sorted_dimensions.to_frame()[["dimension", "subdimension", "sub_subdimension"]].reset_index(drop=True)
dimensions["position"] = fig["layout"]["xaxis"]["tickvals"]
dimensions.set_index(["dimension", "subdimension", "sub_subdimension"], inplace=True)

lines = []
annotations = []

# The margins depend only on the metric and min_score, so they are computed once
# instead of inside every loop iteration. "r2" uses offsets ten times smaller.
if metric == "r2":
    sub_subdimension_margin = min_score - 0.1
    subdimension_margin = min_score - 0.35
    dimension_inner_margin = min_score - 0.6
    dimension_outer_margin = min_score - 0.9
else:
    sub_subdimension_margin = min_score - 1
    subdimension_margin = min_score - 3.5
    dimension_inner_margin = min_score - 6
    dimension_outer_margin = min_score - 9

# Walk the hierarchy level by level. Each level narrows the previous selection with a
# single-label lookup rather than a tuple lookup on the full, unsorted MultiIndex; the
# tuple form is what pandas reports as "indexing past lexsort depth".
# NOTE(review): the trailing numeric arguments (90/13, 90/8, 90/5, 0/10) are passed to
# add_line_and_annotation unchanged - presumably text angle and font size; confirm there.
for dimension in dimensions.index.get_level_values("dimension").drop_duplicates():
    dimension_rows = dimensions.loc[dimension]

    min_position = dimension_rows.min()
    max_position = dimension_rows.max()

    line, annotation = add_line_and_annotation(
        dimension,
        "x",
        "y",
        min_position,
        max_position,
        dimension_inner_margin,
        dimension_outer_margin,
        90,
        13,
    )

    lines.append(line)
    annotations.append(annotation)

    for subdimension in dimension_rows.index.get_level_values("subdimension").drop_duplicates():
        subdimension_rows = dimension_rows.loc[subdimension]

        submin_position = subdimension_rows.min()
        submax_position = subdimension_rows.max()

        line, annotation = add_line_and_annotation(
            subdimension,
            "x",
            "y",
            submin_position,
            submax_position,
            subdimension_margin,
            dimension_inner_margin,
            90,
            8,
        )

        lines.append(line)
        annotations.append(annotation)

        for sub_subdimension in subdimension_rows.index.get_level_values("sub_subdimension").drop_duplicates():
            sub_subdimension_rows = subdimension_rows.loc[sub_subdimension]

            sub_submin_position = sub_subdimension_rows.min()
            sub_submax_position = sub_subdimension_rows.max()

            line, annotation = add_line_and_annotation(
                sub_subdimension,
                "x",
                "y",
                sub_submin_position,
                sub_submax_position,
                sub_subdimension_margin,
                subdimension_margin,
                90,
                5,
            )

            lines.append(line)
            annotations.append(annotation)

# The final top/right line. It is built from the last dimension visited above, so it is
# only added when the loop ran at least once; otherwise those names would be undefined
# (or stale from an earlier notebook run).
if not dimensions.empty:
    line, _ = add_line_and_annotation(
        dimension,
        "x",
        "y",
        min_position,
        max_position,
        sub_subdimension_margin,
        dimension_outer_margin,
        0,
        10,
        final=True,
    )

    lines.append(line)

fig["layout"]["shapes"] = lines
fig["layout"]["annotations"] = annotations
# Final cosmetics in a single layout update: hide the x tick labels, remove grid and
# zero lines on both axes, title the y axis after the metric and fix the canvas size.
fig.update_layout(
    xaxis=dict(showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(title=SCORES[metric], showgrid=False, zeroline=False),
    height=800,
    width=2000,
)

fig.show()


indexing past lexsort depth may impact performance.


indexing past lexsort depth may impact performance.


indexing past lexsort depth may impact performance.

