In [122]:
import pandas as pd
import numpy as np 

import plotly.graph_objs as go

from website.utils.graphs import add_custom_legend_axis
from website import MAIN_CATEGORIES, CATEGORIES, ALGORITHMS, SCORES, FOLDS


# Load the score table. It is converted to a plain dict here and turned back into a
# DataFrame below, so `scores_data` stays available as a dict for other cells.
scores_data = pd.read_feather("../data/scores.feather").to_dict()

# Selection to plot. A main category whose list is exactly ["all"] is expanded below
# to every category listed for it in CATEGORIES (except the "all" entry itself).
targets = ["all"]
examination_categories, laboratory_categories, questionnaire_categories = ["Body__Measures"], ["all"], ["all"]
algorithms = ["elastic_net", "light_gbm"]
metric = "diff_c_index"


scores_full = pd.DataFrame(scores_data).set_index(["main_category", "category"])
# Every remaining column label is a string that is evaluated into a
# (target, algorithm, fold, metric) tuple to rebuild the column MultiIndex.
# NOTE(review): eval() executes whatever the label contains, so this is only acceptable
# while scores.feather is a trusted, project-generated file. ast.literal_eval would be
# the safe equivalent if the labels are plain tuple literals - confirm before switching.
scores_full.columns = pd.MultiIndex.from_tuples(
    list(map(eval, scores_full.columns.tolist())), names=["target", "algorithm", "fold", "metric"]
)

categories_to_display = {
    "examination": examination_categories,
    "laboratory": laboratory_categories,
    "questionnaire": questionnaire_categories,
}

# Row keys (main_category, category) to keep, in display order.
indexes_to_take = []
# Raw key -> display name, applied to every level of the row index further down.
indexes_to_rename = {}

for main_category in MAIN_CATEGORIES:
    indexes_to_rename[main_category] = MAIN_CATEGORIES[main_category]
    if categories_to_display[main_category] == ["all"]:
        # Plain filter instead of pd.Index(...).drop("all"): same order and values,
        # and no KeyError if CATEGORIES[main_category] has no "all" entry.
        categories_to_display[main_category] = [
            category for category in CATEGORIES[main_category] if category != "all"
        ]
    for category in categories_to_display[main_category]:
        indexes_to_rename[category] = CATEGORIES[main_category][category]
        # A list of tuples is the documented way to select complete MultiIndex keys.
        indexes_to_take.append((main_category, category))
for algorithm in algorithms:
    # Only the row index is renamed below, so these entries matter only if a row
    # label happens to equal an algorithm key.
    indexes_to_rename[algorithm] = ALGORITHMS[algorithm]

# Keep the selected rows and, for the chosen targets, the bookkeeping columns
# ("numbers", "age_ranges") plus the chosen algorithms.
scores = scores_full.loc[indexes_to_take, (targets, ["numbers", "age_ranges"] + algorithms)]
# Rebind instead of mutating in place, so `scores` is always derived from `scores_full`.
scores = scores.rename(index=indexes_to_rename)

if targets != ["age"]:
    # NOTE(review): -1 is presumably the placeholder for a missing score - confirm.
    scores = scores.replace(-1, np.nan)

In [123]:
# Hover text of every bar. The customdata[i] placeholders refer to the columns of the
# per-trace `customdata` array built in the loop below:
#   0 = std of the score, 1 = n_participants, 2 = n_variables,
#   3 / 4 = min / max of the age range, 5 = algorithm display name.
hovertemplate = (
    "%{x},<Br> score: %{y:.3f} +- %{customdata[0]:.3f}, %{customdata[1]} participants with %{customdata[2]} variables, age range %{customdata[3]} to %{customdata[4]} years old <extra>%{customdata[5]}</extra>"
)

# One bar chart per fold, keyed by fold name. Every fold is built (no early `break`),
# so the `figures["train"]` lookup at the end does not depend on the order of FOLDS.
figures = {}
for fold in FOLDS:
    # Keep the fold-independent bookkeeping columns plus this fold's metrics, then drop
    # the "fold" level so columns are addressed as (target, algorithm, metric).
    scores_fold = scores.loc[:, (slice(None), slice(None), ["numbers", "age_ranges", fold])].droplevel("fold", axis="columns")

    # One x position per row: 5, 15, 25, ...
    x_positions = pd.Series(np.arange(5, 10 * len(scores_fold.index) + 5, 10), index=scores_fold.index)

    figure = go.Figure()
    figures[fold] = figure
    figure.update_layout(
        xaxis={
            "tickvals": x_positions.values,
            # Row index entries are (main category, category) pairs.
            "ticktext": [" - ".join(elem) for elem in scores_fold.index],
        }
    )

    for target in targets:
        for algorithm in algorithms:
            score_std = scores_fold[(target, algorithm, f"{metric}_std")]

            # Stack the per-row hover fields column-wise; [0] drops the leading axis
            # added by dstack, leaving one row of six fields per bar.
            # NOTE(review): astype(int) assumes the age bounds contain no NaN - confirm.
            customdata = np.dstack(
                (
                    score_std.values.flatten(),
                    scores_fold[(target, "numbers", "n_participants")].values.flatten(),
                    scores_fold[(target, "numbers", "n_variables")].values.flatten(),
                    scores_fold[(target, "age_ranges", "min")].values.flatten().astype(int),
                    scores_fold[(target, "age_ranges", "max")].values.flatten().astype(int),
                    [ALGORITHMS[algorithm]] * len(scores_fold.index),
                )
            )[0]

            figure.add_bar(
                x=x_positions.values,
                y=scores_fold[(target, algorithm, metric)],
                error_y={"array": score_std, "type": "data"},
                name=ALGORITHMS[algorithm],
                hovertemplate=hovertemplate,
                customdata=customdata,
            )

    # Smallest plotted score, capped at 0.
    lowest_score = min(scores_fold.loc[:, (targets, algorithms, metric)].min().min(), 0)
    # The two numeric offsets are larger in magnitude for "rmse" than for other metrics;
    # their exact meaning is defined by add_custom_legend_axis.
    add_custom_legend_axis(
        figure,
        scores_fold.index,
        -120 if metric == "rmse" else -1,
        -60 if metric == "rmse" else -0.5,
        lowest_score,
    )

    figure.update_layout(
        yaxis={
            "title": SCORES[targets[0]][metric],
            "showgrid": False,
            "zeroline": False,
            "showticklabels": True,
            "title_font": {"size": 45},
            "dtick": 12 if metric == "rmse" else 0.1,
            "tickfont_size": 20,
        },
        xaxis={"showgrid": False, "zeroline": False},
        height=800,
        margin={"l": 0, "r": 0, "b": 0, "t": 0},
        legend={"orientation": "h", "yanchor": "bottom", "font": {"size": 30}},
    )

figures["train"].show()


In [21]:
# NOTE(review): as shown here this cell cannot run - the indented `min_score` line at
# the bottom is not inside any block, and `sorted_scores` is not defined in the cells
# visible in this notebook. It looks like leftover scratch code; confirm whether it
# should be deleted or restored to a complete cell.
# A bare expression only renders when it is the last statement of a cell, so this
# `scores` line displays nothing in its current position.
scores
# Rebinds `hovertemplate` to a shorter text: x, score +- customdata[0],
# customdata[1] as sample size, and customdata[2] in the <extra> box.
hovertemplate = (
    "%{x}, score: %{y:.3f} +- %{customdata[0]:.3f}, sample size: %{customdata[1]} <extra>%{customdata[2]}</extra>"
)

    
    # Smallest value in `sorted_scores`, capped at 0.
    min_score = min(sorted_scores.min().min(), 0)

Unnamed: 0_level_0,target,age,age,age,age,age,age,age,age,age,age,age,age
Unnamed: 0_level_1,algorithm,numbers,numbers,age_ranges,age_ranges,elastic_net,elastic_net,elastic_net,elastic_net,elastic_net,elastic_net,elastic_net,elastic_net
Unnamed: 0_level_2,fold,numbers,numbers,age_ranges,age_ranges,train,train,test,test,train,train,test,test
Unnamed: 0_level_3,metric,n_participants,n_variables,min,max,r2,r2_std,r2,r2_std,rmse,rmse_std,rmse,rmse_std
main_category,category,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4
examination,Body__Measures,84160,19,2.0,85.0,0.446,0.0,0.446,0.005,211.502,0.077,211.524,0.871
laboratory,Autoantibodies__-__Immunofluorescence__&__Immunoprecipitation__Analyses__(Surplus),4735,36,12.0,80.0,0.064,0.002,0.054,0.016,237.176,0.661,237.919,5.972
laboratory,Cholesterol__-__Total,51199,15,6.0,85.0,0.134,0.001,0.134,0.005,254.57,0.187,254.619,1.852
laboratory,Fatty__Acids__-__Serum,3152,40,3.0,80.0,0.537,0.006,0.524,0.054,204.494,1.271,205.293,12.164
laboratory,Flame__Retardant__Metabolites__-__Urine__(Surplus),4979,27,6.0,80.0,0.111,0.003,0.11,0.032,255.425,0.714,255.251,5.776
laboratory,Hepatitis__E__:__IgG__&__IgM__Antibodies,36480,15,6.0,80.0,0.084,0.001,0.083,0.008,261.004,0.165,261.057,1.332
laboratory,"Human__Papillomavirus__(HPV)__-__6,__11,__16__&__18__Antibody__-__Serum:__4-plex__CLIA",6815,17,18.0,60.1,0.033,0.001,0.031,0.012,147.47,0.279,147.602,2.529
laboratory,Lead__-__Dust,3227,24,1.0,6.1,0.004,0.001,0.001,0.007,17.145,0.049,17.253,0.436
laboratory,Non-dioxin-like__Polychlorinated__Biphenyls,3890,39,12.0,85.0,0.654,0.004,0.642,0.04,161.535,1.049,162.457,9.485
laboratory,Personal__Care__and__Consumer__Product__Chemicals__and__Metabolites,5333,19,3.0,80.0,0.034,0.001,0.028,0.013,273.03,0.578,273.363,5.418
