In [159]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import t

In [160]:
# Load every CSV of the working directory; the file stem is used as the name
# of the method whose results the file holds.
# Sorting makes the row order independent of the (arbitrary) order in which
# the OS lists the directory.
csv_filenames = sorted(name for name in os.listdir() if name.endswith(".csv"))
if not csv_filenames:
    # Fail early with an explicit message instead of an AttributeError on None.
    raise FileNotFoundError("No .csv result file found in the working directory")

named_dataset = {
    os.path.splitext(filename)[0]: pd.read_csv(filename)
    for filename in csv_filenames
}

# Tag each row with the method it comes from (scalar is broadcast to all rows).
for method, method_df in named_dataset.items():
    method_df["method"] = method

# Concatenate the dataframes in a single call; ignore_index avoids the
# duplicated row labels that stacking several 0..n indexes would produce.
df = pd.concat(list(named_dataset.values()), ignore_index=True)

# "Unnamed: 0" is the index column written by DataFrame.to_csv; tolerate files
# that were saved without it.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

# The model name and the learning rate are parsed from the '-'-separated
# `filename` column: fields [3:-3] give the model, fields [-3:-1] the lr.
# NOTE(review): the lr spanning two fields presumably means it is written in
# scientific notation with a negative exponent (e.g. "5e-05") -- confirm.
df["model"] = df["filename"].apply(lambda x: "-".join(x.split("-")[3:-3]))
df["lr"] = (
    df["filename"]
    .apply(lambda x: "-".join(x.split("-")[-3:-1]))
    .astype(float)
)

### Meilleur score par modèle

In [161]:
# Best macro-F1 reached by each (model, method) pair, whatever the other
# columns of `df` are. The built-in "max" aggregation replaces the Python-level
# lambda (same result, and NaN values are skipped instead of propagating).
df_bar = df.groupby(["model", "method"], as_index=False).agg(
    best_f1_macro=("f1_macro", "max")
)

fig = go.Figure()
fig.update_layout({
    "title" : {
        "text" : ("Meilleur modèle (tout epoch, lr, n_samples confondus)<br>"
                  "les classifieurs sont optimisés par rapport aux HP, sauf basicML")
    },
    "xaxis" : {"title" : {"text" : "Modèle"}},
    # The y axis shows the score, not the model (the label was a copy-paste
    # of the x-axis title).
    "yaxis" : {"title" : {"text" : "Meilleur F1-macro"}}
})
# One bar series per method, so that methods are grouped side by side for
# each model.
for method, sub_df_bar in df_bar.groupby("method"):
    fig.add_trace(
        go.Bar(
            x = sub_df_bar["model"],
            y = sub_df_bar["best_f1_macro"],
            name = method
        )
    )
fig

In [162]:
# Colour palette: one {"r", "g", "b"} dict per entry, so an entry can be
# expanded with ** into an "rgb({r},{g},{b})" / "rgba({r},{g},{b},a)" template.
colors = [
    dict(r=red, g=green, b=blue)
    for red, green, blue in (
        (80, 82, 249),
        (232, 61, 45),
        (56, 196, 132),
        (153, 67, 248),
        (244, 144, 72),
    )
]

In [163]:
# Significance level of the two-sided confidence interval (90 % confidence).
alpha = 1 - 0.9


def _ci_half_width(values):
    """Half-width of the two-sided (1 - alpha) Student-t CI of the mean of `values`.

    Returns NaN when `values` holds a single observation (the interval is
    undefined with zero degrees of freedom).
    """
    n = len(values)
    # ddof=1: the t quantile with n - 1 degrees of freedom goes with the
    # *sample* standard deviation; np.std defaults to ddof=0 (population).
    return t.ppf(1 - alpha / 2, n - 1) * np.std(values, ddof=1) / np.sqrt(n)


# Mean macro-F1 and CI half-width for each (model, learning rate) pair.
df_lr = df.groupby(["model", "lr"], as_index=False).agg(
    mean_f1_macro=("f1_macro", "mean"),
    CI_f1_macro=("f1_macro", _ci_half_width),
)
df_lr["CI_f1_macro_upper"] = df_lr["mean_f1_macro"] + df_lr["CI_f1_macro"]
df_lr["CI_f1_macro_lower"] = df_lr["mean_f1_macro"] - df_lr["CI_f1_macro"]

fig = go.Figure(layout = {
    "title" : {"text" : ("Impact du learning rate sur le f1_score (tout "
                         "classifieur, n_samples et epoch confondues)<br>"
                         "les classifieurs sont optimisés par rapport aux HP, sauf basicML")},
    # The x axis carries the learning rate (it was mislabelled "Epoch").
    "xaxis" : {"title" : {"text" : "Learning rate"}, "type":"log"},
    "yaxis" : {"title" : {"text" : "F1-Macro (moyenne + CI)"}}
})
for idx, (model_name, sub_df) in enumerate(df_lr.groupby("model")):
    # Cycle through the palette so that more models than colours does not
    # raise an IndexError.
    color = colors[idx % len(colors)]
    # Mean curve.
    fig.add_trace(
        go.Scatter(
            x = sub_df["lr"],
            y = sub_df["mean_f1_macro"],
            line = {"color" : "rgb({r},{g},{b})".format(**color)},
            name = model_name
        )
    )
    # Confidence band: a closed polygon going along the upper bound, then
    # back along the lower bound in reverse order.
    fig.add_trace(
        go.Scatter(
            x= [*sub_df["lr"],*sub_df["lr"][::-1]],
            y= [*sub_df["CI_f1_macro_upper"], *sub_df["CI_f1_macro_lower"][::-1]],
            fill='toself',
            fillcolor='rgba({r},{g},{b},0.35)'.format(**color),
            line=dict(color='rgba(0,0,0,0)'),
            hoverinfo="skip",
            showlegend=False
        )
    )

fig

In [164]:
# Step 1: best macro-F1 of each (model, lr, method) triple.
# Step 2: for each (model, lr), mean / max / min of those best scores across
# methods. The "CI_*" columns therefore hold a min-max envelope, not a
# statistical confidence interval (names kept for the plotting code below).
df_lr = (
    df.groupby(["model", "lr", "method"], as_index=False)
    .agg(best_f1=("f1_macro", "max"))
    .groupby(["model", "lr"], as_index=False)
    .agg(
        mean_f1_macro=("best_f1", "mean"),
        CI_f1_macro_upper=("best_f1", "max"),
        CI_f1_macro_lower=("best_f1", "min"),
    )
)

fig = go.Figure(layout = {
    # A space was missing between "n_samples" and "confondus" in the
    # concatenated title.
    "title" : {"text" : ("Impact du learning rate sur le f1_score pour la "
                         "meilleure epoch (tout classifieur, n_samples "
                         "confondus)<br>"
                         "les classifieurs sont optimisés par rapport aux HP, sauf basicML<br>"
                         "/!\\ intervales de confiance cheloues"
                         )},
    # The x axis carries the learning rate (it was mislabelled "Epoch").
    "xaxis" : {"title" : {"text" : "Learning rate"}, "type":"log"},
    "yaxis" : {"title" : {"text" : "F1-Macro (moyenne + bande min-max)"}}
})
for idx, (model_name, sub_df) in enumerate(df_lr.groupby("model")):
    # Cycle through the palette so that more models than colours does not
    # raise an IndexError.
    color = colors[idx % len(colors)]
    # Mean curve.
    fig.add_trace(
        go.Scatter(
            x = sub_df["lr"],
            y = sub_df["mean_f1_macro"],
            line = {"color" : "rgb({r},{g},{b})".format(**color)},
            name = model_name
        )
    )
    # Min-max band: a closed polygon going along the upper bound, then back
    # along the lower bound in reverse order.
    fig.add_trace(
        go.Scatter(
            x= [*sub_df["lr"],*sub_df["lr"][::-1]],
            y= [*sub_df["CI_f1_macro_upper"], *sub_df["CI_f1_macro_lower"][::-1]],
            fill='toself',
            fillcolor='rgba({r},{g},{b},0.35)'.format(**color),
            line=dict(color='rgba(0,0,0,0)'),
            hoverinfo="skip",
            showlegend=False
        )
    )

fig

In [165]:
fig = go.Figure(layout = {
    "title" : {"text" : ("Distribution des f1_macro en fonction des classifieurs"
                         " (tout modèle, epoch et n_samples confondus)<br>"
                         "les classifieurs sont optimisés par rapport aux HP, sauf basicML")},
    "xaxis" : {"title" : {"text" : "f1_macro"}},
    "yaxis" : {"title" : {"text" : "Distribution (%)"}}
})

# Bin edges shared by every method so that the curves are comparable.
f1_bin_edges = np.array([0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7])
# Each bin is drawn at its centre: np.histogram returns one more edge than
# values, so plotting against the edges misaligns the two arrays.
f1_bin_centers = (f1_bin_edges[:-1] + f1_bin_edges[1:]) / 2

for method, sub_df in df.groupby("method"):
    counts, _ = np.histogram(sub_df["f1_macro"], bins=f1_bin_edges)
    in_range_total = counts.sum()
    # Percentage of the method's scores falling in each bin, to match the
    # "(%)" axis label (density=True yields a probability density instead).
    # As with the density, scores outside the bin range are left out of the
    # normalisation; an empty histogram gives zeros rather than NaN.
    if in_range_total:
        share_percent = 100 * counts / in_range_total
    else:
        share_percent = np.zeros(len(counts))
    fig.add_trace(go.Scatter(x = f1_bin_centers, y = share_percent, name = method))

fig

In [166]:
# Best macro-F1 reached by each (method, n_samples) pair, whatever the other
# columns of `df` are.
df_bar = df.groupby(["method", "n_samples"], as_index = False).agg(
    best_f1 = ("f1_macro", "max")
)

fig = go.Figure(layout = {
    "title" : {"text": ("Meilleur f1_macro en fonction du classifieur et du "
                        "n_samples (tout modèle, epoch "
                        "confondus)<br>"
                        "les classifieurs sont optimisés par rapport aux HP, sauf basicML")},
    # Axis and legend titles so the figure can be read on its own.
    "xaxis" : {"title" : {"text" : "Classifieur"}},
    "yaxis" : {"title" : {"text" : "Meilleur f1_macro"}},
    "legend" : {"title" : {"text" : "n_samples"}}
})
# One bar series per n_samples value; the trace name is made an explicit
# string since the group key is a number.
for n_sample, sub_df in df_bar.groupby("n_samples"):
    fig.add_trace(go.Bar(x = sub_df["method"], y = sub_df["best_f1"], name = str(n_sample)))
fig