In [1]:
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
from os.path import join
from sklearn.metrics import f1_score, classification_report
import json

In [26]:
# Directory the experiment CSV files are read from and the JSON summaries are
# written to.
# TODO: change this path to wherever your files live.
datadir = "../data/experiments"

# Datasets this notebook knows about.
dataset_names = ["doccano1", "hate_speech", "unhealthy"]

# TODO: choose the dataset name from the list above by activating one line.
# curr_dataset = "doccano1"
# curr_dataset = "hate_speech"
curr_dataset = "unhealthy"

# Output directory and file format of the rendered figures.
plot_dir = "../data/experiments"
plot_extension = "pdf"

# One input file per experiment, all named "<experiment>_<dataset>.csv".
C_file, increment_file, single_file, thresholds_file = (
    f"{experiment}_{curr_dataset}.csv"
    for experiment in ("C", "increment", "single", "thresholds")
)

In [27]:
# Display names of the label dimensions of each dataset, keyed by dataset name.
LABEL_COLUMNS = {
    # English replacements for the original Polish doccano label names, kept
    # here for reference:
    #     "doccano1": ["Pozytywne", "Negatywne", "Radość", "Zachwyt","Inspiruje","Spokój","Zaskoczenie","Współczucie","Strach","Smutek","Wstręt","Złość","Ironiczny","Żenujący","Wulgarny","Polityczny","Interesujący","Zrozumiały","Potrzebuję więcej informacji, aby ocenić ten tekst","Obraża mnie","Może kogoś atakować / obrażać / lekceważyć","Mnie bawi/śmieszy?","Może kogoś bawić?"],
    "doccano1": [
        "Positive",
        "Negative",
        "Joy",
        "Delight",
        "Inspiration",
        "Calm",
        "Surprise",
        "Compassion",
        "Fear",
        "Sadness",
        "Repulsion",
        "Anger",
        "Ironic",
        "Embarrassing",
        "Vulgar",
        "Political",
        "Interesting",
        "Understandable",
        "Incomprehensible",
        "Offensive to me",
        "Offensive to someone",
        "Funny to me",
        "Funny to someone",
    ],
    # The "</br></br>" markup inside two of these names is part of the label text.
    "hate_speech": [
        "Sentiment",
        "Respect",
        "Insult",
        "Humiliate",
        "Status",
        "Dehumanize",
        "Violence",
        "Genocide",
        "Attack</br></br>Defend",
        "Hate</br></br>Speech",
    ],
    "unhealthy": [
        "Antagonize",
        "Condescending",
        "Dismissive",
        "Generalisation",
        "Hostile",
        "Sarcastic",
    ],
}

# DOCCANO_SPLITS = {
#     "emocje": [ "Joy", "Delight", "Inspiring", "Calm", "Surprise", "Compassion", "Fear", "Sadness", "Repulsion", "Anger"],
#     "obraźliwość+śmieszność": ["Ironic", "Embarrassing", "Vulgar", "Political", "Interesting", "Understandable", "Incomprehensible", "Offensive to me", "Offensive to someone", "Funny to me", "Funny to someone"],
#     "inne": ["Positive", "Negative"]
# }
# 1. AB - needs to be renamed to "self-supervised evaluation" or something along those lines:
# - doccano1_AB_macro
# - doccano1_AB_ours
# - hate_speech_AB_macro
# - unhealthy_AB_macro
# - unhealthy_AB_ours
# 2. Increment:
# - doccano1_increment_ours
# - doccano1_increment_specific
# 3. Single-label vs. Multi-label:
# - doccano1_single_macro
# - hate_speech_single_macro
# 4. Threshold:
# - doccano1_threshold_ours
# - doccano1_threshold_specific
# - unhealthy_threshold_ours
# - unhealthy_threshold_specific

In [28]:
def read_data(file):
    """Load one experiment CSV from ``datadir``, leaving out its ``text`` column.

    The column index of the file is printed so its layout shows up in the
    notebook output.

    Args:
        file: File name, resolved relative to the module-level ``datadir``.

    Returns:
        The loaded DataFrame. When a ``text`` column is present a new frame
        without it is returned; otherwise the frame is returned as read.
    """
    frame = pd.read_csv(join(datadir, file))
    print(frame.columns)
    has_text = "text" in frame.columns
    return frame.drop(columns=["text"]) if has_text else frame

In [29]:
def convert_column_to_matrix(df, column):
    """Turn a column of stringified vectors into a 2-D float array.

    Every cell is converted with ``str``, all ``[`` and ``]`` characters are
    removed, and the remainder is parsed as ``", "``-separated numbers.

    Args:
        df: DataFrame containing ``column``.
        column: Name of the column whose cells look like ``"[1, 0, 1]"``.

    Returns:
        Array with one row per DataFrame row, stacked along axis 0 (so every
        cell must parse to the same number of values).
    """
    drop_brackets = str.maketrans("", "", "[]")
    rows = [
        np.fromstring(str(cell).translate(drop_brackets), sep=", ")
        for cell in df[column].tolist()
    ]
    return np.stack(rows, axis=0)

In [30]:
# Load the four experiment tables in a fixed order; read_data prints the
# column index of each file as it is read.
df_AB, df_increment, df_single, df_thresholds = map(
    read_data,
    (C_file, increment_file, single_file, thresholds_file),
)

Index(['text', 'A_label', 'B_label', 'predicted', 'fold'], dtype='object')
Index(['1', '2', '3', '4', '5', '6', '7', '8', 'original', 'test_fold'], dtype='object')
Index(['labels', 'predictions', 'text', 'fold', 'label'], dtype='object')
Index(['labels', 'predictions', 'text', 'fold', 'threshold'], dtype='object')


In [31]:
def aer(true, preds):
    """Share of entries where label and prediction add up to zero.

    For 0/1 inputs this counts the positions where both ``true`` and ``preds``
    are 0. The count is divided by the length of the first axis of ``true``.

    Args:
        true: Array of ground-truth labels.
        preds: Array of predictions, same shape as ``true``.

    Returns:
        The count divided by ``true.shape[0]``.
    """
    both_zero = np.equal(true + preds, 0)
    n_rows = true.shape[0]
    return both_zero.sum() / n_rows

def aal(true, preds):
    """Share of positive labels that the prediction left at zero.

    Counts entries where ``(1 - true) + preds`` equals 0 (for 0/1 inputs:
    ``true`` is 1 and ``preds`` is 0) and divides by ``true.sum()``.

    Args:
        true: Array of ground-truth labels.
        preds: Array of predictions, same shape as ``true``.

    Returns:
        The count divided by the sum of ``true``. No guard is applied when
        that sum is zero.
    """
    missed_positive = np.equal((1 - true) + preds, 0)
    n_positive = true.sum()
    return missed_positive.sum() / n_positive

def mlral(true, preds):
    """Average the ``aal`` value over the columns of a 2-D label matrix.

    Args:
        true: 2-D array of ground-truth labels, one column per label dimension.
        preds: 2-D array of predictions with the same layout.

    Returns:
        ``np.mean`` of ``aal`` evaluated on each column pair.
    """
    per_column = []
    for col in range(true.shape[1]):
        per_column.append(aal(true[:, col], preds[:, col]))
    return np.mean(per_column)

def compute_F1(true, preds):
    """Compute per-label and aggregate F1 scores plus the AER / AAL / MLRAL metrics.

    Key names are kept short and follow the order used in the accompanying
    .docx document. Keys ending in ``_per_dim`` hold one value per label
    column, and the key next to each of them holds the mean of that list.
    ``f1_for_1`` and ``f1_for_0`` are the exception: they are computed once on
    the flattened arrays.

    Args:
        true: 2-D array of ground-truth labels, one column per label dimension
            (assumed to contain only 0/1, since ``pos_label`` is set to 1 and 0).
        preds: 2-D array of predictions with the same shape as ``true``.

    Returns:
        dict with the keys
        ``macro_per_dim`` / ``macro``: macro-averaged F1 per column and its mean;
        ``f1_for_1`` / ``f1_for_0`` / ``mean_for_1_0``: F1 of class 1 and of
        class 0 on the flattened arrays, and their average;
        ``1_per_dim`` / ``mean_for_1``: F1 of class 1 per column and its mean;
        ``0_per_dim`` / ``mean_for_0``: F1 of class 0 per column and its mean;
        ``AER`` / ``AAL``: ``aer`` and ``aal`` on the flattened arrays;
        ``MLRAL``: ``mlral`` on the 2-D arrays.
    """
    n_dims = true.shape[1]

    # zero_division=0 is passed to every call: sklearn already scores an
    # undefined F1 as 0 by default, but emits a warning each time. The value
    # is unchanged; only the warning noise goes away.
    macro_per_class = np.stack(
        [f1_score(true[:, i], preds[:, i], average="macro", zero_division=0) for i in range(n_dims)],
        axis=0,
    )
    score_per_class_1 = np.stack(
        [f1_score(true[:, i], preds[:, i], pos_label=1, zero_division=0) for i in range(n_dims)],
        axis=0,
    )
    score_per_class_0 = np.stack(
        [f1_score(true[:, i], preds[:, i], pos_label=0, zero_division=0) for i in range(n_dims)],
        axis=0,
    )

    # Flattened views treat every (sample, label) cell as one binary decision.
    flat_true, flat_preds = true.flatten(), preds.flatten()
    score_for_1 = f1_score(flat_true, flat_preds, pos_label=1, zero_division=0)
    score_for_0 = f1_score(flat_true, flat_preds, pos_label=0, zero_division=0)

    data = {"macro_per_dim": list(macro_per_class), "macro": macro_per_class.mean(),
            "f1_for_1": score_for_1, "f1_for_0": score_for_0, "mean_for_1_0": (score_for_1 + score_for_0)/2,
            "1_per_dim": list(score_per_class_1), "mean_for_1": score_per_class_1.mean(),
            "0_per_dim": list(score_per_class_0), "mean_for_0": score_per_class_0.mean(),
            "AER": aer(flat_true, flat_preds), "AAL": aal(flat_true, flat_preds),
            "MLRAL": mlral(true, preds)}
    return data

In [32]:
# compute_F1 keys used by the experiments: the per-label list and the
# aggregate value that goes with it.
per_class_label = "macro_per_dim"
mean_label = "macro"

# Infix used in plot titles and, with the surrounding "(" and ") " stripped,
# in plot file names. Labels without a dedicated wording fall back to
# "(<mean_label>) " so the name is always defined and never left over from an
# earlier run of this cell.
plot_title_infix = {
    "macro": "(macro) ",
    "mean_for_1": "(for 1 label) ",
    "mean_for_0": "(for 0 label) ",
}.get(mean_label, f"({mean_label}) ")

# Colour palette shared by all figures:
#   navy blue on the arrows - 000549
#   red (document)          - CC0000
#   green (document)        - 009900
#   pink                    - FF0080
neutral1="#000549"
# neutral2="gray"
neutral3="teal"
bad1="#CC0000"
bad2="#FF0080"
# bad3="maroon"
good1="#009900"
# good2="lime"

In [33]:
def get_array(data, key):
    """Gather one metric from every entry of ``data`` and summarise it.

    Args:
        data: Mapping whose values are dicts (in this notebook: fold -> result
            of ``compute_F1``).
        key: Metric name to pull out of every value.

    Returns:
        Tuple ``(values, mean, std)`` where ``values`` stacks the metric in the
        mapping's iteration order and mean/std are taken along axis 0.
    """
    values = np.array([entry[key] for entry in data.values()])
    return values, values.mean(axis=0), values.std(axis=0)

In [34]:
def plot_barplot(fig, means, stds, name, x=None, show=True, color="blue", last_label="all", **barplot_kwargs):
    """Append one bar trace to ``fig``.

    Args:
        fig: Plotly figure that receives the trace.
        means: Bar heights.
        stds: Accepted but not used; no error bars are drawn.
        name: Legend entry of the trace.
        x: Bar categories. When omitted, the label names of the current
            dataset followed by ``last_label`` are used.
        show: Whether the trace appears in the legend.
        color: Marker colour of the bars.
        last_label: Name of the extra trailing category used when ``x`` is
            omitted.
        **barplot_kwargs: Passed through to ``go.Bar`` unchanged.
    """
    categories = x if x is not None else (LABEL_COLUMNS[curr_dataset] + [last_label])
    bar_trace = go.Bar(
        name=name,
        x=categories,
        y=means,
        showlegend=show,
        marker_color=color,
        **barplot_kwargs
    )
    fig.add_trace(bar_trace)
def plot_lineplot(fig, means, stds, name, x=None, show=True, color="blue", **scatterplot_kwargs):
    """Append one scatter (line) trace to ``fig``.

    Args:
        fig: Plotly figure that receives the trace.
        means: y values of the trace.
        stds: Accepted but not used; no error bars are drawn.
        name: Legend entry of the trace.
        x: x values. When omitted, the label names of the current dataset
            followed by ``"all"`` are used.
        show: Whether the trace appears in the legend.
        color: Marker colour. Pass ``None`` to leave the colour entirely to
            ``scatterplot_kwargs``.
        **scatterplot_kwargs: Passed through to ``go.Scatter``.
    """
    positions = x if x is not None else (LABEL_COLUMNS[curr_dataset] + ["all"])
    if color is not None:
        # An explicit colour takes precedence over any marker_color passed in.
        scatterplot_kwargs.update(marker_color=color)
    line_trace = go.Scatter(
        name=name,
        x=positions,
        y=means,
        showlegend=show,
        **scatterplot_kwargs
    )
    fig.add_trace(line_trace)

In [35]:
def AB_experiment(df, per_class_label = "macro_per_dim", mean_label = "macro"):
    """Score original versus model-predicted labels per fold and plot the comparison.

    For every fold the ``A_label`` column is scored twice with ``compute_F1``:
    against ``B_label`` (collected in ``f1_AA``) and against ``predicted``
    (collected in ``f1_BA``). Both dictionaries are written as JSON into
    ``datadir``. Two figures are written into ``plot_dir``:

    * ``<dataset>_AB_<infix>``: per-label scores of both comparisons, with the
      positive / negative difference stacked on the second bar;
    * ``<dataset>_AB_ours``: AER / AAL / MLRAL of both comparisons, their
      difference, and a hatched "Model loss" / "MB" bar.

    Args:
        df: DataFrame with ``fold``, ``A_label``, ``B_label`` and ``predicted``
            columns; the three label columns hold stringified vectors.
        per_class_label: ``compute_F1`` key holding the per-label scores.
        mean_label: ``compute_F1`` key holding the matching aggregate score.

    Returns:
        None. Results are written to disk.
    """
    # Folds are addressed as 0..max; this assumes every id in that range occurs.
    folds_num = df["fold"].max() + 1

    f1_AA, f1_BA = {}, {}

    for fold in range(folds_num):
        curr_data = df[df["fold"]==fold]
        A_label = convert_column_to_matrix(curr_data, "A_label")
        B_label = convert_column_to_matrix(curr_data, "B_label")
        C_label = convert_column_to_matrix(curr_data, "predicted")

        f1_AA[fold] = compute_F1(A_label, B_label)
        f1_BA[fold] = compute_F1(A_label, C_label)

    with open(join(datadir, f"f1_AA_{curr_dataset}.json"), "w") as f:
        json.dump(f1_AA, f)

    with open(join(datadir, f"f1_BA_{curr_dataset}.json"), "w") as f:
        json.dump(f1_BA, f)

    # ---- Figure 1: per-label scores -------------------------------------
    aa, means_aa, _ = get_array(f1_AA, per_class_label)
    scalar_aa, mean_aa, _ = get_array(f1_AA, mean_label)

    ba, means_ba, _ = get_array(f1_BA, per_class_label)
    scalar_ba, mean_ba, _ = get_array(f1_BA, mean_label)

    # Mean per-fold difference, with the aggregate score as the last entry.
    diff_means = (ba-aa).mean(axis=0)
    diff_mean = (scalar_ba-scalar_aa).mean()
    diff_array = np.array(list(diff_means) + [diff_mean])

    # Split the difference into two non-negative series so gains and losses
    # can be drawn as separately coloured traces.
    positive_difference = diff_array.copy()
    positive_difference[positive_difference < 0] = 0

    negative_difference = diff_array.copy()
    negative_difference[negative_difference > 0] = 0
    negative_difference *= -1

    means_ba = np.array(list(means_ba) + [mean_ba])
    means_aa = np.array(list(means_aa) + [mean_aa])

    # Draw the second bar at the smaller of the two values; the difference bar
    # stacked on top of it then ends at the larger one.
    means_ba = np.min(np.stack([means_aa, means_ba]), axis=0)

    fig = go.Figure()

    last_label = "All labels (F1 macro)"

    # offset_args is updated step by step: each plot_barplot call below sees
    # the settings accumulated so far.
    offset_args = dict(
        offsetgroup=0,
        offset=-1/3,
        width=1/3,
        marker_line=dict(width=0),
    )
    plot_barplot(fig, means_aa, None, "Original train labels", color=neutral1, last_label=last_label, **offset_args)

    # Second bar group sits next to the first one, on the overlaid y2 axis.
    offset_args.update(dict(offset=0, offsetgroup=1, yaxis="y2"))
    plot_barplot(fig, means_ba, None, "Train labels predicted by model", last_label=last_label, color=neutral3, **offset_args)

    offset_args.update(dict(
        text=[f"+{v:.3f}" if v > 0 else "" for v in positive_difference],
        textposition='outside',
        textfont_color="black",
        marker_line=dict(width=0)
    ))
    plot_barplot(fig, positive_difference, None, "Positive difference", color=good1, last_label=last_label, **offset_args)

    offset_args.update(dict(
        text=[f"-{v:.3f}" if v > 0 else "" for v in negative_difference],
    ))
    # The negative-difference trace is drawn without an explicit marker_line.
    del offset_args["marker_line"]
    plot_barplot(fig, negative_difference, None, "Negative difference", color=bad2, last_label=last_label, **offset_args)

    fig.update_layout(
        barmode="relative",
        width=2500,
        height=1000,
        xaxis=dict(
            title="Label",
        ),
        yaxis=dict(
            title="F1 macro",
        ),
        font_size=40,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            borderwidth=1,
        ),
        # Hidden twin axis, kept in lockstep with y, used by the second bar group.
        yaxis2=go.layout.YAxis(
            visible=False,
            matches="y",
            overlaying="y",
            anchor="x",
        ),
    )

    fig.update_xaxes(
        tickangle=35,
        tickfont=dict(size=40)
    )

    # plot_title_infix looks like "(macro) "; [1:-2] strips "(" and ") ".
    plot_name = join(plot_dir, f"{curr_dataset}_AB_{plot_title_infix[1:-2]}.{plot_extension}")
    fig.write_image(plot_name)

    # ---- Figure 2: AER / AAL / MLRAL ------------------------------------
    metric_names = ("AER", "AAL", "MLRAL")

    plot_data = {}
    for split, ff in (("A", f1_AA), ("B", f1_BA)):
        plot_data[split] = {"means": [], "stds": []}
        for metric in metric_names:
            _, mean, std = get_array(ff, metric)
            plot_data[split]["means"].append(mean)
            plot_data[split]["stds"].append(std)

    fig = go.Figure()

    means_diff = np.array(plot_data["A"]["means"]) -  np.array(plot_data["B"]["means"])

    offset_args = dict(
        offsetgroup=0,
        offset=-1/3,
        width=1/3,
    )
    plot_barplot(fig, plot_data["A"]["means"], None, "Original train labels", x=metric_names, show=True, color=neutral1, **offset_args)

    # AAL minus AER of the original-label comparison (indices follow metric_names).
    model_loss = plot_data["A"]["means"][1] - plot_data["A"]["means"][0]
    model_benefit = -1*model_loss

    offset_args.update(dict(offset=0, offsetgroup=1, yaxis="y2"))
    plot_barplot(fig, plot_data["B"]["means"], None, "Train labels predicted by model", x=metric_names, show=True, color=neutral3, **offset_args)

    offset_args.update(dict(
        # Label shows the model value relative to the original one (B - A).
        # The explicit sign format avoids the "--0.123" that a hard-coded "-"
        # prefix produced whenever A - B was negative.
        text=[f"{-v:+.3f}" for v in means_diff],
        textposition='auto',
        textfont_color="black",
    ))
    plot_barplot(fig, means_diff, None, "Difference", x=metric_names, show=True, color=bad2, **offset_args)

    # Hatched bar stacked in the first bar group: the gap between AER and AAL.
    offset_args = dict(
        marker_pattern=dict(
            shape="/",
            fillmode="replace",
        ),
        textfont_color="black",
        textposition="inside",
        offsetgroup=0,
        offset=-1/3,
        width=1/3,
    )
    if model_loss > 0:
        offset_args["text"] = [f"{model_loss:.3f}"]
        plot_barplot(fig, [model_loss], None, "Model loss", x=["AER"], show=True, color=bad1, **offset_args)
    else:
        offset_args["text"] = [f"{model_benefit:.3f}"]
        plot_barplot(fig, [model_benefit], None, "MB", x=["AAL"], show=True, color="darkviolet", **offset_args)

    fig.update_layout(
        barmode="relative",
        width=1000,
        height=700,
        xaxis=dict(
            title="Metric name"
        ),
        yaxis=dict(
            title="Metric value"
        ),
        font=dict(
            size=20,
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            borderwidth=1,
        ),
        yaxis2=go.layout.YAxis(
            visible=False,
            matches="y",
            overlaying="y",
            anchor="x",
        ),
    )

    plot_name = join(plot_dir, f"{curr_dataset}_AB_ours.{plot_extension}")
    fig.write_image(plot_name)
    
    #     Navy blue on the arrows - 000549
# Red (document) - CC0000
# Green (document) - 009900
# pink - FF0080
    

In [21]:
# Run the A/B comparison for the selected dataset; its JSON summaries and
# figures are written to disk.
AB_experiment(df_AB, per_class_label=per_class_label, mean_label=mean_label)

In [36]:
def increment_experiment(df, per_class_label = "macro_per_dim", mean_label = "macro"):
    """Score predictions obtained with 1..8 training folds and plot the trends.

    For every test fold the ``original`` column is compared, via
    ``compute_F1``, with each of the columns ``"1"`` .. ``"8"``. The scores
    are dumped to ``f1_increment_<dataset>.json`` in ``datadir`` and two line
    plots are written to ``plot_dir``:

    * ``<dataset>_increment_specific``: F1 of class 0 and class 1;
    * ``<dataset>_increment_ours``: AER, AAL and MLRAL, plus a hatched "MB"
      area enclosed by the AER and AAL curves.

    Args:
        df: DataFrame with ``test_fold``, ``original`` and ``"1"`` .. ``"8"``
            columns holding stringified vectors.
        per_class_label: Not used by this function.
        mean_label: Not used by this function.

    Returns:
        None. Results are written to disk.
    """
    train_sizes = list(range(1, 9))
    n_folds = df["test_fold"].max() + 1

    f1_data = {}
    for fold in range(n_folds):
        fold_rows = df[df["test_fold"] == fold]
        reference = convert_column_to_matrix(fold_rows, "original")
        for size in train_sizes:
            candidate = convert_column_to_matrix(fold_rows, str(size))
            per_fold = f1_data.setdefault(f"train_{size}", {})
            per_fold[fold] = compute_F1(reference, candidate)

    with open(join(datadir, f"f1_increment_{curr_dataset}.json"), "w") as f:
        json.dump(f1_data, f)

    def collect(metrics):
        """Per metric, the across-fold mean and std for every training size."""
        curves = {}
        for metric in metrics:
            summaries = [get_array(f1_data[f"train_{size}"], metric) for size in train_sizes]
            curves[metric] = {
                "means": [mean for _, mean, _ in summaries],
                "stds": [std for _, _, std in summaries],
            }
        return curves

    # Legend placement shared by both figures: horizontal, above the plot.
    legend_above = dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        borderwidth=1,
    )

    # ---- Figure 1: F1 of the two classes --------------------------------
    class_fig = go.Figure()
    for metric, curve in collect(("f1_for_0", "f1_for_1")).items():
        is_class_1 = metric == "f1_for_1"
        plot_lineplot(
            class_fig,
            curve["means"],
            curve["stds"],
            "class 1" if is_class_1 else "class 0",
            x=list(train_sizes),
            show=True,
            color=neutral1 if is_class_1 else neutral3,
        )

    class_fig.update_layout(
        width=1000,
        height=500,
        xaxis=dict(title="Amount of training folds"),
        yaxis=dict(title="F1 score"),
        font=dict(size=20),
        legend=dict(legend_above),
    )
    class_fig.write_image(join(plot_dir, f"{curr_dataset}_increment_specific.{plot_extension}"))

    # ---- Figure 2: AER / AAL / MLRAL ------------------------------------
    our_curves = collect(("AER", "AAL", "MLRAL"))
    ours_fig = go.Figure()

    # Closed outline: along the AER curve, then back along the AAL curve, so
    # that "toself" fills the area between the two.
    # NOTE(review): the colour strings use "rgb(...)" with four components;
    # confirm whether "rgba(...)" was intended.
    band_style = dict(
        fill="toself",
        fillpattern=dict(
            fgopacity=0.5,
            fgcolor="darkviolet",
            fillmode="replace",
            shape="/"
        ),
        marker=dict(color="rgb(255,255,255,0.1)"),
        line=dict(color="rgb(255,255,255,0.1)"),
    )
    band_y = our_curves["AER"]["means"] + our_curves["AAL"]["means"][::-1]
    band_x = train_sizes + train_sizes[::-1]
    plot_lineplot(ours_fig, band_y, None, "MB", x=band_x, show=True, color=None, **band_style)

    line_colors = {"AER": neutral1, "AAL": neutral3}
    for metric, curve in our_curves.items():
        plot_lineplot(
            ours_fig,
            curve["means"],
            None,
            metric,
            x=list(train_sizes),
            show=True,
            color=line_colors.get(metric, bad2),
        )

    ours_fig.update_layout(
        width=1000,
        height=500,
        xaxis=dict(title="Amount of training folds"),
        yaxis=dict(title="Metric value"),
        font=dict(size=20),
        legend=dict(legend_above, traceorder="reversed"),
    )
    ours_fig.write_image(join(plot_dir, f"{curr_dataset}_increment_ours.{plot_extension}"))
    
        

In [23]:
# Run the training-size experiment for the selected dataset; its JSON summary
# and figures are written to disk.
increment_experiment(df_increment, per_class_label=per_class_label, mean_label=mean_label)

In [37]:
def threshold_experiment(df, per_class_label = "macro_per_dim", mean_label = "macro", save=True):
    """Score predictions per decision threshold and optionally plot the trends.

    For every fold and every distinct value of the ``threshold`` column the
    ``labels`` are compared with the ``predictions`` via ``compute_F1``. The
    scores are always dumped to ``f1_threshold_<dataset>.json`` in
    ``datadir``. With ``save=True`` two line plots are also written to
    ``plot_dir``:

    * ``<dataset>_threshold_specific``: F1 of class 0 and class 1;
    * ``<dataset>_threshold_ours``: AER, AAL and MLRAL, plus a hatched "MB"
      area enclosed by the AER and AAL curves.

    Both figures take their x values from the thresholds found in ``df``, in
    order of first appearance.

    Args:
        df: DataFrame with ``fold``, ``threshold``, ``labels`` and
            ``predictions`` columns; the latter two hold stringified vectors.
        per_class_label: Not used by this function.
        mean_label: Not used by this function.
        save: Whether to render and write the two figures.

    Returns:
        dict mapping ``"th_<threshold>"`` to ``{fold: compute_F1 result}``.
    """
    folds_num = df["fold"].max() + 1
    thresholds = df["threshold"].unique()

    f1_data = {}
    for fold in range(folds_num):
        curr_data = df[df["fold"]==fold]
        for th in thresholds:
            curr_data_thresh = curr_data[curr_data["threshold"] == th]
            curr_matrix = convert_column_to_matrix(curr_data_thresh, "predictions")
            original = convert_column_to_matrix(curr_data_thresh, "labels")
            if f"th_{th}" not in f1_data:
                f1_data[f"th_{th}"] = {}
            f1_data[f"th_{th}"][fold] = compute_F1(original, curr_matrix)

    with open(join(datadir, f"f1_threshold_{curr_dataset}.json"), "w") as f:
        json.dump(f1_data, f)

    if not save:
        return f1_data

    def collect(metrics):
        """Per metric, the across-fold mean and std for every threshold."""
        curves = {}
        for metric in metrics:
            curves[metric] = {"means": [], "stds": []}
            for th in thresholds:
                _, mean, std = get_array(f1_data[f"th_{th}"], metric)
                curves[metric]["means"].append(mean)
                curves[metric]["stds"].append(std)
        return curves

    # x values as strings, derived from the data. The original hard-coded
    # (0.1, 0.15, 0.2, 0.25) for the second figure, which only matches when
    # the file holds exactly those thresholds in that order.
    threshold_ticks = [str(th) for th in thresholds]

    # ---- Figure 1: F1 of the two classes --------------------------------
    plot_data = collect(("f1_for_0", "f1_for_1"))

    fig = go.Figure()

    for split, v in plot_data.items():
        if split == "f1_for_1":
            name = "class 1"
            color = neutral1
        else:
            name = "class 0"
            color = neutral3
        plot_lineplot(fig, v["means"], v["stds"], name, x=list(threshold_ticks), show=True, color=color)

    fig.update_layout(
        width=1000,
        height=500,
        xaxis=dict(
            title="Threshold value",
        ),
        yaxis=dict(
            title="F1 score",
        ),
        font_size=20,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            borderwidth=1,
        ),
    )

    plot_name = join(plot_dir, f"{curr_dataset}_threshold_specific.{plot_extension}")
    fig.write_image(plot_name)

    # ---- Figure 2: AER / AAL / MLRAL ------------------------------------
    plot_data = collect(("AER", "AAL", "MLRAL"))

    fig = go.Figure()

    # Closed outline: along the AER curve, then back along the AAL curve, so
    # that "toself" fills the area between the two.
    # NOTE(review): the colour strings use "rgb(...)" with four components;
    # confirm whether "rgba(...)" was intended.
    args = dict(
        fill="toself",
        fillpattern=dict(
            fgopacity=0.5,
            fgcolor="darkviolet",
            fillmode="replace",
            shape="/"
        ),
        marker=dict(
            color="rgb(255,255,255,0.1)",
        ),
        line=dict(
            color="rgb(255,255,255,0.1)",
        ),
    )
    xs = threshold_ticks + threshold_ticks[::-1]
    plot_lineplot(fig, plot_data["AER"]["means"]+list(reversed(plot_data["AAL"]["means"])), None, "MB", x=xs, show=True, color=None, **args)

    for split, v in plot_data.items():
        if split == "AER":
            color = neutral1
        elif split == "AAL":
            color = neutral3
        else:
            color = bad2
        plot_lineplot(fig, v["means"], v["stds"], split, x=list(threshold_ticks), show=True, color=color)

    fig.update_layout(
        width=1000,
        height=500,
        xaxis=dict(
            title="Threshold value",
        ),
        yaxis=dict(
            title="Metric value",
        ),
        font_size=20,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            borderwidth=1,
            traceorder="reversed",
        ),
    )

    plot_name = join(plot_dir, f"{curr_dataset}_threshold_ours.{plot_extension}")
    fig.write_image(plot_name)

    return f1_data
        

In [38]:
# for k in DOCCANO_SPLITS.keys(): 
# Run the threshold experiment and keep its scores: the single-label
# experiment takes them as its multi-label baseline.
f1_thresholds = threshold_experiment(
    df_thresholds,
    per_class_label=per_class_label,
    mean_label=mean_label,
    save=True,
)

In [825]:
def single_training_experiment(df, f1_thresholds, per_class_label = "macro_per_dim", mean_label = "macro", threshold_key="th_0.15"):
    """Compare models trained on a single label with the multi-label model.

    For every fold and every distinct value of the ``label`` column the
    ``labels`` are compared with the ``predictions`` via ``compute_F1``. The
    scores are dumped to ``f1_single_training_<dataset>.json`` in ``datadir``.
    A grouped bar chart of single-label versus multi-label scores, each with
    a trailing "All labels (Average)" bar, is written to
    ``<dataset>_single_<infix>`` in ``plot_dir``.

    Args:
        df: DataFrame with ``fold``, ``label``, ``labels`` and ``predictions``
            columns; the latter two hold stringified vectors. The distinct
            ``label`` values are presumably in the same order and number as
            ``LABEL_COLUMNS[curr_dataset]``, which supplies the x categories.
            TODO confirm.
        f1_thresholds: Result of ``threshold_experiment``; supplies the
            multi-label scores.
        per_class_label: ``compute_F1`` key holding the per-label scores.
        mean_label: ``compute_F1`` key holding the matching aggregate score.
        threshold_key: Entry of ``f1_thresholds`` used as the multi-label
            baseline. Defaults to the previously hard-coded ``"th_0.15"``.

    Returns:
        None. Results are written to disk.
    """
    folds_num = df["fold"].max() + 1

    f1_data = {}
    labels = df["label"].unique()
    for fold in range(folds_num):
        curr_data = df[df["fold"]==fold]
        for label in labels:
            curr_data_label = curr_data[curr_data["label"] == label]
            curr_matrix = convert_column_to_matrix(curr_data_label, "predictions")
            original = convert_column_to_matrix(curr_data_label, "labels")
            if label not in f1_data:
                f1_data[label] = {}
            f1_data[label][fold] = compute_F1(original, curr_matrix)

    with open(join(datadir, f"f1_single_training_{curr_dataset}.json"), "w") as f:
        json.dump(f1_data, f)

    # Per single-label model: across-fold mean and std of the per-label scores.
    plot_data = {}
    for label in labels:
        _, means, stds = get_array(f1_data[label], per_class_label)
        plot_data[label] = (list(means), list(stds))

    # Multi-label baseline: per-label scores followed by the aggregate score.
    _, means, stds = get_array(f1_thresholds[threshold_key], per_class_label)
    _, mean, std = get_array(f1_thresholds[threshold_key], mean_label)
    all_data = (list(means)+[mean], list(stds)+[std])

    fig = go.Figure()

    # Only the first entry of each single-label result is used.
    single_means = [label_means[0] for label_means, _ in plot_data.values()]
    single_stds = [label_stds[0] for _, label_stds in plot_data.values()]

    # Compute both aggregates before extending the lists. The original took
    # the std after the mean had been appended, which counted the mean itself
    # as an extra sample.
    overall_mean = np.mean(single_means)
    overall_std = np.std(single_means)
    single_means.append(overall_mean)
    single_stds.append(overall_std)

    plot_barplot(fig, single_means, single_stds, "Single-label", show=True, color=neutral1, last_label="All labels (Average)")
    plot_barplot(fig, all_data[0], all_data[1], "Multi-label", show=True, color=neutral3, last_label="All labels (Average)")

    fig.update_layout(
        width=2200,
        height=1000,
        xaxis=dict(
            title="Label"
        ),
        yaxis=dict(
            title="F1 macro"
        ),
        font_size=40,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            borderwidth=1,
        )
    )

    fig.update_xaxes(
        tickangle=35,
        tickfont=dict(size=40)
    )

    # plot_title_infix looks like "(macro) "; [1:-2] strips "(" and ") ".
    plot_name = join(plot_dir, f"{curr_dataset}_single_{plot_title_infix[1:-2]}.{plot_extension}")
    fig.write_image(plot_name)
    

In [826]:
# Compare single-label training with the multi-label scores returned by the
# threshold experiment.
single_training_experiment(
    df_single,
    f1_thresholds,
    per_class_label=per_class_label,
    mean_label=mean_label,
)