In [None]:
from IPython.display import display
from pathlib import Path
import json

import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import seaborn

In [None]:
# files
# Directory intended for rendered plots.
plot_dir = Path("plots")
# Create it idempotently: no separate exists() check (which is racy), and a
# clear FileExistsError if "plots" exists but is not a directory.
plot_dir.mkdir(parents=True, exist_ok=True)

# Training logs (trainer_state.json) of the two runs compared in the plots.
# NOTE(review): both paths point into multihead_finetuning_v4 (different
# checkpoints), while the experiment-1 result tables below come from
# multihead_finetuning_v1 - confirm this pairing is intended.
experiment1_f = Path("training_logs/multihead_finetuning_v4/checkpoint-960/trainer_state.json")
experiment2_f = Path("training_logs/multihead_finetuning_v4/checkpoint-864/trainer_state.json")

# Final metric tables of experiment 1 on the validation and test splits.
experiment1_val_f = Path("results/multihead_finetuning_v1/validation.csv")
experiment1_test_f = Path("results/multihead_finetuning_v1/test.csv")

# Metric tables of the baseline on the same two splits.
baseline_val_f = Path("results/baseline/validation.csv")
baseline_test_f = Path("results/baseline/test.csv")

In [None]:
# fileloader
# Read the baseline result tables; the first CSV column becomes the row index.
baseline_val_df, baseline_test_df = (
    pd.read_csv(csv_file, index_col=0)
    for csv_file in (baseline_val_f, baseline_test_f)
)

# Same layout for the experiment-1 result tables.
experiment1_val_df, experiment1_test_df = (
    pd.read_csv(csv_file, index_col=0)
    for csv_file in (experiment1_val_f, experiment1_test_f)
)

def load_experiment(experiment_file) -> pd.DataFrame:
    """Load the ``log_history`` of a trainer-state JSON file as one frame per epoch.

    The log is expected to hold two kinds of entries that share the same
    ``epoch`` value: training entries (all of ``grad_norm``, ``learning_rate``,
    ``loss`` and ``step`` present) and entries without a ``loss`` value. Both
    are indexed by epoch and joined side by side.

    Args:
        experiment_file: Path to a JSON file with a top-level ``"log_history"``
            list of records, each containing an ``"epoch"`` key.

    Returns:
        A DataFrame indexed by ``epoch`` whose first four columns are the
        training columns, followed by every remaining column of the entries
        that have no ``loss``.

    Raises:
        KeyError: If ``"log_history"``, ``"epoch"`` or one of the training
            columns is missing.
    """
    # Columns that are split off from the loss-free entries.
    train_cols = ["grad_norm", "learning_rate", "loss", "step"]

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(experiment_file, "r", encoding="utf-8") as file:
        log_history = json.load(file)["log_history"]

    history_df = pd.DataFrame(log_history).set_index("epoch")

    # Rows where every training column is populated.
    train_df = history_df[train_cols].dropna()
    # Rows without a loss value, stripped of the training columns.
    eval_df = history_df[history_df["loss"].isna()].drop(columns=train_cols)

    # Align both halves on the shared epoch index.
    return pd.concat([train_df, eval_df], axis=1)

# Parse the training log of each run into an epoch-indexed frame.
experiment1_df, experiment2_df = (
    load_experiment(state_file)
    for state_file in (experiment1_f, experiment2_f)
)

In [None]:
# display dataframes
# Rich-render one training log and both baseline tables, in that order.
display(experiment1_df, baseline_val_df, baseline_test_df)

In [None]:
def format_df(metric: str, experiments: list[pd.DataFrame], experiment_names: list[str], prefixes: list[str] | str = "eval_") -> tuple[pd.DataFrame, str]:
    parsed_metric = metric.replace("_", " ").title()
    if isinstance(prefixes, list):
        for i, prefix, name, exp in zip(range(len(experiments)), prefixes, experiment_names, experiments):
            cur_metric = f"{prefix}{metric}"
            experiments[i] = exp.rename({cur_metric: parsed_metric}, axis=1)
            experiments[i]["Experiment"] = name.replace("_", " ").title()

    else:
        for i, name, exp in zip(range(len(experiments)), experiment_names, experiments):
            cur_metric = f"{prefixes}{metric}"
            experiments[i] = exp.rename({cur_metric: parsed_metric}, axis=1)
            experiments[i]["Experiment"] = name.replace("_", " ").title()
    
    formatted_df = pd.concat(experiments, axis=0)
    formatted_df.index.rename("Epoch", inplace=True)
    return formatted_df, parsed_metric

In [None]:
# plot creation
# Resolution applied to figures created after this cell has run.
matplotlib.rcParams.update({"figure.dpi": 200})

# Styling constants read by the plotting helpers.
BASELINE_COLOR = "black"  # colour of the baseline line and its legend entry
CONTEXT = "poster"  # seaborn plotting context
FIGSIZE = (11, 7)  # figure size handed to matplotlib

def plot_training_progress(metric: str, experiments: list[pd.DataFrame], experiment_names: list[str], prefixes: list[str] | str = "eval_", baseline: pd.DataFrame | None = None):
    """Plot one metric over the epochs for several experiments.

    Args:
        metric: Metric name without prefix, e.g. ``"f1_macro"``.
        experiments: One epoch-indexed frame per experiment.
        experiment_names: Legend label per experiment.
        prefixes: Column prefix shared by all experiments, or one per experiment.
        baseline: Optional frame; the first value of its
            ``f"validation_{metric}"`` column is drawn as a horizontal line
            labelled "Baseline".
    """
    # Configure the theme before the figure exists: seaborn themes only apply
    # to plots created afterwards, so setting it last left the first figure of
    # a fresh kernel unstyled.
    seaborn.set_theme(context=CONTEXT, palette="pastel")
    # NOTE(review): "c" does not look like a seaborn style key, so this rc
    # entry appears to be ignored - confirm which colour setting was intended.
    seaborn.set_style("whitegrid", rc={"c": (0.95, 0.95, 0.95)})

    # Hand format_df a copy so it cannot replace entries of the caller's list.
    formatted_df, metric_name = format_df(metric, list(experiments), experiment_names, prefixes)
    # Epoch as a regular column: the stacked frame repeats index labels.
    plot_df = formatted_df[[metric_name, "Experiment"]].rename_axis("Epoch").reset_index()

    fig, ax = plt.subplots(figsize=FIGSIZE)
    seaborn.lineplot(data=plot_df, x="Epoch", y=metric_name, hue="Experiment", ax=ax)

    handles, _ = ax.get_legend_handles_labels()

    if baseline is not None:
        ax.hlines(
            baseline[f"validation_{metric}"].iloc[0],
            xmin=0,
            xmax=plot_df["Epoch"].max(),
            colors=[BASELINE_COLOR],
        )
        # Proxy artist so the baseline shows up in the legend.
        handles.append(matplotlib.patches.Patch(color=BASELINE_COLOR, linewidth=1.0, label='Baseline'))

    ax.legend(handles=handles, title="Experiment")
    ax.set_ylim(0, 1)
    ax.set_title("Training Progress")
    seaborn.despine(fig=fig, top=True, left=True, bottom=True, right=True)

    plt.show()

def plot_training_metrics(metrics: list[str], experiment: pd.DataFrame, experiment_name: str, prefix: str = "eval_"):
    """Plot several metrics of a single experiment over the epochs.

    Args:
        metrics: Metric names without prefix, e.g. ``["f1_macro", "f1_micro"]``.
        experiment: Epoch-indexed frame containing ``f"{prefix}{metric}"``
            columns.
        experiment_name: Used as the figure title.
        prefix: Column prefix shared by all metrics.
    """
    # Configure the theme before the figure exists: seaborn themes only apply
    # to plots created afterwards, so setting it last left the first figure of
    # a fresh kernel unstyled.
    seaborn.set_theme(context=CONTEXT, palette="pastel")
    # NOTE(review): "c" does not look like a seaborn style key, so this rc
    # entry appears to be ignored - confirm which colour setting was intended.
    seaborn.set_style("whitegrid", rc={"c": (0.95, 0.95, 0.95)})

    # Collect one (Value, Metric) frame per metric and concatenate once,
    # instead of growing a frame that starts out empty inside the loop.
    metric_frames = []
    for metric in metrics:
        # The metric is passed as the "experiment name", so format_df's
        # "Experiment" column carries the prettified metric label.
        df, metric_name = format_df(metric, [experiment], [metric], prefix)
        metric_frames.append(
            df.rename(columns={metric_name: "Value", "Experiment": "Metric"})[["Value", "Metric"]]
        )

    if metric_frames:
        formatted_df = pd.concat(metric_frames)
    else:
        formatted_df = pd.DataFrame(columns=["Value", "Metric"])
    # Epoch as a regular column: the stacked frame repeats index labels.
    plot_df = formatted_df.rename_axis("Epoch").reset_index()

    fig, ax = plt.subplots(figsize=FIGSIZE)
    seaborn.lineplot(data=plot_df, x="Epoch", y="Value", hue="Metric", ax=ax)

    ax.set_ylim(0, 1)
    ax.set_title(experiment_name)
    seaborn.despine(fig=fig, top=True, left=True, bottom=True, right=True)

    plt.show()

def plot_result_metrics(metrics: list[str], experiments: list[pd.DataFrame], experiment_names: list[str], prefixes: list[str] | str = "eval_"):
    """Draw one bar-chart facet per metric comparing the final results of several experiments.

    Args:
        metrics: Metric names without prefix, e.g. ``["f1_macro", "f1_micro"]``.
        experiments: One result frame per experiment.
        experiment_names: Bar label per experiment.
        prefixes: Column prefix shared by all experiments, or one per experiment.
    """
    # Configure the theme before plotting: seaborn themes only apply to plots
    # created afterwards, so setting it last left the first figure of a fresh
    # kernel unstyled.
    seaborn.set_theme(context=CONTEXT, palette="pastel")
    # NOTE(review): "c" does not look like a seaborn style key, so this rc
    # entry appears to be ignored - confirm which colour setting was intended.
    seaborn.set_style("whitegrid", rc={"c": (0.95, 0.95, 0.95)})

    metric_frames = []
    for metric in metrics:
        # Fresh list per metric so every iteration starts from the original
        # frames and the caller's list is never modified.
        df, metric_name = format_df(metric, list(experiments), experiment_names, prefixes)
        metric_frames.append(
            df.assign(Metric=metric_name).rename(columns={metric_name: "Value"})[["Value", "Metric", "Experiment"]]
        )

    if metric_frames:
        # The index is not plotted; dropping it avoids duplicate row labels.
        formatted_df = pd.concat(metric_frames, ignore_index=True)
    else:
        formatted_df = pd.DataFrame(columns=["Value", "Metric", "Experiment"])

    # catplot is figure-level and builds its own figure, so no plt.figure()
    # call here: the earlier one only left an empty extra figure behind.
    grid = seaborn.catplot(data=formatted_df, x="Experiment", y="Value", col="Metric", hue="Experiment", kind="bar")

    grid.set(ylim=(0, 1))
    grid.despine(top=True, left=True, bottom=True, right=True)

    plt.show()

In [None]:
# Compare the eval_f1_macro curves of both runs, with the baseline as reference.
metric = "f1_macro"
experiments = [experiment1_df, experiment2_df]
experiment_names = ["Test 1", "Test 2"]
prefixes = ["eval_", "eval_"]

plot_training_progress(
    metric=metric,
    experiments=experiments,
    experiment_names=experiment_names,
    prefixes=prefixes,
    baseline=baseline_val_df,
)

In [None]:
# Plot the macro and micro F1 curves of experiment 1 in one figure.
metrics = ["f1_macro", "f1_micro"]
experiment = experiment1_df
experiment_name = "Test 1"
prefix = "eval_"

plot_training_metrics(
    metrics=metrics,
    experiment=experiment,
    experiment_name=experiment_name,
    prefix=prefix,
)

In [None]:
# Bar charts of the final F1 scores: experiment 1 (test and validation) vs. the baseline (test).
metrics = ["f1_macro", "f1_micro", "f1_weighted"]
experiments = [experiment1_test_df, experiment1_val_df, baseline_test_df]
experiment_names = ["Test 1", "Val 1", "Baseline"]
# Each result table prefixes its metric columns with the split it was computed on.
prefixes = ["test_", "validation_", "test_"]

plot_result_metrics(
    metrics=metrics,
    experiments=experiments,
    experiment_names=experiment_names,
    prefixes=prefixes,
)