In [11]:
import os
import sys
import warnings
from pprint import pprint

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import wandb

# Put the parent of the current working directory on the import path so the
# project-local package imported right below can be resolved.
project_root = os.path.abspath(os.pardir)
if sys.path.count(project_root) == 0:
    sys.path.append(project_root)

from ppm.wandb_utils import fetch_experiments

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# An ENTITY value already present in the environment wins; otherwise the
# default below is stored in the environment and used.
entity = os.environ.setdefault("ENTITY", "privajet-university-of-mannheim")
# WANDB_MODE is always overwritten with "offline".
os.environ.update(WANDB_MODE="offline")

# Echo the effective configuration so it is recorded in the cell output.
print("CWD:", os.getcwd())
print("project_root in sys.path:", project_root in sys.path)
print("ENTITY:", entity)
print("WANDB_MODE:", os.environ.get("WANDB_MODE"))

CWD: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results
project_root in sys.path: True
ENTITY: privajet-university-of-mannheim
WANDB_MODE: offline


In [12]:
# Destination folders for exported tables (csv) and figures (plots).
output_dir_csv, output_dir_plots = (
    "/ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv",
    "/ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots",
)
# Create both folders if they do not exist yet.
for export_dir in (output_dir_csv, output_dir_plots):
    os.makedirs(export_dir, exist_ok=True)

In [13]:
# Never truncate rows when pandas renders a DataFrame.
pd.set_option("display.max_rows", None)

# Global matplotlib defaults, grouped by concern and applied in a single update.
_figure_rc = {"figure.figsize": (6, 4)}
_text_rc = {
    "font.size": 10,
    "axes.labelsize": 10,
    "axes.titlesize": 10,
    "legend.fontsize": 9,
    "xtick.labelsize": 9,
    "ytick.labelsize": 9,
}
_line_rc = {"lines.linewidth": 1.5, "lines.markersize": 5}
_grid_rc = {"axes.grid": True, "grid.linestyle": "--", "grid.linewidth": 0.5}
_legend_rc = {"legend.frameon": False}
_export_rc = {
    # NOTE(review): 42 presumably selects TrueType font embedding in PDF/PS
    # output -- confirm against the matplotlib rcParams documentation.
    "pdf.fonttype": 42,
    "ps.fonttype": 42,
    "savefig.bbox": "tight",
    "savefig.dpi": 300,
}
mpl.rcParams.update(
    {**_figure_rc, **_text_rc, **_line_rc, **_grid_rc, **_legend_rc, **_export_rc}
)

# Colour cycle used for all subsequent plots, in this exact order.
colors = [
    "#9467bd",  # purple
    "#2ca02c",  # green
    "#bcbd22",  # olive
    "#7f7f7f",  # gray
    "#e377c2",  # pink
    "#8c564b",  # brown
    "#d62728",  # red
    "#17becf",  # cyan
    "#1f77b4",  # blue
    "#ff7f0e",  # orange
]

plt.rcParams["axes.prop_cycle"] = plt.cycler(color=colors)

In [16]:
# Load the per-log summary table (cases, events, activities, trace length).
# The path is built from output_dir_csv instead of repeating the absolute
# directory, so changing the output location in one place keeps this cell valid.
properties = pd.read_csv(os.path.join(output_dir_csv, "log_properties.csv"))
properties

Unnamed: 0,Log,# cases,# evt.,# act.,Trace length
0,BPI20PrepaidTravelCosts,2099,18246,29,8.6927±2.3
1,BPI20RequestForPayment,6886,36796,19,5.3436±1.5
2,BPI20TravelPermitData,7065,86581,51,12.2549±5.6
3,BPI12,13087,262200,24,20.0351±19.9
4,BPI17,31509,1202267,26,38.1563±16.7


In [5]:
from ppm.models import NextEventPredictor
import torch 

# NOTE(review): in the saved notebook this cell has execution count 5 and its
# recorded output is a KeyboardInterrupt, i.e. it did not run to completion.

# Prefer the GPU when torch reports one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = "cuda"
else:
    device = "cpu"
print("Using device:", device)

# Keyword arguments for a small example model with an "rnn" backbone of type
# "lstm"; the meaning of each option is defined by NextEventPredictor itself.
rnn_example_kwargs = dict(
    embedding_size=32,
    categorical_cols=["activity"],
    numerical_cols=["accumulated_time"],
    categorical_sizes={"activity": 20},
    categorical_targets=["activity"],
    numerical_targets=["remaining_time"],
    backbone_name="rnn",
    backbone_hidden_size=64,
    backbone_n_layers=2,
    padding_idx=0,
    strategy="sum",
    backbone_pretrained=False,
    backbone_finetuning=None,
    backbone_type="lstm",
    device=device,
)
rnn_example = NextEventPredictor(**rnn_example_kwargs)
pprint(rnn_example)

KeyboardInterrupt: 

In [17]:
def _to_int_or_none(x):
    """Convert ``x`` to ``int``; return ``None`` if it is missing or unparsable.

    ``None`` and float NaN count as missing. Values that ``int()`` rejects
    (for example a non-numeric string) also yield ``None``.
    """
    if x is None or (isinstance(x, float) and pd.isna(x)):
        return None
    try:
        return int(x)
    except (TypeError, ValueError, OverflowError):
        # The error types int() raises for unsupported or non-finite inputs.
        return None


def _is_unset(value) -> bool:
    """Return True when ``value`` carries no freeze-layer information.

    Covers ``None``, ``pd.NA``, float NaN, empty or whitespace-only strings and
    empty lists/tuples. Each check is type-guarded so sequence values are never
    compared element-wise.
    """
    if value is None or value is pd.NA:
        return True
    if isinstance(value, float):
        return bool(pd.isna(value))
    if isinstance(value, str):
        return value.strip() == ""
    if isinstance(value, (list, tuple)):
        return len(value) == 0
    return False


def map_setting(row):
    """Derive the fine-tuning setting label for one run.

    Args:
        row: Mapping-like object with a ``.get`` method (a ``dict`` or a
            ``pandas.Series``). The keys read are ``fine_tuning``,
            ``few_shot_k``, ``freeze_layers`` and ``epochs``.

    Returns:
        One of ``"FewShot-LoRA"``, ``"LoRA"``, ``"ZeroShot"``,
        ``"FewShot-Freezing"``, ``"Freezing"``, ``"Freezing-[<layer ids>]"``
        or ``"Other"``.
    """
    ft = row.get("fine_tuning")
    fl = row.get("freeze_layers", None)
    k = _to_int_or_none(row.get("few_shot_k", None))
    ep = _to_int_or_none(row.get("epochs", None))

    if ft == "lora":
        if k == 8:
            return "FewShot-LoRA"  # LoRA few-shot
        if k is None:
            return "LoRA"  # LoRA on the full data
        return "Other"

    if ft != "freeze":
        return "Other"

    # Zero-shot is checked first: epochs == 0 wins over every other freeze label.
    if ep == 0:
        return "ZeroShot"

    if k == 8:
        return "FewShot-Freezing"

    # Standard freezing: no freeze_layers given. NaN / pd.NA / blank strings are
    # treated like None here; previously they were labelled "Freezing-[]".
    if _is_unset(fl):
        return "Freezing"

    # Explicit layer configuration, e.g. "-1,-2" or [0, 1].
    if isinstance(fl, (list, tuple)):
        raw_items = fl
    else:
        raw_items = str(fl).replace("[", "").replace("]", "").replace(",", " ").split()
    layer_ids = [v for v in map(_to_int_or_none, raw_items) if v is not None]
    return f"Freezing-{layer_ids}"

In [18]:
# On-disk cache for the table assembled by build_global_results().
pkl_path = os.path.join(output_dir_csv, "global_results.pkl")

# Backbone key -> project name passed to fetch_experiments, in fetch order.
BACKBONE_PROJECTS = dict(
    [
        ("majority", "llm-peft-ppm_majority_baseline"),
        ("rnn", "llm-peft-ppm_rnn"),
        ("transformer", "llm-peft-ppm_transformer_baseline"),
        ("tabpfn", "llm-peft-ppm_tabpfn_baseline"),
        ("saprpt", "llm-peft-ppm_saprpt_baseline"),
        ("gpt2", "llm-peft-ppm_gpt2"),
        ("gptneo-1b3", "llm-peft-ppm_gpt-neo-1.3B"),
        ("qwen25-05b", "llm-peft-ppm_qwen25-05b"),
        ("llama32-1b", "llm-peft-ppm_llama32-1b"),
        ("gemma-2-2b", "llm-peft-ppm_gemma-2-2b"),
    ]
)

def build_global_results():
    """Fetch the runs of every project in BACKBONE_PROJECTS into one DataFrame.

    Each per-project frame returned by ``fetch_experiments`` is tagged with its
    ``backbone`` key and ``project`` name. The frames are then concatenated and
    reduced to the whitelisted columns that are actually present.

    Returns:
        pandas.DataFrame: the concatenated runs, restricted to the whitelist.
    """

    def _fetch_tagged(backbone, project_name):
        # One project's runs, labelled with where they came from.
        runs = fetch_experiments(project=project_name, entity=entity, include_metrics=True)
        runs["backbone"] = backbone
        runs["project"] = project_name
        return runs

    frames = [
        _fetch_tagged(backbone, project_name)
        for backbone, project_name in BACKBONE_PROJECTS.items()
    ]
    combined = pd.concat(frames, ignore_index=True)

    # Whitelist of columns to keep, in output order.
    run_info = [
        "id", "log", "backbone", "project", "fine_tuning",
        "total_params", "trainable_params", "seed", "_runtime", "_timestamp",
        "categorical_features", "categorical_targets",
        "continuous_features", "continuous_targets", "device", "model", "name",
    ]
    test_metrics = [
        "test_next_activity_acc",
        "test_next_activity_loss",
        "test_next_remaining_time_loss",
        "test_next_time_to_next_event_loss",
        "best_test_next_activity_acc",
        "best_test_next_activity_loss",
        "best_test_next_remaining_time_loss",
        "best_test_next_time_to_next_event_loss",
    ]
    hyperparameters = [
        "batch_size",
        "embedding_size",
        "epochs",
        "freeze_layers",
        "grad_clip",
        "hidden_size",
        "lr",
        "n_layers",
        "rnn_type",
        "strategy",
        "weight_decay",
        "lora_alpha",
        "r",
        "few_shot_k",
    ]

    # Columns missing from the fetched data are skipped rather than raising.
    present = [
        column
        for column in run_info + test_metrics + hyperparameters
        if column in combined.columns
    ]
    return combined[present]


# Load the cached table when possible; otherwise rebuild it and refresh the cache.
# NOTE: read_pickle should only be pointed at cache files this notebook wrote.
cache_loaded = False
if os.path.exists(pkl_path):
    try:
        global_results = pd.read_pickle(pkl_path)
        cache_loaded = True
    except Exception as load_error:
        # Message reads: "Error loading global_results.pkl, rebuilding".
        print("Fehler beim Laden von global_results.pkl, baue neu:", repr(load_error))

if not cache_loaded:
    global_results = build_global_results()
    global_results.to_pickle(pkl_path)

In [19]:
# Column selection for reporting (not used further in this cell).
cols = [
    "id", "log", "backbone", "project", "fine_tuning",
    "total_params", "trainable_params",
    "test_next_activity_acc",
    "test_next_activity_loss",
    "test_next_remaining_time_loss",
    "test_next_time_to_next_event_loss",
    "best_test_next_activity_acc",
    "best_test_next_activity_loss",
    "best_test_next_remaining_time_loss",
    "best_test_next_time_to_next_event_loss",
    "_runtime",
    "mt_score",
]

# Keep only runs that report all three test metrics entering the score.
required_metrics = [
    "test_next_activity_acc",
    "test_next_remaining_time_loss",
    "test_next_time_to_next_event_loss",
]
has_all_metrics = global_results[required_metrics].notna().all(axis=1)
df = global_results[has_all_metrics].copy()

# One scaler per metric, each fitted on all remaining runs at once.
sc_acc, sc_rt, sc_nt = MinMaxScaler(), MinMaxScaler(), MinMaxScaler()

df["na_norm"] = sc_acc.fit_transform(df[["test_next_activity_acc"]])
# Losses are negated before scaling so that a larger normalised value is better.
for norm_col, scaler, loss_col in (
    ("rt_norm", sc_rt, "test_next_remaining_time_loss"),
    ("nt_norm", sc_nt, "test_next_time_to_next_event_loss"),
):
    df[norm_col] = scaler.fit_transform(-df[[loss_col]])

# Multi-task score: plain sum of the three normalised metrics.
df["mt_score"] = df["na_norm"].add(df["rt_norm"]).add(df["nt_norm"])

df.head(10)

Unnamed: 0,id,log,backbone,project,fine_tuning,total_params,trainable_params,seed,_runtime,_timestamp,...,rnn_type,strategy,weight_decay,lora_alpha,r,few_shot_k,na_norm,rt_norm,nt_norm,mt_score
0,pox3cg0n,BPI20PrepaidTravelCosts,majority,llm-peft-ppm_majority_baseline,,1.0,1.0,,1.127169,1762725000.0,...,lstm,concat,0.1,,,,0.130048,0.944634,0.940268,2.01495
1,whfyo8uu,BPI12,majority,llm-peft-ppm_majority_baseline,,1.0,1.0,,35.60098,1762849000.0,...,lstm,concat,0.1,,,,0.252459,0.955857,0.943034,2.15135
2,oy378knj,BPI20TravelPermitData,majority,llm-peft-ppm_majority_baseline,,1.0,1.0,,23.808587,1762850000.0,...,lstm,concat,0.1,,,,0.087553,0.951915,0.961259,2.000726
3,3gchqw9a,BPI20RequestForPayment,majority,llm-peft-ppm_majority_baseline,,1.0,1.0,,21.075565,1762850000.0,...,lstm,concat,0.1,,,,0.188769,0.968881,0.975401,2.133051
4,cl197f3r,BPI17,majority,llm-peft-ppm_majority_baseline,,1.0,1.0,,107.474805,1762850000.0,...,lstm,concat,0.1,,,,0.170084,0.972109,0.963413,2.105607
5,vl6y1axq,BPI12,rnn,llm-peft-ppm_rnn,,88733.0,88733.0,41.0,64.434233,1764079000.0,...,lstm,sum,0.01,,,,0.898347,0.890896,0.959974,2.749217
6,wwv4s6ta,BPI12,rnn,llm-peft-ppm_rnn,,88733.0,88733.0,42.0,65.57315,1764080000.0,...,lstm,sum,0.01,,,,0.866947,0.885115,0.957324,2.709386
7,xjcuy92g,BPI12,rnn,llm-peft-ppm_rnn,,88733.0,88733.0,43.0,66.748843,1764080000.0,...,lstm,sum,0.01,,,,0.883914,0.882064,0.956911,2.722889
8,r5ax60vx,BPI12,rnn,llm-peft-ppm_rnn,,88733.0,88733.0,44.0,60.193704,1764080000.0,...,lstm,sum,0.01,,,,0.876448,0.886308,0.959447,2.722204
9,s5xrzrsn,BPI12,rnn,llm-peft-ppm_rnn,,88733.0,88733.0,45.0,68.910497,1764080000.0,...,lstm,sum,0.01,,,,0.904076,0.889049,0.956773,2.749898


In [20]:
# Metric columns that are summarised per group of runs.
METRICS = [
    "test_next_activity_acc",
    "test_next_activity_loss",
    "test_next_remaining_time_loss",
    "test_next_time_to_next_event_loss",
    "best_test_next_activity_acc",
    "best_test_next_activity_loss",
    "best_test_next_remaining_time_loss",
    "best_test_next_time_to_next_event_loss",
]


def agg_over_seeds(group: pd.DataFrame) -> pd.Series:
    """Summarise one group of runs.

    Args:
        group: Rows belonging to a single group (as passed by ``groupby.apply``).

    Returns:
        A Series holding ``n_runs`` (row count), ``mt_score_mean`` /
        ``mt_score_std`` when an ``mt_score`` column exists, and
        ``<metric>_mean`` / ``<metric>_std`` for every entry of ``METRICS`` that
        is a column of ``group``. Missing metric values are dropped before
        averaging.
    """
    available = set(group.columns)
    summary = {"n_runs": len(group)}

    if "mt_score" in available:
        scores = group["mt_score"]
        summary["mt_score_mean"] = scores.mean()
        summary["mt_score_std"] = scores.std()

    for metric in (name for name in METRICS if name in available):
        observed = group[metric].dropna()
        summary[f"{metric}_mean"] = observed.mean()
        summary[f"{metric}_std"] = observed.std()

    return pd.Series(summary)

In [21]:
# Majority baseline: summarised per (log, backbone).
is_majority = df["backbone"] == "majority"
majority = df.loc[is_majority].copy()
majority_by_log = majority.groupby(["log", "backbone"], dropna=False)
majority_grouped = majority_by_log.apply(agg_over_seeds).reset_index()

# Baseline backbones whose hyperparameter configurations are compared below.
BASELINE_BACKBONES = ["rnn", "transformer", "tabpfn", "saprpt"]
baseline = df.loc[df["backbone"].isin(BASELINE_BACKBONES)].copy()

# Every column that is NOT treated as a hyperparameter when grouping runs.
NON_HP_COLS = {
    # run identity and bookkeeping
    "id", "log", "backbone", "project", "project_name", "model", "name",
    "device", "seed", "persist_model", "wandb",
    # feature / target specification
    "categorical_features", "categorical_targets",
    "continuous_features", "continuous_targets",
    # fine-tuning options
    "fine_tuning", "lora_alpha", "r", "few_shot_k",
    # logger-side counters
    "_runtime", "_timestamp", "_step", "_wandb.runtime",
    # derived score columns
    "na_norm", "rt_norm", "nt_norm", "mt_score", "majority_stat",
    # model size
    "total_params", "trainable_params",
    # training metrics
    "best_train_next_remaining_time_loss", "best_train_next_activity_loss",
    "train_next_time_to_next_event_loss", "best_train_next_time_to_next_event_loss",
    "train_next_activity_acc", "train_next_activity_loss",
    "best_train_next_activity_acc", "train_next_remaining_time_loss",
} | set(METRICS)

# Whatever remains is treated as a hyperparameter (original column order kept).
HP_COLS = [column for column in baseline.columns if column not in NON_HP_COLS]
print("Hyperparameter columns:", HP_COLS)

Hyperparameter columns: ['batch_size', 'embedding_size', 'epochs', 'freeze_layers', 'grad_clip', 'hidden_size', 'lr', 'n_layers', 'rnn_type', 'strategy', 'weight_decay']


In [22]:
# One group per (log, backbone, hyperparameter configuration); the rows of each
# group are summarised by agg_over_seeds (defined in an earlier cell).
group_cols = ["log", "backbone", *HP_COLS]

baseline_by_config = baseline.groupby(group_cols, dropna=False)
baseline_grouped = baseline_by_config.apply(agg_over_seeds).reset_index()

# Rank configurations by the multi-task score, falling back to accuracy.
score_col = (
    "mt_score_mean"
    if "mt_score_mean" in baseline_grouped.columns
    else "test_next_activity_acc_mean"
)

# Row label of the best-scoring configuration per (log, backbone).
idx_best = baseline_grouped.groupby(["log", "backbone"])[score_col].idxmax()
baseline_best = baseline_grouped.loc[idx_best].reset_index(drop=True)

# Best baseline configurations plus the majority rows in one table.
baseline_all = pd.concat([baseline_best, majority_grouped], ignore_index=True)

# Display names; unmapped values keep their raw name.
DATASET_MAP = {
    "BPI12": "BPI12",
    "BPI17": "BPI17",
    "BPI20PrepaidTravelCosts": "BPI20PTC",
    "BPI20RequestForPayment": "BPI20RfP",
    "BPI20TravelPermitData": "BPI20TPD",
}
BACKBONE_MAP = {
    "majority": "Majority",
    "rnn": "RNN",
    "transformer": "Transformer",
    "tabpfn": "TabPFN",
    "saprpt": "SAP-RPT",
}

raw_logs = baseline_all["log"]
baseline_all["Dataset"] = raw_logs.map(DATASET_MAP).fillna(raw_logs)
raw_backbones = baseline_all["backbone"]
baseline_all["Backbone_pretty"] = raw_backbones.map(BACKBONE_MAP).fillna(raw_backbones)

# Human-readable "mean ± std" strings, rounded to four decimals.
for metric in METRICS:
    mean_col, std_col = f"{metric}_mean", f"{metric}_std"
    if not {mean_col, std_col}.issubset(baseline_all.columns):
        continue
    mean_text = baseline_all[mean_col].round(4).astype(str)
    std_text = baseline_all[std_col].round(4).astype(str)
    baseline_all[f"{metric}_mean_std"] = mean_text + " ± " + std_text

csv_path = os.path.join(output_dir_csv, "baseline_best_settings_mean_std.csv")
baseline_all.to_csv(csv_path, index=False)
print("Saved baseline summary to:", csv_path)

baseline_all

Saved baseline summary to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/baseline_best_settings_mean_std.csv


Unnamed: 0,log,backbone,batch_size,embedding_size,epochs,freeze_layers,grad_clip,hidden_size,lr,n_layers,...,Dataset,Backbone_pretty,test_next_activity_acc_mean_std,test_next_activity_loss_mean_std,test_next_remaining_time_loss_mean_std,test_next_time_to_next_event_loss_mean_std,best_test_next_activity_acc_mean_std,best_test_next_activity_loss_mean_std,best_test_next_remaining_time_loss_mean_std,best_test_next_time_to_next_event_loss_mean_std
0,BPI12,rnn,32.0,128.0,25.0,,5.0,512.0,5e-05,1.0,...,BPI12,RNN,0.7757 ± 0.0142,0.7689 ± 0.042,1.7738 ± 0.0889,1.3352 ± 0.0302,0.7799 ± 0.0095,0.758 ± 0.0285,1.7033 ± 0.1138,1.3205 ± 0.014
1,BPI12,saprpt,16.0,16.0,25.0,,5.0,32.0,0.0001,1.0,...,BPI12,SAP-RPT,0.6294 ± 0.0121,nan ± nan,2.1601 ± 0.1146,1.6706 ± 0.372,nan ± nan,nan ± nan,nan ± nan,nan ± nan
2,BPI12,tabpfn,16.0,16.0,25.0,,5.0,32.0,0.0001,1.0,...,BPI12,TabPFN,0.6364 ± 0.0153,nan ± nan,2.2338 ± 0.0937,1.6877 ± 0.4063,nan ± nan,nan ± nan,nan ± nan,nan ± nan
3,BPI12,transformer,32.0,128.0,10.0,,5.0,128.0,0.001,2.0,...,BPI12,Transformer,0.7562 ± 0.0246,0.7139 ± 0.1077,2.8388 ± 0.1209,1.3671 ± 0.0495,0.7687 ± 0.016,0.6635 ± 0.0383,2.6204 ± 0.1508,1.3364 ± 0.016
4,BPI17,rnn,256.0,32.0,25.0,,5.0,512.0,0.0001,1.0,...,BPI17,RNN,0.8535 ± 0.0013,0.4193 ± 0.0068,0.6725 ± 0.0325,0.7629 ± 0.0159,0.8535 ± 0.0013,0.4193 ± 0.0068,0.6097 ± 0.0138,0.7629 ± 0.0159
5,BPI17,saprpt,16.0,16.0,25.0,,5.0,32.0,0.0001,1.0,...,BPI17,SAP-RPT,0.6472 ± 0.0268,nan ± nan,1.3056 ± 0.2773,1.1287 ± 0.1969,nan ± nan,nan ± nan,nan ± nan,nan ± nan
6,BPI17,tabpfn,16.0,16.0,25.0,,5.0,32.0,0.0001,1.0,...,BPI17,TabPFN,0.669 ± 0.0194,nan ± nan,1.6011 ± 0.0589,1.1679 ± 0.2366,nan ± nan,nan ± nan,nan ± nan,nan ± nan
7,BPI17,transformer,32.0,128.0,10.0,,5.0,128.0,0.001,1.0,...,BPI17,Transformer,0.8556 ± 0.0058,0.4141 ± 0.0136,1.156 ± 0.0803,0.8836 ± 0.0296,0.8596 ± 0.0024,0.407 ± 0.0142,1.1343 ± 0.0847,0.8433 ± 0.0401
8,BPI20PrepaidTravelCosts,rnn,32.0,128.0,25.0,,5.0,128.0,0.0005,1.0,...,BPI20PTC,RNN,0.7841 ± 0.0201,0.6578 ± 0.0553,1.0744 ± 0.0308,1.1906 ± 0.0485,0.7897 ± 0.0134,0.6549 ± 0.0492,0.9536 ± 0.027,1.1239 ± 0.0216
9,BPI20PrepaidTravelCosts,saprpt,16.0,16.0,25.0,,5.0,32.0,0.0001,1.0,...,BPI20PTC,SAP-RPT,0.7646 ± 0.0054,nan ± nan,1.041 ± 0.0605,1.0863 ± 0.2173,nan ± nan,nan ± nan,nan ± nan,nan ± nan


In [23]:
LLM_BACKBONES = ["gpt2", "gptneo-1b3", "qwen25-05b", "llama32-1b", "gemma-2-2b"]

# Runs of the LLM backbones, each labelled with its fine-tuning setting.
is_llm_run = df["backbone"].isin(LLM_BACKBONES)
llm = df.loc[is_llm_run].copy()
llm["Setting"] = llm.apply(map_setting, axis=1)

# For LLMs the LoRA / few-shot options count as hyperparameters, while the
# derived "Setting" label does not.
NON_HP_COLS_LLM = (set(NON_HP_COLS) - {"lora_alpha", "r", "few_shot_k"}) | {"Setting"}

HP_COLS_LLM = [column for column in llm.columns if column not in NON_HP_COLS_LLM]
print("LLM Hyperparameter columns:", HP_COLS_LLM)

group_cols_llm = ["log", "backbone", "Setting", *HP_COLS_LLM]

# Same aggregation function as for the baselines.
llm_by_config = llm.groupby(group_cols_llm, dropna=False)
llm_grouped = llm_by_config.apply(agg_over_seeds).reset_index()

# Rank configurations by the multi-task score, falling back to accuracy.
score_col = (
    "mt_score_mean"
    if "mt_score_mean" in llm_grouped.columns
    else "test_next_activity_acc_mean"
)

# Row label of the best-scoring configuration per (log, backbone, Setting).
idx_best_llm = llm_grouped.groupby(["log", "backbone", "Setting"])[score_col].idxmax()

llm_all = llm_grouped.loc[idx_best_llm].reset_index(drop=True)

# Display names for the LLM backbones; unmapped values keep their raw name.
BACKBONE_MAP_LLM = {
    "gpt2": "GPT2",
    "gptneo-1b3": "GPT-Neo-1.3B",
    "qwen25-05b": "Qwen2.5-0.5B",
    "llama32-1b": "Llama3.2-1B",
    "gemma-2-2b": "Gemma-2-2B",
}

raw_logs = llm_all["log"]
llm_all["Dataset"] = raw_logs.map(DATASET_MAP).fillna(raw_logs)
raw_backbones = llm_all["backbone"]
llm_all["Backbone_pretty"] = raw_backbones.map(BACKBONE_MAP_LLM).fillna(raw_backbones)

# Human-readable "mean ± std" strings, rounded to four decimals.
for metric in METRICS:
    mean_col, std_col = f"{metric}_mean", f"{metric}_std"
    if not {mean_col, std_col}.issubset(llm_all.columns):
        continue
    mean_text = llm_all[mean_col].round(4).astype(str)
    std_text = llm_all[std_col].round(4).astype(str)
    llm_all[f"{metric}_mean_std"] = mean_text + " ± " + std_text

csv_path = os.path.join(output_dir_csv, "llm_all_settings_by_method_mean_std.csv")
llm_all.to_csv(csv_path, index=False)
print("Saved LLM summary to:", csv_path)

llm_all.head(15)

LLM Hyperparameter columns: ['batch_size', 'embedding_size', 'epochs', 'freeze_layers', 'grad_clip', 'hidden_size', 'lr', 'n_layers', 'rnn_type', 'strategy', 'weight_decay', 'lora_alpha', 'r', 'few_shot_k']
Saved LLM summary to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/llm_all_settings_by_method_mean_std.csv


Unnamed: 0,log,backbone,Setting,batch_size,embedding_size,epochs,freeze_layers,grad_clip,hidden_size,lr,...,Dataset,Backbone_pretty,test_next_activity_acc_mean_std,test_next_activity_loss_mean_std,test_next_remaining_time_loss_mean_std,test_next_time_to_next_event_loss_mean_std,best_test_next_activity_acc_mean_std,best_test_next_activity_loss_mean_std,best_test_next_remaining_time_loss_mean_std,best_test_next_time_to_next_event_loss_mean_std
0,BPI12,gemma-2-2b,FewShot-Freezing,8,2304,10,,5,2304,5e-05,...,BPI12,Gemma-2-2B,0.0799 ± 0.0341,3.6757 ± 0.2217,1.712 ± 0.236,1.609 ± 0.0568,0.0876 ± 0.0281,3.4803 ± 0.1926,1.3363 ± 0.123,1.5519 ± 0.0226
1,BPI12,gemma-2-2b,FewShot-LoRA,8,2304,10,,5,2304,5e-05,...,BPI12,Gemma-2-2B,0.0751 ± 0.0496,4.1549 ± 0.8314,3.5247 ± 2.3271,2.502 ± 1.3303,0.0953 ± 0.0496,3.1654 ± 0.2169,1.4307 ± 0.2371,1.6082 ± 0.0677
2,BPI12,gemma-2-2b,Freezing,8,2304,10,,5,2304,5e-05,...,BPI12,Gemma-2-2B,0.3912 ± 0.0314,1.4898 ± 0.0318,2.0048 ± 0.0841,1.5121 ± 0.0317,0.4028 ± 0.0211,1.4816 ± 0.0364,1.9842 ± 0.0928,1.4755 ± 0.0157
3,BPI12,gemma-2-2b,"Freezing-[-1, -2]",8,2304,10,"-1,-2",5,2304,5e-05,...,BPI12,Gemma-2-2B,0.7409 ± 0.0644,0.7587 ± 0.1567,2.9607 ± 0.0668,1.407 ± 0.0574,0.7921 ± 0.0109,0.6666 ± 0.0472,2.1902 ± 0.1723,1.3602 ± 0.0079
4,BPI12,gemma-2-2b,Freezing-[-1],8,2304,10,-1,5,2304,5e-05,...,BPI12,Gemma-2-2B,0.7481 ± 0.0678,0.8252 ± 0.3164,2.6484 ± 0.1867,1.4281 ± 0.0205,0.8007 ± 0.0061,0.6464 ± 0.0429,2.2036 ± 0.1755,1.3601 ± 0.0117
5,BPI12,gemma-2-2b,"Freezing-[0, 1]",8,2304,10,01,5,2304,5e-05,...,BPI12,Gemma-2-2B,0.7328 ± 0.0603,0.7753 ± 0.0927,2.413 ± 0.1309,1.3021 ± 0.0234,0.797 ± 0.0121,0.6983 ± 0.0244,2.3113 ± 0.1783,1.2989 ± 0.019
6,BPI12,gemma-2-2b,Freezing-[0],8,2304,10,0,5,2304,5e-05,...,BPI12,Gemma-2-2B,0.7857 ± 0.0103,0.729 ± 0.0074,2.4203 ± 0.1474,1.3369 ± 0.0233,0.7933 ± 0.0051,0.7206 ± 0.0217,2.2122 ± 0.0645,1.3219 ± 0.0238
7,BPI12,gemma-2-2b,LoRA,8,2304,10,,5,2304,5e-05,...,BPI12,Gemma-2-2B,0.625 ± 0.0467,1.1342 ± 0.1744,2.3835 ± 0.2465,1.4898 ± 0.0821,0.6512 ± 0.0557,1.0486 ± 0.1395,2.0852 ± 0.11,1.4166 ± 0.0593
8,BPI12,gemma-2-2b,ZeroShot,8,2304,0,,5,2304,5e-05,...,BPI12,Gemma-2-2B,0.0298 ± 0.0184,4.4444 ± 0.6145,4.3306 ± 3.2487,3.6357 ± 2.514,0.0298 ± 0.0184,4.4444 ± 0.6145,4.3306 ± 3.2487,3.6357 ± 2.514
9,BPI12,gpt2,FewShot-Freezing,8,768,10,,5,768,5e-05,...,BPI12,GPT2,0.0372 ± 0.0315,3.5797 ± 0.3214,1.9015 ± 0.8402,1.6738 ± 0.0887,0.0387 ± 0.0323,3.5797 ± 0.3214,1.8622 ± 0.8651,1.5788 ± 0.0816


In [24]:
# Recompute the setting labels and show how many runs fall into each one.
setting_labels = llm.apply(map_setting, axis=1)
llm["Setting"] = setting_labels
setting_counts = llm["Setting"].value_counts()
print(setting_counts)

Setting
Freezing-[-1, -2]    125
Freezing-[0, 1]      125
Freezing-[0]         125
Freezing-[-1]        125
Freezing             125
ZeroShot             125
FewShot-Freezing     125
FewShot-LoRA         125
LoRA                 125
Name: count, dtype: int64


In [25]:
# Stack the baseline and LLM summaries, then order the rows for reporting.
sort_keys = ["Dataset", "Backbone_pretty", "Setting"]
multi = (
    pd.concat([baseline_all, llm_all], ignore_index=True, sort=False)
    .sort_values(sort_keys)
    .reset_index(drop=True)
)

csv_path = os.path.join(output_dir_csv, "multi_task_benchmark_results.csv")
multi.to_csv(csv_path, index=False)
print("Saved combined multi-task table to:", csv_path)

multi.head()

Saved combined multi-task table to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/multi_task_benchmark_results.csv


Unnamed: 0,log,backbone,batch_size,embedding_size,epochs,freeze_layers,grad_clip,hidden_size,lr,n_layers,...,test_next_remaining_time_loss_mean_std,test_next_time_to_next_event_loss_mean_std,best_test_next_activity_acc_mean_std,best_test_next_activity_loss_mean_std,best_test_next_remaining_time_loss_mean_std,best_test_next_time_to_next_event_loss_mean_std,Setting,lora_alpha,r,few_shot_k
0,BPI12,gptneo-1b3,8.0,2048.0,10.0,,5.0,2048.0,5e-05,1.0,...,1.9737 ± 0.3397,1.7584 ± 0.1475,0.0734 ± 0.0279,3.3621 ± 0.3649,1.5616 ± 0.2029,1.6504 ± 0.1312,FewShot-Freezing,,,8.0
1,BPI12,gptneo-1b3,8.0,2048.0,10.0,,5.0,2048.0,5e-05,1.0,...,3.1797 ± 2.6166,1.8453 ± 0.314,0.1002 ± 0.0504,3.1159 ± 0.0594,1.4986 ± 0.4039,1.5639 ± 0.0325,FewShot-LoRA,512.0,256.0,8.0
2,BPI12,gptneo-1b3,8.0,2048.0,10.0,,5.0,2048.0,5e-05,1.0,...,2.348 ± 0.1437,1.531 ± 0.0338,0.5736 ± 0.0769,1.4783 ± 0.3746,2.2858 ± 0.1486,1.4952 ± 0.0342,Freezing,,,
3,BPI12,gptneo-1b3,8.0,2048.0,10.0,"-1,-2",5.0,2048.0,5e-05,1.0,...,2.466 ± 0.111,1.5124 ± 0.0677,0.5663 ± 0.071,1.4508 ± 0.2137,2.3459 ± 0.1544,1.4892 ± 0.0518,"Freezing-[-1, -2]",,,
4,BPI12,gptneo-1b3,8.0,2048.0,10.0,-1,5.0,2048.0,5e-05,1.0,...,2.5603 ± 0.095,1.5131 ± 0.0366,0.5474 ± 0.0859,1.4897 ± 0.2106,2.3335 ± 0.0808,1.492 ± 0.0327,Freezing-[-1],,,


In [26]:
# Reload the combined table from disk and write one CSV per log.
multi_path = os.path.join(output_dir_csv, "multi_task_benchmark_results.csv")
multi = pd.read_csv(multi_path)

per_dataset_root = os.path.join(output_dir_csv, "per_dataset")
for log_name, df_log in multi.groupby("log"):
    # Each log gets its own sub-folder, created on demand.
    log_dir = os.path.join(per_dataset_root, log_name)
    os.makedirs(log_dir, exist_ok=True)

    csv_name = f"multi_task_benchmark_results_{log_name}.csv"
    csv_path = os.path.join(log_dir, csv_name)
    df_log.to_csv(csv_path, index=False)
    print(f"Saved per-log table for {log_name} to {csv_path}")

Saved per-log table for BPI12 to /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI12/multi_task_benchmark_results_BPI12.csv
Saved per-log table for BPI17 to /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI17/multi_task_benchmark_results_BPI17.csv
Saved per-log table for BPI20PrepaidTravelCosts to /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI20PrepaidTravelCosts/multi_task_benchmark_results_BPI20PrepaidTravelCosts.csv
Saved per-log table for BPI20RequestForPayment to /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI20RequestForPayment/multi_task_benchmark_results_BPI20RequestForPayment.csv
Saved per-log table for BPI20TravelPermitData to /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI20TravelPermitData/multi_task_benchmark_results_BPI20TravelPermitData.csv


In [27]:
# Reload the combined table and write one CSV per (log, LLM backbone) pair.
multi_path = os.path.join(output_dir_csv, "multi_task_benchmark_results.csv")
multi = pd.read_csv(multi_path)

LLM_BACKBONES = ["gpt2", "gptneo-1b3", "qwen25-05b", "llama32-1b", "gemma-2-2b"]

is_llm_row = multi["backbone"].isin(LLM_BACKBONES)
llm_multi = multi.loc[is_llm_row].copy()

per_dataset_root = os.path.join(output_dir_csv, "per_dataset")
for group_key, df_sub in llm_multi.groupby(["log", "backbone"]):
    log_name, backbone = group_key
    log_dir = os.path.join(per_dataset_root, log_name)
    os.makedirs(log_dir, exist_ok=True)

    csv_name = f"llm_methods_{log_name}_{backbone}.csv"
    csv_path = os.path.join(log_dir, csv_name)
    df_sub.to_csv(csv_path, index=False)
    print(f"Saved LLM methods table for log={log_name}, backbone={backbone} to: {csv_path}")

Saved LLM methods table for log=BPI12, backbone=gemma-2-2b to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI12/llm_methods_BPI12_gemma-2-2b.csv
Saved LLM methods table for log=BPI12, backbone=gpt2 to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI12/llm_methods_BPI12_gpt2.csv
Saved LLM methods table for log=BPI12, backbone=gptneo-1b3 to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI12/llm_methods_BPI12_gptneo-1b3.csv
Saved LLM methods table for log=BPI12, backbone=llama32-1b to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI12/llm_methods_BPI12_llama32-1b.csv
Saved LLM methods table for log=BPI12, backbone=qwen25-05b to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI12/llm_methods_BPI12_qwen25-05b.csv
Saved LLM methods table for log=BPI17, backbone=gemma-2-2b to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI17/llm_methods_BPI17_gemma-2-2b.cs

In [28]:
# Reload the combined table and export, per log, every baseline row together
# with the rows of non-baseline backbones whose Setting is exactly "LoRA".
multi_path = os.path.join(output_dir_csv, "multi_task_benchmark_results.csv")
multi = pd.read_csv(multi_path)

# Unlike the earlier baseline list, this one also contains "majority".
BASELINE_BACKBONES = ["majority", "rnn", "transformer", "tabpfn", "saprpt"]

mask_baseline = multi["backbone"].isin(BASELINE_BACKBONES)
is_lora_setting = multi["Setting"] == "LoRA"
mask_llm_lora = (~mask_baseline) & is_lora_setting

subset = multi.loc[mask_baseline | mask_llm_lora].copy()

per_dataset_root = os.path.join(output_dir_csv, "per_dataset")
for log_name, df_log in subset.groupby("log"):
    log_dir = os.path.join(per_dataset_root, log_name)
    os.makedirs(log_dir, exist_ok=True)

    csv_name = f"baseline_vs_lora_multi_task_results_{log_name}.csv"
    csv_path_log = os.path.join(log_dir, csv_name)
    df_log.to_csv(csv_path_log, index=False)
    print(f"Saved baseline vs LoRA table for {log_name} to: {csv_path_log}")

Saved baseline vs LoRA table for BPI12 to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI12/baseline_vs_lora_multi_task_results_BPI12.csv
Saved baseline vs LoRA table for BPI17 to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI17/baseline_vs_lora_multi_task_results_BPI17.csv
Saved baseline vs LoRA table for BPI20PrepaidTravelCosts to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI20PrepaidTravelCosts/baseline_vs_lora_multi_task_results_BPI20PrepaidTravelCosts.csv
Saved baseline vs LoRA table for BPI20RequestForPayment to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI20RequestForPayment/baseline_vs_lora_multi_task_results_BPI20RequestForPayment.csv
Saved baseline vs LoRA table for BPI20TravelPermitData to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/csv/per_dataset/BPI20TravelPermitData/baseline_vs_lora_multi_task_results_BPI20TravelPermitData.csv


In [34]:
LLM_BACKBONES = ["gpt2", "gptneo-1b3", "qwen25-05b", "llama32-1b", "gemma-2-2b"]

# Fresh copy of the LLM runs, labelled with their fine-tuning setting.
is_llm_run = df["backbone"].isin(LLM_BACKBONES)
llm = df.loc[is_llm_run].copy()
llm["Setting"] = llm.apply(map_setting, axis=1)

def collapse_setting_for_main(setting: str) -> str:
    """Fold every layer-specific ``"Freezing-[...]"`` label into ``"Freezing"``.

    Labels such as ``Freezing-[-1]``, ``Freezing-[0]``, ``Freezing-[0, 1]`` and
    ``Freezing-[-1, -2]`` are merged. Any other value, including non-strings,
    is returned unchanged.
    """
    is_layer_variant = isinstance(setting, str) and setting.startswith("Freezing-[")
    return "Freezing" if is_layer_variant else setting

# Collapsed label: all layer-specific Freezing variants share one category.
llm["Setting_main"] = llm["Setting"].apply(collapse_setting_for_main)

# Per-dataset figure folder. Built from output_dir_plots instead of repeating
# the absolute directory, so the plot location is configured in one place only.
plots_base_dir = os.path.join(output_dir_plots, "per_dataset")

# Left-to-right category order for the detailed box plots.
SETTING_ORDER_FULL = [
    "ZeroShot",
    "LoRA",
    "FewShot-LoRA",
    "Freezing",
    "Freezing-[-1]",
    "Freezing-[0]",
    "Freezing-[0, 1]",
    "Freezing-[-1, -2]",
    "FewShot-Freezing",
]

# Left-to-right category order for the collapsed box plots.
SETTING_ORDER_MAIN = [
    "ZeroShot",
    "LoRA",
    "FewShot-LoRA",
    "Freezing",
    "FewShot-Freezing",
]

# (metric column, y-axis label) for each of the three stacked panels.
PLOTS = [
    ("test_next_activity_acc",           "NA Acc."),
    ("test_next_remaining_time_loss",    "RT MSE"),
    ("test_next_time_to_next_event_loss","NT MSE"),
]

def _save_setting_boxplots(data, x_col, order, xlabel, title, out_path):
    """Draw one box plot per entry of PLOTS (stacked, shared x axis) and save it.

    ``data`` is plotted with ``x_col`` on the x axis, restricted to and ordered
    by ``order``. The figure is written to ``out_path`` and then closed.
    """
    fig, axes = plt.subplots(3, 1, figsize=(8, 9), sharex=True)

    for ax, (metric, ylabel) in zip(axes, PLOTS):
        sns.boxplot(data=data, x=x_col, y=metric, order=order, ax=ax)
        ax.set_ylabel(ylabel)
        ax.set_xticks(range(len(order)))
        ax.set_xticklabels(order, rotation=45, ha="right")

    axes[-1].set_xlabel(xlabel)

    fig.suptitle(title, fontsize=12)
    plt.tight_layout()

    plt.savefig(out_path, dpi=300)
    plt.close(fig)


for log_name, df_log in llm.groupby("log"):
    log_dir = os.path.join(plots_base_dir, log_name)
    os.makedirs(log_dir, exist_ok=True)

    for backbone, df_b in df_log.groupby("backbone"):

        # Detailed figure: every Freezing variant is its own category.
        seen_full = set(df_b["Setting"].unique())
        settings_full = [s for s in SETTING_ORDER_FULL if s in seen_full]
        if settings_full:
            out_path = os.path.join(log_dir, f"llm_methods_boxplot_freezing_{log_name}_{backbone}.png")
            _save_setting_boxplots(
                df_b,
                "Setting",
                settings_full,
                "Fine-tuning method (detailed)",
                f"{log_name} – {backbone} (all Freezing variants)",
                out_path,
            )
            print(f"Saved detailed boxplot for log={log_name}, backbone={backbone} to: {out_path}")

        # Collapsed figure: layer-specific Freezing variants share one category.
        seen_main = set(df_b["Setting_main"].unique())
        settings_main = [s for s in SETTING_ORDER_MAIN if s in seen_main]
        if settings_main:
            out_path = os.path.join(log_dir, f"llm_methods_boxplot_collapsed_{log_name}_{backbone}.png")
            _save_setting_boxplots(
                df_b,
                "Setting_main",
                settings_main,
                "Fine-tuning method (collapsed)",
                f"{log_name} – {backbone} (collapsed Freezing)",
                out_path,
            )
            print(f"Saved collapsed boxplot for log={log_name}, backbone={backbone} to: {out_path}")

Saved detailed boxplot for log=BPI12, backbone=gemma-2-2b to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_methods_boxplot_freezing_BPI12_gemma-2-2b.png
Saved collapsed boxplot for log=BPI12, backbone=gemma-2-2b to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_methods_boxplot_collapsed_BPI12_gemma-2-2b.png
Saved detailed boxplot for log=BPI12, backbone=gpt2 to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_methods_boxplot_freezing_BPI12_gpt2.png
Saved collapsed boxplot for log=BPI12, backbone=gpt2 to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_methods_boxplot_collapsed_BPI12_gpt2.png
Saved detailed boxplot for log=BPI12, backbone=gptneo-1b3 to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_methods_boxplot_freezing_BPI12_gptneo-1b3.png
Saved collapsed boxplot for log=BPI12, backbone=gptneo-1b3 to: /ceph/lfertig/Thesis/n

In [35]:
LLM_BACKBONES = ["gpt2", "gptneo-1b3", "qwen25-05b", "llama32-1b", "gemma-2-2b"]

# Fresh copy of the LLM runs, labelled with their fine-tuning setting.
llm_rows = df["backbone"].isin(LLM_BACKBONES)
llm = df.loc[llm_rows].copy()
llm["Setting"] = llm.apply(map_setting, axis=1)

def collapse_setting_for_main(setting: str) -> str:
    """Collapse every layer-specific freezing label into the single label "Freezing".

    Args:
        setting: Setting label of a run. Non-string values are tolerated.

    Returns:
        "Freezing" if ``setting`` is a string starting with "Freezing-[";
        otherwise ``setting`` itself, unchanged.
    """
    is_layer_variant = isinstance(setting, str) and setting.startswith("Freezing-[")
    # The layer selections -1 / 0 / 0,1 / -1,-2 are merged into one group.
    return "Freezing" if is_layer_variant else setting

# Coarse setting label: all "Freezing-[...]" variants share the label "Freezing".
llm["Setting_main"] = llm["Setting"].apply(collapse_setting_for_main)

# Display names for the LLM backbones
BACKBONE_MAP_LLM = {
    "gpt2":         "GPT2",
    "gptneo-1b3":   "GPT-Neo-1.3B",
    "qwen25-05b":   "Qwen2.5-0.5B",
    "llama32-1b":   "Llama3.2-1B",
    "gemma-2-2b":   "Gemma-2-2B",
}
# Backbones without an entry in the map keep their raw identifier.
llm["Backbone_pretty"] = llm["backbone"].map(BACKBONE_MAP_LLM).fillna(llm["backbone"])

# Derived from the notebook-wide plot directory instead of repeating the
# absolute path, so relocating `output_dir_plots` moves these plots as well.
plots_base_dir = os.path.join(output_dir_plots, "per_dataset")

# Collapsed settings for which a backbone-comparison figure is produced, in this order.
MAIN_SETTING_ORDER = [
    "ZeroShot",
    "LoRA",
    "FewShot-LoRA",
    "Freezing",
    "FewShot-Freezing",
]

# (metric column, y-axis label) pairs; the plotting loop draws one subplot per pair.
PLOTS = [
    ("test_next_activity_acc",           "NA Acc."),
    ("test_next_remaining_time_loss",    "RT MSE"),
    ("test_next_time_to_next_event_loss","NT MSE"),
]

# One figure per (event log, collapsed setting): three stacked boxplots, one per
# entry of PLOTS, comparing the LLM backbones. Figures are written to
# <plots_base_dir>/<log>/ and closed right away so they do not pile up in memory.
for log_name, df_log in llm.groupby("log"):
    log_dir = os.path.join(plots_base_dir, log_name)
    os.makedirs(log_dir, exist_ok=True)

    for setting in MAIN_SETTING_ORDER:
        df_s = df_log[df_log["Setting_main"] == setting].copy()
        if df_s.empty:
            continue

        # Built once per setting rather than once per candidate backbone.
        present_backbones = set(df_s["backbone"].unique())
        # Keep the canonical LLM_BACKBONES order, restricted to backbones that have runs here.
        backbone_order = [
            BACKBONE_MAP_LLM[b]
            for b in LLM_BACKBONES
            if b in present_backbones
        ]
        if not backbone_order:
            continue

        fig, axes = plt.subplots(3, 1, figsize=(8, 9), sharex=True)

        for ax, (metric, ylabel) in zip(axes, PLOTS):
            sns.boxplot(
                data=df_s,
                x="Backbone_pretty",
                y=metric,
                order=backbone_order,
                ax=ax,
            )
            ax.set_ylabel(ylabel)
            # Pin the tick positions before assigning labels; set_xticklabels on
            # its own may put labels at unexpected positions.
            ax.set_xticks(range(len(backbone_order)))
            ax.set_xticklabels(backbone_order, rotation=45, ha="right")

        # The x-axis is shared, so only the bottom subplot carries the axis label.
        axes[-1].set_xlabel("LLM backbone")

        fig.suptitle(f"{log_name} – {setting}", fontsize=12)
        # Use the figure's own methods rather than pyplot's implicit "current figure".
        fig.tight_layout()

        out_path = os.path.join(
            log_dir,
            f"llm_backbones_boxplot_{log_name}_{setting}.png"
        )
        fig.savefig(out_path, dpi=300)
        plt.close(fig)

        print(f"Saved LLM-backbone comparison for log={log_name}, setting={setting} to: {out_path}")

Saved LLM-backbone comparison for log=BPI12, setting=ZeroShot to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_backbones_boxplot_BPI12_ZeroShot.png
Saved LLM-backbone comparison for log=BPI12, setting=LoRA to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_backbones_boxplot_BPI12_LoRA.png
Saved LLM-backbone comparison for log=BPI12, setting=FewShot-LoRA to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_backbones_boxplot_BPI12_FewShot-LoRA.png
Saved LLM-backbone comparison for log=BPI12, setting=Freezing to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_backbones_boxplot_BPI12_Freezing.png
Saved LLM-backbone comparison for log=BPI12, setting=FewShot-Freezing to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/per_dataset/BPI12/llm_backbones_boxplot_BPI12_FewShot-Freezing.png
Saved LLM-backbone comparison for log=BPI17, setting=ZeroShot to: /ceph/lf

In [None]:
# def fetch_single(
#     wandb_id: str,
#     targets=["na", "rt", "nt"],
#     project_name: str | None = None,
#     entity: str | None = None,
# ):
#     """
#     Fetch the per-epoch history curves for a single W&B run.
#     Returns (na_acc, na_loss, rt_loss, nt_loss) as lists.
#     Missing targets -> the corresponding list is None.
#     """
#     if isinstance(targets, str):
#         targets = [targets]

#     if project_name is None:
#         raise ValueError("fetch_single requires an explicit project_name.")

#     if entity is None:
#         entity = os.environ.get("ENTITY")
#         if entity is None:
#             raise ValueError("ENTITY not set and no entity passed to fetch_single().")

#     api = wandb.Api()
#     run = api.run(f"{entity}/{project_name}/{wandb_id}")
#     history = list(run.scan_history())

#     na_acc, na_loss, rt_loss, nt_loss = None, None, None, None

#     if "rt" in targets:
#         rt_loss = [
#             row["test_next_remaining_time_loss"]
#             for row in history
#             if "test_next_remaining_time_loss" in row
#         ]

#     if "na" in targets:
#         na_loss = [
#             row["test_next_activity_loss"]
#             for row in history
#             if "test_next_activity_loss" in row
#         ]
#         na_acc = [
#             row["test_next_activity_acc"]
#             for row in history
#             if "test_next_activity_acc" in row
#         ]

#     if "nt" in targets:
#         nt_loss = [
#             row["test_next_time_to_next_event_loss"]
#             for row in history
#             if "test_next_time_to_next_event_loss" in row
#         ]

#     return na_acc, na_loss, rt_loss, nt_loss

In [None]:
# loss_csv_path = os.path.join(output_dir_csv, "final_loss_curves_multitask.csv")

# LLM_BACKBONES = ["gpt2", "gptneo-1b3", "qwen25-05b", "llama32-1b", "gemma-2-2b"]

# if os.path.exists(loss_csv_path):
#     losses = pd.read_csv(loss_csv_path)
# else:
#     df_llm = df[df["backbone"].isin(LLM_BACKBONES)].copy()
#     best_runs = (
#         df_llm
#         .sort_values("mt_score", ascending=False)
#         .groupby(["log", "backbone"], as_index=False)
#         .head(1)  # best run per (log, backbone)
#     )

#     losses_list = []

#     for _, row in best_runs.iterrows():
#         na_acc, na_loss, rt_loss, nt_loss = fetch_single(
#             row.id,
#             project_name=row.project,
#             targets=["na", "rt", "nt"],
#         )

#         tmp = pd.DataFrame({
#             "epoch": range(len(na_loss)),
#             "na_acc": na_acc,
#             "na_loss": na_loss,
#             "rt_loss": rt_loss,
#             "nt_loss": nt_loss,
#         })
#         tmp["log"] = row.log
#         tmp["backbone"] = row.backbone
#         losses_list.append(tmp)

#     losses = pd.concat(losses_list, axis=0, ignore_index=True)
#     losses.to_csv(loss_csv_path, index=False)

# print("Loss curves shape:", losses.shape)

Loss curves shape: (510, 7)


In [None]:
# LOGS_TO_PLOT = sorted(losses["log"].unique())

# HUE_MAP = {
#     "gpt2":         "GPT2",
#     "gptneo-1b3":   "GPT-Neo-1.3B",
#     "qwen25-05b":   "Qwen2.5-0.5B",
#     "llama32-1b":   "Llama3.2-1B",
#     "gemma-2-2b":   "Gemma-2-2B",
# }

# HUE_ORDER = [
#     "GPT2",
#     "GPT-Neo-1.3B",
#     "Qwen2.5-0.5B",
#     "Llama3.2-1B",
#     "Gemma-2-2B",
# ]

# l = losses.melt(
#     id_vars=["log", "backbone", "epoch"],
#     value_vars=["na_loss", "rt_loss", "nt_loss"],
#     var_name="Loss",
#     value_name="Value",
# ).dropna(subset=["Value"])

# l["Backbone"] = l["backbone"].map(HUE_MAP)
# l = l[l["Backbone"].notna()]

# LOSS_LABELS = {
#     "na_loss": "NA Loss",
#     "rt_loss": "RT Loss",
#     "nt_loss": "NT Loss",
# }

# fig, axes = plt.subplots(
#     3, len(LOGS_TO_PLOT),
#     figsize=(4 * len(LOGS_TO_PLOT), 8),
#     sharex=True
# )
# axes_iter = iter(axes.flatten())

# legend_handles, legend_labels = None, None  # used later for the global legend

# for loss_name in ["na_loss", "rt_loss", "nt_loss"]:
#     for log_name in LOGS_TO_PLOT:
#         ax = next(axes_iter)
#         tmp = l[(l["Loss"] == loss_name) & (l["log"] == log_name)]

#         sns.lineplot(
#             data=tmp,
#             x="epoch",
#             y="Value",
#             hue="Backbone",
#             hue_order=[h for h in HUE_ORDER if h in tmp["Backbone"].unique()],
#             ax=ax,
#             linewidth=2.0,
#         )

#         ax.set_xlabel("Epoch")
#         ax.set_ylabel(LOSS_LABELS[loss_name])
#         ax.set_title(log_name)

#         # Grab the legend only once
#         leg = ax.get_legend()
#         if leg is not None:
#             handles, labels = leg.legend_handles, [t.get_text() for t in leg.get_texts()]
#             legend_handles, legend_labels = handles, labels
#             leg.remove()

# # global legend below the figure
# if legend_handles is not None:
#     fig.legend(
#         legend_handles,
#         legend_labels,
#         title="",
#         loc="lower center",
#         ncol=len(legend_labels),
#         bbox_to_anchor=(0.5, -0.02),
#     )

# plt.tight_layout(rect=(0, 0.05, 1, 1))  # leave room at the bottom for the legend

# plot_path = os.path.join(output_dir_plots, "loss_curves_multitask_llms.png")
# plt.savefig(plot_path, dpi=300)
# plt.close(fig)

# print("Saved loss curve plot to:", plot_path)

Saved loss curve plot to: /ceph/lfertig/Thesis/notebook/llm-peft-ppm/results/plots/loss_curves_multitask_llms.png


In [None]:
# # Parameter summary computed directly from df (raw runs)

# param_summary = (
#     df[["log", "backbone", "total_params", "trainable_params"]]
#     .dropna(subset=["total_params", "trainable_params"])
#     .drop_duplicates()
#     .copy()
# )

# param_summary["trainable_percent"] = (
#     param_summary["trainable_params"] / param_summary["total_params"] * 100
# )

# param_summary["trainable_percent_fmt"] = (
#     param_summary["trainable_percent"].round(1).astype(str) + "%"
# )

# param_summary["total_params_fmt"] = param_summary["total_params"].apply(
#     lambda x: np.format_float_scientific(x, precision=1)
# )

# param_summary["# params\n(%trainable)"] = (
#     param_summary["total_params_fmt"]
#     + " ("
#     + param_summary["trainable_percent_fmt"]
#     + ")"
# )

# csv_path = os.path.join(output_dir_csv, "param_summary_multitask.csv")
# param_summary.to_csv(csv_path, index=False)

# print("PARAMETER SUMMARY (per log, backbone)")
# print(param_summary[["log", "backbone", "# params\n(%trainable)"]].to_string(index=False))
# print("Saved to:", csv_path)

PARAMETER SUMMARY (per log, backbone)
                    log    backbone # params\n(%trainable)
BPI20PrepaidTravelCosts    majority        1.e+00 (100.0%)
                  BPI12    majority        1.e+00 (100.0%)
  BPI20TravelPermitData    majority        1.e+00 (100.0%)
 BPI20RequestForPayment    majority        1.e+00 (100.0%)
                  BPI17    majority        1.e+00 (100.0%)
                  BPI12         rnn       8.9e+04 (100.0%)
                  BPI17         rnn       8.9e+04 (100.0%)
BPI20PrepaidTravelCosts         rnn        9.e+04 (100.0%)
  BPI20TravelPermitData         rnn       9.3e+04 (100.0%)
 BPI20RequestForPayment         rnn       8.7e+04 (100.0%)
                  BPI12         rnn       3.1e+05 (100.0%)
                  BPI17         rnn       3.1e+05 (100.0%)
BPI20PrepaidTravelCosts         rnn       3.1e+05 (100.0%)
  BPI20TravelPermitData         rnn       3.1e+05 (100.0%)
 BPI20RequestForPayment         rnn       3.0e+05 (100.0%)
                  