In [None]:
# Subtask IDs that the prediction-export loop and the dataset-statistics table iterate over.
SUBTASKS = [2, 3]

In [None]:
import json
import os
from collections import defaultdict
from IPython.display import display
from helper import *
from evaluate import *
import pandas as pd

## Export Predictions in Valid Format

In [None]:
# Experiment grid for the prediction export: the export loop visits every
# combination of these values.
STRATEGIES = [
    "dev-train",
    "test-train_dev",
]
LLMS = [
    "unsloth/gemma-3-27b-it-bnb-4bit",
]
# The export loop substitutes 5 for None when calling `get_performance`, but
# still uses the raw entry (i.e. "None") as a path component.
NUM_SC = [None, 5, 10, 15]
METHODS = [
    "no_sc_guided",
    "no_sc_no_guided",
    "sc_guided",
    "sc_no_guided",
]

In [None]:
        # NOTE(review): leftover commented-out snippet. `results`, `labels_filtered`
        # and the `preds_*` variables are not defined anywhere in the visible part of
        # this notebook, so this cannot simply be re-enabled. Consider deleting the cell.
        # results.append({
        #     "no_sc_guided": evaluate_predictions(labels_filtered, preds_no_sc_guided, task=subtask),
        #     "no_sc_no_guided": evaluate_predictions(labels_filtered, preds_no_sc_no_guided, task=subtask),
        #     "sc_guided": evaluate_predictions(labels_filtered, preds_sc_guided, task=subtask),
        #     "sc_no_guided": evaluate_predictions(labels_filtered, preds_sc_no_guided, task=subtask),
        # })

In [None]:
# Export every prediction set as JSON Lines (one prediction per line) to
#   exported_predictions/<strategy>/<llm>/<num_sc>/<method>/subtask_<n>/pred_<language>_<domain>.jsonl
#
# None of the arguments of `get_performance` depend on the method; its second
# return element is indexed by method name. The call is therefore made once per
# (strategy, llm, num_sc, subtask, language, domain) and reused for all METHODS
# instead of being repeated for each method.
#
# NOTE(review): for a NUM_SC entry of None the call uses the fallback below, while
# the directory is still named "None" -- confirm this duplication of the
# "5" export is intended.
FALLBACK_NUM_PREDS_SC = 5  # passed as `num_preds_sc` when the NUM_SC entry is None

for strategy in STRATEGIES:
    for llm in LLMS:
        # Model ids contain "/", which would otherwise add a directory level.
        llm_dirname = llm.replace("/", "_")
        for num_sc in NUM_SC:
            num_preds_sc = FALLBACK_NUM_PREDS_SC if num_sc is None else num_sc
            for subtask in SUBTASKS:
                for language, domain in VALID_LANGUAGES_DOMAINS:
                    predictions_by_method = get_performance(
                        language,
                        domain,
                        subtask,
                        strategy,
                        llm=llm,
                        num_preds_sc=num_preds_sc,
                    )[1]

                    for method in METHODS:
                        output_path = (
                            f"exported_predictions/{strategy}/{llm_dirname}/{num_sc}/{method}"
                            f"/subtask_{subtask}/pred_{language}_{domain}.jsonl"
                        )
                        os.makedirs(os.path.dirname(output_path), exist_ok=True)
                        with open(output_path, "w", encoding="utf-8") as f:
                            for pred in predictions_by_method[method]:
                                # ensure_ascii=False keeps non-ASCII text readable in the file.
                                f.write(json.dumps(pred, ensure_ascii=False) + "\n")

## Create Dataset Statistics Tables

In [None]:
import pandas as pd

ROWS = ["train", "dev", "test"]  # row order of the splits in the final table
DOMAIN_ORDER = ["restaurant", "laptop", "hotel"]  # desired row order of the domains
SUBTASK_ORDER = [2, 3]  # column order: subtask 2 first, then subtask 3


def _level_sort_key(level_values):
    """Map the values of one index level to their rank in ROWS ("split" level) or DOMAIN_ORDER."""
    order = ROWS if level_values.name == "split" else DOMAIN_ORDER
    return level_values.map({value: rank for rank, value in enumerate(order)})


def _format_count(value):
    """Render a count with thousands separators; missing cells become "-"."""
    return f"{int(value):,}" if pd.notna(value) else "-"


# Collect one (split, domain, subtask, language, count) record per available dataset.
records_dataset_statistics = []

for subtask in SUBTASKS:
    for language, domain in VALID_LANGUAGES_DOMAINS:

        # train + dev: any failure here propagates.
        for split in ["train", "dev"]:
            count = len(get_dataset(subtask, language, domain, split=split))
            records_dataset_statistics.append((split, domain, subtask, language, count))

        # test (optional): a failing load leaves the cell empty ("-" in the table).
        # Only ordinary exceptions are caught, so KeyboardInterrupt still stops the
        # cell, and the reason is reported instead of being silently dropped.
        try:
            count_test = len(get_dataset(subtask, language, domain, split="test"))
        except Exception as exc:
            print(
                f"Skipping test split for subtask {subtask}, {language}/{domain}: "
                f"{type(exc).__name__}: {exc}"
            )
        else:
            records_dataset_statistics.append(("test", domain, subtask, language, count_test))

        # # test_cross_validation (as an integer)
        # train_size = len(get_dataset(subtask, language, domain, split="train"))
        # records_dataset_statistics.append((
        #     "test_cross_validation",
        #     domain,
        #     subtask,
        #     language,
        #     str(int(train_size * 0.2))
        # ))

df_dataset_statistics_long = pd.DataFrame(
    records_dataset_statistics,
    columns=["split", "domain", "subtask", "language", "count"],
)

# Convert domain to an ordered Categorical with the desired order.
df_dataset_statistics_long["domain"] = pd.Categorical(
    df_dataset_statistics_long["domain"],
    categories=DOMAIN_ORDER,
    ordered=True,
)

# Rows: (split, domain); columns: (subtask, language); cells: number of examples.
df_dataset_statistics_wide = (
    df_dataset_statistics_long
    .pivot(index=["split", "domain"],
           columns=["subtask", "language"],
           values="count")
    .sort_index(level=["split", "domain"], key=_level_sort_key)
)

# Sort the columns by the desired subtask order (2 first, then 3), then by language.
df_dataset_statistics_wide = df_dataset_statistics_wide.reindex(
    columns=sorted(
        df_dataset_statistics_wide.columns,
        key=lambda column: (SUBTASK_ORDER.index(column[0]), column[1]),
    )
)

# `DataFrame.applymap` is deprecated in recent pandas; mapping each column with
# `Series.map` gives the same element-wise result on old and new versions.
df_dataset_statistics = df_dataset_statistics_wide.apply(
    lambda column: column.map(_format_count)
)

# Cell values from left to right, top to bottom, as a flat list.
values_list_dataset_statistics = df_dataset_statistics.values.flatten().tolist()
df_dataset_statistics

In [None]:
# Fill the dataset-statistics template: every "xxxx" placeholder in the template
# is replaced, in reading order, by the next value of the flattened table.
PLACEHOLDER = "xxxx"

with open("plots/muster/dataset.txt", "r", encoding="utf-8") as f:
    dataset_muster = f.read()

# A count mismatch would otherwise go unnoticed: surplus placeholders stay in the
# output as "xxxx", surplus values are dropped. Warn, but still write the file.
num_placeholders = dataset_muster.count(PLACEHOLDER)
num_values = len(values_list_dataset_statistics)
if num_placeholders != num_values:
    print(
        f"WARNING: template contains {num_placeholders} '{PLACEHOLDER}' placeholders "
        f"but the table provides {num_values} values."
    )

# Go from placeholder to placeholder and fill in the values (count=1 replaces
# only the first remaining placeholder each time).
for value in values_list_dataset_statistics:
    dataset_muster = dataset_muster.replace(PLACEHOLDER, value, 1)

with open("plots/dataset_statistics.txt", "w", encoding="utf-8") as f:
    f.write(dataset_muster)