In [None]:
# Subtask identifiers iterated by the prediction-export and dataset-statistics cells below.
SUBTASKS = [2, 3]

In [None]:
from collections import defaultdict
from IPython.display import display
from helper import *
from evaluate import *
import pandas as pd

In [None]:
# Load the "cF1" performance table of each subtask for the "train_split"
# strategy; subtask 2 is requested first, then subtask 3.
df_subtask2_train_split, df_subtask3_train_split = (
    get_performance_tabular("cF1", subtask_id, strategy="train_split")
    for subtask_id in (2, 3)
)

# Lookup from subtask id to its performance table.
df_subtasks_train_split = dict(
    [(3, df_subtask3_train_split), (2, df_subtask2_train_split)]
)

display(df_subtask2_train_split, df_subtask3_train_split)

In [None]:
# results_subtask2 = run_significance_tests(df_subtask2_train_split, 2)
# results_subtask3 = run_significance_tests(df_subtask3_train_split, 3)

In [None]:
# Show the LaTeX-formatted parameter-tuning table of each subtask (2, then 3).
for _train_split_table in (df_subtask2_train_split, df_subtask3_train_split):
    display(format_table_parameter_tuning_for_latex(_train_split_table))

In [None]:
# Flatten each formatted parameter-tuning table into a 1-D list of cell values
# (row by row, without the "Language" and "Domain" label columns) and write the
# tabular built from those values to plots/.
df_values_subtask2 = (
    format_table_parameter_tuning_for_latex(df_subtask2_train_split)
    .drop(columns=["Language", "Domain"])
    .values.flatten()
    .tolist()
)
df_values_subtask3 = (
    format_table_parameter_tuning_for_latex(df_subtask3_train_split)
    .drop(columns=["Language", "Domain"])
    .values.flatten()
    .tolist()
)

# Make sure the target directory exists, as the prediction export does.
os.makedirs("plots", exist_ok=True)

# Explicit UTF-8 keeps the output independent of the platform's default
# encoding and matches the other cells that read/write files in this notebook.
with open(os.path.join("plots", "parameter_optimization_subtask2.txt"), "w", encoding="utf-8") as f:
    f.write(get_tabular_parameter_optimization(df_values_subtask2))

with open(os.path.join("plots", "parameter_optimization_subtask3.txt"), "w", encoding="utf-8") as f:
    f.write(get_tabular_parameter_optimization(df_values_subtask3))

## Export Predictions in Valid Format

In [None]:
# Strategies whose predictions are exported ("test-train_dev" is currently disabled).
STRATEGIES = ["dev-train"]

# Model identifier shared by every export configuration.
_GEMMA_27B = "unsloth/gemma-3-27b-it-bnb-4bit"

# Export name -> (model identifier, number of self-consistency predictions or
# None, key used to select the predictions from the get_performance() result).
COLUMN_CONFIG = {
    export_name: (_GEMMA_27B, num_preds, result_key)
    for export_name, num_preds, result_key in [
        ("no_sc_no_guided_27b", None, "no_sc_no_guided"),
        ("sc_5_27b", 5, "sc_no_guided"),
        ("sc_10_27b", 10, "sc_no_guided"),
        ("sc_15_27b", 15, "sc_no_guided"),
    ]
}


In [None]:
# Export the predictions of every configuration / strategy / subtask /
# (language, domain) combination as JSON Lines, one prediction per line, to
#   exported_predictions/<strategy>/<column_config>/subtask_<subtask>/pred_<language>_<domain>.jsonl
for column_config, (llm, num_sc_bs, prediction_key) in COLUMN_CONFIG.items():
    # Configurations without a self-consistency count (None) still pass
    # num_preds_sc=5 to get_performance(); computed once per configuration.
    num_preds_sc = num_sc_bs if num_sc_bs is not None else 5

    for strategy in STRATEGIES:
        for subtask in SUBTASKS:
            for language, domain in VALID_LANGUAGES_DOMAINS:
                # The model comes from COLUMN_CONFIG instead of a hard-coded
                # string, so config and export cannot drift apart.
                # Element [1] of the result is indexed by the configuration's
                # prediction key to obtain the list of predictions.
                predictions = get_performance(
                    language,
                    domain,
                    subtask,
                    strategy,
                    llm=llm,
                    num_preds_sc=num_preds_sc,
                )[1][prediction_key]

                output_path = f"exported_predictions/{strategy}/{column_config}/subtask_{subtask}/pred_{language}_{domain}.jsonl"
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, "w", encoding="utf-8") as f:
                    for pred in predictions:
                        # ensure_ascii=False keeps non-ASCII text readable in the file.
                        f.write(json.dumps(pred, ensure_ascii=False) + "\n")

## Create Tables Dataset Statistics

In [None]:
import pandas as pd

ROWS = ["train", "dev", "test", "test_cross_validation"]
DOMAIN_ORDER = ["restaurant", "laptop", "hotel"]  # desired row order of the domains
SUBTASK_ORDER = [2, 3]  # subtask 2 columns first, then subtask 3
# The "test_cross_validation" row reports this fraction of the training-set size.
CROSS_VALIDATION_TEST_FRACTION = 0.2

# One record per (split, domain, subtask, language) with the number of examples.
records_dataset_statistics = []

for subtask in SUBTASKS:
    for language, domain in VALID_LANGUAGES_DOMAINS:

        # train + dev (the train size is reused for the cross-validation row,
        # so the train split is only loaded once)
        train_size = len(get_dataset(subtask, language, domain, split="train"))
        dev_size = len(get_dataset(subtask, language, domain, split="dev"))
        records_dataset_statistics.append(("train", domain, subtask, language, train_size))
        records_dataset_statistics.append(("dev", domain, subtask, language, dev_size))

        # test (optional): a failing load leaves the cell empty ("-" in the
        # final table) but is reported instead of being silently swallowed.
        try:
            count_test = len(get_dataset(subtask, language, domain, split="test"))
        except Exception as error:
            print(
                f"No test split for subtask {subtask}, {language}/{domain}: "
                f"{type(error).__name__}: {error}"
            )
        else:
            records_dataset_statistics.append(("test", domain, subtask, language, count_test))

        # test_cross_validation: fraction of the train size, truncated to a
        # whole number and kept as int like every other count.
        records_dataset_statistics.append((
            "test_cross_validation",
            domain,
            subtask,
            language,
            int(train_size * CROSS_VALIDATION_TEST_FRACTION),
        ))

df_dataset_statistics = pd.DataFrame(
    records_dataset_statistics,
    columns=["split", "domain", "subtask", "language", "count"]
)

# Convert domain to an ordered categorical with the desired order.
df_dataset_statistics["domain"] = pd.Categorical(
    df_dataset_statistics["domain"],
    categories=DOMAIN_ORDER,
    ordered=True
)


def _row_sort_key(level_values):
    """Map one row-index level to sort positions (ROWS for "split", DOMAIN_ORDER otherwise)."""
    order = ROWS if level_values.name == "split" else DOMAIN_ORDER
    return level_values.map({label: position for position, label in enumerate(order)})


# Rows: (split, domain); columns: (subtask, language); cells: example counts.
df_dataset_statistics = (
    df_dataset_statistics
        .pivot(index=["split", "domain"],
               columns=["subtask", "language"],
               values="count")
        .sort_index(level=["split", "domain"], key=_row_sort_key)
)

# Order the columns by subtask (first 2, then 3) and then by language.
df_dataset_statistics = df_dataset_statistics.reindex(
    columns=sorted(df_dataset_statistics.columns, key=lambda x: (SUBTASK_ORDER.index(x[0]), x[1]))
)

# Format counts with thousands separators; missing combinations become "-".
# Series.map per column replaces the deprecated DataFrame.applymap and works
# on both older and newer pandas versions.
df_dataset_statistics = df_dataset_statistics.apply(
    lambda column: column.map(lambda x: f"{int(x):,}" if pd.notna(x) else "-")
)

# Cell values from left to right, top to bottom, as a 1-D list.
values_list_dataset_statistics = df_dataset_statistics.values.flatten().tolist()
df_dataset_statistics

In [None]:
# Fill the dataset-statistics template: every "xxxx" placeholder is replaced,
# in order of appearance, by the next value of the flattened statistics table.
DATASET_TEMPLATE_PATH = "plots/muster/dataset.txt"
DATASET_STATISTICS_PATH = "plots/dataset_statistics.txt"

with open(DATASET_TEMPLATE_PATH, encoding="utf-8") as f:
    dataset_muster = f.read()

# Walk from placeholder to placeholder; count=1 substitutes only the first
# remaining "xxxx" on each step.
for value in values_list_dataset_statistics:
    dataset_muster = dataset_muster.replace("xxxx", value, 1)

with open(DATASET_STATISTICS_PATH, mode="w", encoding="utf-8") as f:
    f.write(dataset_muster)