In [10]:
# Notebook configuration: the subtasks, language codes and domain codes that the
# cells below iterate over, plus the strategy passed to get_performance when the
# score tables are built.
SUBTASKS = [3, 2]
LANGUAGES = ["eng", "jpn", "rus", "tat", "ukr", "zho"]
DOMAINS = ["restaurant", "laptop", "hotel", "finance"]
STRATEGY = "train_split"  # "pred_dev" or "train_split"
N_RUNS = 5 # How many times the prompt was executed for self-consistency

In [11]:
from collections import defaultdict
from helper import *
from evaluate import *
# Explicit imports stay below the star imports so they take precedence over any
# same-named object the star imports might bring in.
import pandas as pd
import os
import json

In [12]:
def get_key_of_best_strategy(lang, domain, df):
    """Return the name of the best-scoring strategy for one language/domain pair.

    Args:
        lang: Language code, used as a key into ``language_mapping``.
        domain: Domain code, used as a key into ``domain_mapping``.
        df: Score table with ``Language`` and ``Domain`` columns and one score
            column per strategy (``no_sc_guided``, ``no_sc_no_guided``,
            ``sc_guided``, ``sc_no_guided``).

    Returns:
        The column name of the strategy with the highest score. Missing
        (None/NaN) scores are ignored; ties resolve to the first strategy in
        the order listed above.

    Raises:
        FileNotFoundError: If the table has no row for the pair, or the row
            contains no score at all.
        KeyError: If ``lang`` or ``domain`` is not a key of its mapping.
    """
    strategy_columns = (
        "no_sc_guided", "no_sc_no_guided", "sc_guided", "sc_no_guided")

    # Filter the table once instead of re-evaluating the same mask per strategy.
    row_mask = (
        (df["Language"] == language_mapping[lang])
        & (df["Domain"] == domain_mapping[domain])
    )
    matching_rows = df.loc[row_mask, list(strategy_columns)]
    if matching_rows.empty:
        # A pair that is absent from the table is reported the same way as a
        # pair without scores, rather than surfacing as a bare IndexError.
        raise FileNotFoundError(
            f"No performance data found for language: {lang}, domain: {domain}")

    first_row = matching_rows.iloc[0]
    # Keep only strategies that actually have a score, so no sentinel value
    # has to compete with real scores in the comparison below.
    available_scores = {
        name: first_row[name]
        for name in strategy_columns
        if not pd.isna(first_row[name])
    }
    if not available_scores:
        raise FileNotFoundError(
            f"No performance data found for language: {lang}, domain: {domain}")

    # max() returns the first maximal key in insertion order.
    return max(available_scores, key=available_scores.get)


def get_performance_tabular(table_metric, table_subtask):
    """Build a score table with one row per language/domain pair plus an AVG row.

    For every combination of ``LANGUAGES`` and ``DOMAINS`` the result of
    ``get_performance(language, domain, table_subtask, STRATEGY)`` is indexed
    first by strategy name and then by ``table_metric``. When that lookup
    raises ``FileNotFoundError`` the four strategy cells of the pair are None.

    Args:
        table_metric: Metric key read from each strategy's result.
        table_subtask: Subtask identifier forwarded to ``get_performance``.

    Returns:
        A DataFrame with the columns ``Language``, ``Domain`` and one column
        per strategy. The final row is labelled ``"AVG"`` and holds the
        column-wise mean over the available scores.
    """
    strategy_columns = (
        "no_sc_guided", "no_sc_no_guided", "sc_guided", "sc_no_guided")

    # Phase 1: gather the metric of every strategy for every pair.
    scores_by_pair = {}
    for lang_code in LANGUAGES:
        for domain_code in DOMAINS:
            try:
                result = get_performance(
                    lang_code, domain_code, table_subtask, STRATEGY)
                pair_scores = {
                    column: result[column][table_metric]
                    for column in strategy_columns
                }
            except FileNotFoundError:
                pair_scores = dict.fromkeys(strategy_columns)
            scores_by_pair[lang_code, domain_code] = pair_scores

    # Phase 2: turn the gathered scores into rows with display names.
    records = [
        {
            "Language": language_mapping[lang_code],
            "Domain": domain_mapping[domain_code],
            **scores_by_pair[lang_code, domain_code],
        }
        for lang_code in LANGUAGES
        for domain_code in DOMAINS
    ]
    score_frame = pd.DataFrame(records)

    # Summary row: column-wise mean, skipping missing scores.
    summary = {"Language": "AVG", "Domain": ""}
    for column in strategy_columns:
        summary[column] = score_frame[column].mean(skipna=True)

    return pd.concat([score_frame, pd.DataFrame([summary])], ignore_index=True)


# Build the cF1 score table of each subtask (subtask 3 first, then subtask 2)
# and keep the individual frames available under their own names as well.
df_subtasks = {
    subtask_id: get_performance_tabular("cF1", subtask_id)
    for subtask_id in (3, 2)
}
df_subtask3 = df_subtasks[3]
df_subtask2 = df_subtasks[2]



In [18]:
# Render the cF1 score table of subtask 3 as the cell output.
df_subtask3

Unnamed: 0,Language,Domain,no_sc_guided,no_sc_no_guided,sc_guided,sc_no_guided
0,English,Restaurant,0.497083,0.493964,0.511052,0.506603
1,English,Laptop,0.361965,0.3636,0.352291,0.357248
2,English,Hotel,,,,
3,English,Finance,,,,
4,Japanese,Restaurant,,,,
5,Japanese,Laptop,,,,
6,Japanese,Hotel,,,,
7,Japanese,Finance,,,,
8,Russian,Restaurant,0.460527,0.453111,0.485372,0.473588
9,Russian,Laptop,,,,


In [14]:
# Spot check: best-scoring strategy for the "eng" / "restaurant" pair in the subtask-3 table.
get_key_of_best_strategy("eng", "restaurant", df_subtasks[3])

'sc_guided'

## Export Predictions in Valid Format

In [15]:
# Strategy value handed to get_performance when loading the predictions to export.
strategy_export = "pred_dev"

In [16]:
# For every subtask/language/domain combination, export the predictions of the
# strategy that scored best in the matching score table as a JSON Lines file.
for subtask in SUBTASKS:
    for language in LANGUAGES:
        for domain in DOMAINS:
            try:
                best_strategy = get_key_of_best_strategy(
                    language, domain, df_subtasks[subtask])
                # NOTE(review): element [1] of the get_performance result is
                # indexed by strategy name here, while get_performance_tabular
                # indexes the result directly -- confirm the return shape for
                # this call signature.
                predictions = get_performance(language, domain, subtask, strategy_export, llm="unsloth/gemma-3-27b-it-bnb-4bit")[1][best_strategy]
                # Serialize everything before touching the file system, so a
                # prediction that cannot be serialized never leaves a truncated
                # export file behind.
                serialized_lines = [
                    json.dumps(pred, ensure_ascii=False) + "\n"
                    for pred in predictions
                ]
                # Despite its name, output_dir is the path of the export file itself.
                output_dir = f"exported_predictions/subtask_{subtask}/pred_{language}_{domain}.jsonl"
                os.makedirs(os.path.dirname(output_dir), exist_ok=True)
                with open(output_dir, "w", encoding="utf-8") as f:
                    f.writelines(serialized_lines)
            except Exception as e:
                # Best effort: report the failing combination and carry on with the next one.
                print(f"Error processing Subtask {subtask} - Language: {language}, Domain: {domain}: {e}")
            else:
                print(
                    f"Subtask {subtask} - Language: {language}, Domain: {domain} => Best Strategy: {best_strategy}")

Error processing Subtask 3 - Language: eng, Domain: restaurant: [Errno 2] No such file or directory: 'results/results_pred_dev/unsloth_gemma-3-27b-it-bnb-4bit/3_eng_restaurant_0_temp0_with_guidance.jsonl'
Error processing Subtask 3 - Language: eng, Domain: laptop: [Errno 2] No such file or directory: 'results/results_pred_dev/unsloth_gemma-3-27b-it-bnb-4bit/3_eng_laptop_0_temp0_with_guidance.jsonl'
Error processing Subtask 3 - Language: eng, Domain: hotel: No performance data found for language: eng, domain: hotel
Error processing Subtask 3 - Language: eng, Domain: finance: No performance data found for language: eng, domain: finance
Error processing Subtask 3 - Language: jpn, Domain: restaurant: No performance data found for language: jpn, domain: restaurant
Error processing Subtask 3 - Language: jpn, Domain: laptop: No performance data found for language: jpn, domain: laptop
Error processing Subtask 3 - Language: jpn, Domain: hotel: No performance data found for language: jpn, domain