# Evaluation Notebook

In [1]:
from evaluate import evaluate_predictions, read_jsonl_file
import pandas as pd
import os

In [2]:
# (language code, domain) pairs that are evaluated; the same pairs are used
# for subtask 2 and subtask 3.
_DATASETS = [
    ("eng", "restaurant"),
    ("eng", "laptop"),
    ("jpn", "hotel"),
    ("rus", "restaurant"),
    ("tat", "restaurant"),
    ("ukr", "restaurant"),
    ("zho", "restaurant"),
    ("zho", "laptop"),
]

# (subtask, language code, domain) triples: all subtask-2 entries first,
# then all subtask-3 entries, each in the order of _DATASETS.
VALID_COMBINATIONS = [
    (subtask_no, language_code, domain)
    for subtask_no in (2, 3)
    for language_code, domain in _DATASETS
]

# Language code -> display name used in the result tables.
lang_map = {
    "eng": "English",
    "jpn": "Japanese",
    "rus": "Russian",
    "tat": "Tatar",
    "ukr": "Ukrainian",
    "zho": "Chinese",
}

# (first path component, second path component) of each prediction directory:
# the baseline run lives under "None", the others under their number of SC views.
conditions = [("None", "no_sc_no_guided")] + [
    (str(n_views), "sc_no_guided") for n_views in (5, 10, 15)
]

## Validation Performance

In [3]:
# Settings for the validation-performance evaluation below.
mode = "dev-train"  # sub-directory of exported_predictions/ that holds the prediction files
eval_set = "dev"  # split name inserted into the gold-file name
guided = False  # not referenced by any cell visible in this notebook
llm = "unsloth/gemma-3-27b-it-bnb-4bit"  # model id; "/" is replaced by "_" when building paths

In [4]:
# Column labels for the four entries of `conditions`, in the same order
# ("BL" labels the run stored under the "None" directory).
cols = ["BL", "5", "10", "15"]

# One record per (subtask, language, domain) with the cF1 score (in percent)
# of every condition.
all_results = []
for subtask, language, dataset_name in VALID_COMBINATIONS:
    row = {"subtask": subtask, "language": lang_map[language], "domain": dataset_name.capitalize()}
    for condition, col_name in zip(conditions, cols):
        # Single quotes inside the replacement field: reusing the enclosing double
        # quote in an f-string is only valid from Python 3.12 on (PEP 701).
        pred_file = f"exported_predictions/{mode}/{llm.replace('/', '_')}/{condition[0]}/{condition[1]}/subtask_{subtask}/pred_{language}_{dataset_name}.jsonl"
        gold_file = f"task-dataset/track_a/subtask_{subtask}/{language}/{language}_{dataset_name}_{eval_set}_task{subtask}.jsonl"
        predictions = read_jsonl_file(pred_file, task=subtask)
        golds = read_jsonl_file(gold_file, task=subtask)
        # Empty predictions or golds are reported as a score of 0 instead of being evaluated.
        metrics = evaluate_predictions(golds, predictions, task=subtask) if predictions and golds else None
        row[col_name] = metrics['cF1'] * 100 if metrics else 0
    all_results.append(row)

df_all = pd.DataFrame(all_results)
# Put subtask 2 and subtask 3 side by side: one row per (language, domain),
# score columns suffixed with "_S2" / "_S3".
df2 = df_all[df_all.subtask == 2].drop(columns="subtask")
df3 = df_all[df_all.subtask == 3].drop(columns="subtask")
df_merged = pd.merge(df2, df3, on=["language", "domain"], suffixes=("_S2", "_S3"))

def highlight_row(row, is_avg=False):
    """Format the score cells of one merged result row for the LaTeX table.

    For each subtask suffix ("_S2", "_S3") the columns named by the global
    ``cols`` are rendered with two decimals, and every cell equal to the
    maximum within that subtask is wrapped in ``\\textbf{}``.

    Args:
        row: Series holding "language", "domain" and the suffixed score columns.
        is_avg: When True, the label cells are replaced by a bold "Average"
            and an empty domain.

    Returns:
        A copy of ``row`` with string-formatted score cells; ``row`` itself
        is left unchanged.
    """
    formatted = row.copy()
    for subtask_suffix in ("_S2", "_S3"):
        score_cols = [name + subtask_suffix for name in cols]
        scores = row[score_cols].astype(float)
        best = scores.max()
        for col in score_cols:
            text = f"{scores[col]:.2f}"
            if scores[col] == best:
                text = f"\\textbf{{{text}}}"
            formatted[col] = text
    if is_avg:
        formatted["language"] = "\\textbf{Average}"
        formatted["domain"] = ""
    return formatted

# Format every dataset row (two decimals, best score per subtask in bold).
df_formatted = df_merged.apply(highlight_row, axis=1)

# Average row: column-wise mean of the raw scores, formatted like a dataset row.
avg_vals = df_merged.drop(columns=["language", "domain"]).mean()
avg_row_data = pd.Series({"language": "Average", "domain": "", **avg_vals.to_dict()})
avg_row_fmt = highlight_row(avg_row_data, is_avg=True)

# Dataset rows first, the average row last.
df_final = pd.concat([df_formatted, avg_row_fmt.to_frame().T], ignore_index=True)

# Three-level column header: label / subtask, "# SC Views", condition.
mi_cols = [("Language", "", ""), ("Domain", "", "")] + [
    (f"Subtask {subtask_no}", "# SC Views", views)
    for subtask_no in (2, 3)
    for views in ("BL", "5", "10", "15")
]
df_final.columns = pd.MultiIndex.from_tuples(mi_cols)

display(df_final)

# Read the LaTeX template whose cells are marked with "xxxx" placeholders.
with open("plots/muster/parameter_full.txt") as f:
    muster_table = f.read()

# Flatten the table row by row into one list of cell strings.
flat_results = [str(item) for _, table_row in df_final.iterrows() for item in table_row]

# Fill the placeholders in order: each cell replaces the first remaining "xxxx".
for result in flat_results:
    muster_table = muster_table.replace("xxxx", result, 1)

print(muster_table)

Unnamed: 0_level_0,Language,Domain,Subtask 2,Subtask 2,Subtask 2,Subtask 2,Subtask 3,Subtask 3,Subtask 3,Subtask 3
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,# SC Views,# SC Views,# SC Views,# SC Views,# SC Views,# SC Views,# SC Views,# SC Views
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,BL,5,10,15,BL,5,10,15
0,English,Restaurant,77.93,78.35,78.15,\textbf{78.45},75.17,75.30,\textbf{75.39},75.26
1,English,Laptop,65.51,\textbf{66.01},64.71,65.56,35.57,34.62,\textbf{36.77},35.36
2,Japanese,Hotel,52.63,\textbf{54.89},53.91,54.28,35.93,\textbf{39.87},39.18,38.58
3,Russian,Restaurant,54.28,59.21,57.66,\textbf{59.28},49.54,\textbf{52.65},51.40,52.56
4,Tatar,Restaurant,52.72,52.83,52.99,\textbf{53.54},38.65,\textbf{44.65},43.95,43.56
5,Ukrainian,Restaurant,47.98,47.54,51.56,\textbf{52.01},44.16,45.87,\textbf{47.53},45.98
6,Chinese,Restaurant,65.12,65.05,\textbf{65.73},65.47,58.77,60.90,\textbf{60.95},60.78
7,Chinese,Laptop,45.15,45.30,45.40,\textbf{45.62},36.73,37.50,\textbf{38.33},38.15
8,\textbf{Average},,57.67,58.65,58.76,\textbf{59.28},46.82,48.92,\textbf{49.19},48.78


\midrule
English & Restaurant & 77.93 & 78.35 & 78.15 & \textbf{78.45} & 75.17 & 75.30 & \textbf{75.39} & 75.26 \\
English & Laptop & 65.51 & \textbf{66.01} & 64.71 & 65.56 & 35.57 & 34.62 & \textbf{36.77} & 35.36 \\
Japanese & Hotel & 52.63 & \textbf{54.89} & 53.91 & 54.28 & 35.93 & \textbf{39.87} & 39.18 & 38.58 \\
Russian & Restaurant & 54.28 & 59.21 & 57.66 & \textbf{59.28} & 49.54 & \textbf{52.65} & 51.40 & 52.56 \\
Tatar & Restaurant & 52.72 & 52.83 & 52.99 & \textbf{53.54} & 38.65 & \textbf{44.65} & 43.95 & 43.56 \\
Ukrainian & Restaurant & 47.98 & 47.54 & 51.56 & \textbf{52.01} & 44.16 & 45.87 & \textbf{47.53} & 45.98 \\
Chinese & Restaurant & 65.12 & 65.05 & \textbf{65.73} & 65.47 & 58.77 & 60.90 & \textbf{60.95} & 60.78 \\
Chinese & Laptop & 45.15 & 45.30 & 45.40 & \textbf{45.62} & 36.73 & 37.50 & \textbf{38.33} & 38.15 \\
\midrule
\textbf{Average} &  & 57.67 & 58.65 & 58.76 & \textbf{59.28} & 46.82 & 48.92 & \textbf{49.19} & 48.78 \\
\bottomrule


## Test Performance
(Test data has not been published yet, so no test results are expected here.)

In [5]:
# Settings for the evaluation in this section.
# NOTE(review): the section is titled "Test Performance" but these values still
# point at the dev split — presumably a placeholder until test data exists; confirm.
mode = "dev-train"  # sub-directory of exported_predictions/ that holds the prediction files
eval_set = "dev"  # split name inserted into the gold-file name and the printed heading
guided = False  # not referenced by any cell visible in this notebook
llm = "unsloth/gemma-3-27b-it-bnb-4bit"  # model id; "/" is replaced by "_" when building paths

def highlight_row_test(row):
    """Format the metric cells of one result row for the LaTeX table.

    Columns are addressed by (condition, metric) tuples. For each of the
    metrics cPrec, cRec and cF1, every value is rendered with two decimals,
    and values equal to the row maximum of that metric are wrapped in
    ``\\textbf{}`` — but only when that maximum is greater than 0.

    Args:
        row: Series indexed by tuples, including ("Dataset", "Language") and
            ("Dataset", "Domain").

    Returns:
        A copy of ``row`` with string-formatted metric cells. A row labelled
        "Average" gets a bold label and an empty domain cell.
    """
    formatted = row.copy()

    for metric in ("cPrec", "cRec", "cF1"):
        # All columns of this metric, e.g. ('BL', 'cPrec'), ('5', 'cPrec'), ...
        metric_cols = [key for key in row.index if isinstance(key, tuple) and key[1] == metric]
        if not metric_cols:
            continue
        best = row[metric_cols].astype(float).max()
        for key in metric_cols:
            score = float(row[key])
            text = f"{score:.2f}"
            # Bold only a strictly positive maximum, so all-zero metrics stay plain.
            is_best = score == best and best > 0
            formatted[key] = f"\\textbf{{{text}}}" if is_best else text

    language_key = ("Dataset", "Language")
    if formatted[language_key] == "Average":
        formatted[language_key] = "\\textbf{Average}"
        formatted[("Dataset", "Domain")] = ""

    return formatted

for subtask_val in [2, 3]:
    # One record per dataset of this subtask, keyed by (group, column) tuples.
    res_list = []
    task_combos = [combo for combo in VALID_COMBINATIONS if combo[0] == subtask_val]

    for _, language, dataset_name in task_combos:
        row = {
            ("Dataset", "Language"): lang_map[language],
            ("Dataset", "Domain"): dataset_name.capitalize(),
        }
        for condition, col_name in zip(conditions, cols):
            pred_file = f"exported_predictions/{mode}/{llm.replace('/', '_')}/{condition[0]}/{condition[1]}/subtask_{subtask_val}/pred_{language}_{dataset_name}.jsonl"
            gold_file = f"task-dataset/track_a/subtask_{subtask_val}/{language}/{language}_{dataset_name}_{eval_set}_task{subtask_val}.jsonl"

            # Missing files are treated as empty inputs rather than raising.
            predictions = read_jsonl_file(pred_file, task=subtask_val) if os.path.exists(pred_file) else []
            golds = read_jsonl_file(gold_file, task=subtask_val) if os.path.exists(gold_file) else []

            if predictions and golds:
                metrics = evaluate_predictions(golds, predictions, task=subtask_val)
            else:
                metrics = None

            # Scores in percent; 0.0 when nothing could be evaluated.
            for label, metric_key in (("cPrec", "cPrecision"), ("cRec", "cRecall"), ("cF1", "cF1")):
                row[(col_name, label)] = metrics[metric_key] * 100 if metrics else 0.0
        res_list.append(row)

    df = pd.DataFrame(res_list)
    df.columns = pd.MultiIndex.from_tuples(df.columns)

    # Average row: mean of every numeric column over all datasets.
    numeric_cols = df.select_dtypes(include=['number']).columns
    means = df[numeric_cols].mean()

    avg_row_dict = {
        ("Dataset", "Language"): "Average",
        ("Dataset", "Domain"): "",
    }
    avg_row_dict.update((col, means[col]) for col in numeric_cols)

    df = pd.concat([df, pd.DataFrame([avg_row_dict])], ignore_index=True)

    # Apply two-decimal formatting and \textbf{} highlighting row by row.
    df_final = df.apply(highlight_row_test, axis=1)

    print(f"\nPerformance für Subtask {subtask_val} ({eval_set.capitalize()}-Set)")
    display(df_final)

    # Read the LaTeX template whose cells are marked with "xxxx" placeholders.
    with open("plots/muster/performance_full.txt") as f:
        muster_table = f.read()

    # Flatten the table row by row into one list of cell strings.
    flat_results = [str(item) for _, table_row in df_final.iterrows() for item in table_row]

    # Fill the placeholders in order: each cell replaces the first remaining "xxxx".
    for result in flat_results:
        muster_table = muster_table.replace("xxxx", result, 1)

    print(muster_table)


Performance für Subtask 2 (Dev-Set)


Unnamed: 0_level_0,Dataset,Dataset,BL,BL,BL,5,5,5,10,10,10,15,15,15
Unnamed: 0_level_1,Language,Domain,cPrec,cRec,cF1,cPrec,cRec,cF1,cPrec,cRec,cF1,cPrec,cRec,cF1
0,English,Restaurant,78.91,\textbf{76.97},77.93,79.84,76.91,78.35,\textbf{80.38},76.04,78.15,80.05,76.91,\textbf{78.45}
1,English,Laptop,68.32,\textbf{62.93},65.51,\textbf{69.98},62.47,\textbf{66.01},69.52,60.53,64.71,68.86,62.56,65.56
2,Japanese,Hotel,54.24,51.12,52.63,57.54,\textbf{52.48},\textbf{54.89},\textbf{57.62},50.66,53.91,57.17,51.67,54.28
3,Russian,Restaurant,50.21,59.07,54.28,55.86,62.98,59.21,\textbf{56.84},58.51,57.66,55.93,\textbf{63.06},\textbf{59.28}
4,Tatar,Restaurant,48.04,\textbf{58.40},52.72,50.05,55.94,52.83,50.41,55.85,52.99,\textbf{50.72},56.69,\textbf{53.54}
5,Ukrainian,Restaurant,44.22,52.45,47.98,45.23,50.11,47.54,\textbf{50.35},52.82,51.56,49.69,\textbf{54.56},\textbf{52.01}
6,Chinese,Restaurant,64.7,\textbf{65.55},65.12,66.02,64.11,65.05,\textbf{67.45},64.08,\textbf{65.73},66.49,64.48,65.47
7,Chinese,Laptop,42.9,\textbf{47.65},45.15,44.10,46.58,45.30,\textbf{45.23},45.56,45.40,44.67,46.62,\textbf{45.62}
8,\textbf{Average},,56.44,59.27,57.67,58.58,58.95,58.65,\textbf{59.73},58.01,58.76,59.20,\textbf{59.57},\textbf{59.28}


\midrule
English & Restaurant & 78.91 & \textbf{76.97} & 77.93 & 79.84 & 76.91 & 78.35 & \textbf{80.38} & 76.04 & 78.15 & 80.05 & 76.91 & \textbf{78.45} \\
English & Laptop & 68.32 & \textbf{62.93} & 65.51 & \textbf{69.98} & 62.47 & \textbf{66.01} & 69.52 & 60.53 & 64.71 & 68.86 & 62.56 & 65.56 \\
Japanese & Hotel & 54.24 & 51.12 & 52.63 & 57.54 & \textbf{52.48} & \textbf{54.89} & \textbf{57.62} & 50.66 & 53.91 & 57.17 & 51.67 & 54.28 \\
Russian & Restaurant & 50.21 & 59.07 & 54.28 & 55.86 & 62.98 & 59.21 & \textbf{56.84} & 58.51 & 57.66 & 55.93 & \textbf{63.06} & \textbf{59.28} \\
Tatar & Restaurant & 48.04 & \textbf{58.40} & 52.72 & 50.05 & 55.94 & 52.83 & 50.41 & 55.85 & 52.99 & \textbf{50.72} & 56.69 & \textbf{53.54} \\
Ukrainian & Restaurant & 44.22 & 52.45 & 47.98 & 45.23 & 50.11 & 47.54 & \textbf{50.35} & 52.82 & 51.56 & 49.69 & \textbf{54.56} & \textbf{52.01} \\
Chinese & Restaurant & 64.70 & \textbf{65.55} & 65.12 & 66.02 & 64.11 & 65.05 & \textbf{67.45} & 64.08 & \textbf{65.7

Unnamed: 0_level_0,Dataset,Dataset,BL,BL,BL,5,5,5,10,10,10,15,15,15
Unnamed: 0_level_1,Language,Domain,cPrec,cRec,cF1,cPrec,cRec,cF1,cPrec,cRec,cF1,cPrec,cRec,cF1
0,English,Restaurant,76.61,\textbf{73.79},75.17,77.35,73.37,75.30,\textbf{78.06},72.89,\textbf{75.39},77.30,73.32,75.26
1,English,Laptop,37.03,\textbf{34.23},35.57,36.77,32.71,34.62,\textbf{40.05},33.98,\textbf{36.77},37.92,33.13,35.36
2,Japanese,Hotel,35.64,36.23,35.93,41.86,\textbf{38.06},\textbf{39.87},\textbf{43.77},35.47,39.18,41.16,36.3,38.58
3,Russian,Restaurant,45.14,54.88,49.54,48.89,\textbf{57.04},\textbf{52.65},49.53,53.41,51.40,\textbf{49.59},55.91,52.56
4,Tatar,Restaurant,35.75,42.06,38.65,43.22,\textbf{46.18},\textbf{44.65},\textbf{44.17},43.74,43.95,43.35,43.77,43.56
5,Ukrainian,Restaurant,40.1,\textbf{49.14},44.16,43.28,48.80,45.87,\textbf{46.21},48.93,\textbf{47.53},43.38,48.91,45.98
6,Chinese,Restaurant,58.2,59.35,58.77,61.85,\textbf{59.98},60.90,\textbf{63.32},58.75,\textbf{60.95},61.68,59.9,60.78
7,Chinese,Laptop,34.74,\textbf{38.96},36.73,37.92,37.10,37.50,\textbf{40.84},36.1,\textbf{38.33},39.41,36.98,38.15
8,\textbf{Average},,45.4,48.58,46.82,48.89,\textbf{49.15},48.92,\textbf{50.74},47.91,\textbf{49.19},49.22,48.53,48.78


\midrule
English & Restaurant & 76.61 & \textbf{73.79} & 75.17 & 77.35 & 73.37 & 75.30 & \textbf{78.06} & 72.89 & \textbf{75.39} & 77.30 & 73.32 & 75.26 \\
English & Laptop & 37.03 & \textbf{34.23} & 35.57 & 36.77 & 32.71 & 34.62 & \textbf{40.05} & 33.98 & \textbf{36.77} & 37.92 & 33.13 & 35.36 \\
Japanese & Hotel & 35.64 & 36.23 & 35.93 & 41.86 & \textbf{38.06} & \textbf{39.87} & \textbf{43.77} & 35.47 & 39.18 & 41.16 & 36.30 & 38.58 \\
Russian & Restaurant & 45.14 & 54.88 & 49.54 & 48.89 & \textbf{57.04} & \textbf{52.65} & 49.53 & 53.41 & 51.40 & \textbf{49.59} & 55.91 & 52.56 \\
Tatar & Restaurant & 35.75 & 42.06 & 38.65 & 43.22 & \textbf{46.18} & \textbf{44.65} & \textbf{44.17} & 43.74 & 43.95 & 43.35 & 43.77 & 43.56 \\
Ukrainian & Restaurant & 40.10 & \textbf{49.14} & 44.16 & 43.28 & 48.80 & 45.87 & \textbf{46.21} & 48.93 & \textbf{47.53} & 43.38 & 48.91 & 45.98 \\
Chinese & Restaurant & 58.20 & 59.35 & 58.77 & 61.85 & \textbf{59.98} & 60.90 & \textbf{63.32} & 58.75 & \textbf{60.9

## Performance Comparison with other papers

In [6]:
# Top-3 entries per dataset used for the comparison table.
# Layout: "subtask_<n>" -> language code -> domain -> "top_<rank>" ->
# {"cF1": score, "username": name}.
# NOTE(review): the source of these scores is not recorded here, and several
# usernames look like placeholder values — confirm against the actual results.
performance_papers = {
    "subtask_2": {
        "eng": {
            "laptop": {
                "top_1": {"cF1": 75.34, "username": "nchellwig"},
                "top_2": {"cF1": 73.21, "username": "janedoe456"},
                "top_3": {"cF1": 70.89, "username": "alice789"}
            },
            "restaurant": {
                "top_1": {"cF1": 80.12, "username": "nchellwig"},
                "top_2": {"cF1": 78.45, "username": "datawizard"},
                "top_3": {"cF1": 76.78, "username": "mlenthusiast"}
            }
        },
        "zho": {
            "laptop": {
                "top_1": {"cF1": 68.90, "username": "zhangwei"},
                "top_2": {"cF1": 66.75, "username": "lihua"},
                "top_3": {"cF1": 64.32, "username": "wangming"}
            },
            "restaurant": {
                "top_1": {"cF1": 72.34, "username": "nchellwig"},
                "top_2": {"cF1": 70.21, "username": "yangli"},
                "top_3": {"cF1": 68.45, "username": "zhaojun"}
            }
        },
        "jpn": {
            "hotel": {
                "top_1": {"cF1": 77.89, "username": "suzukitomo"},
                "top_2": {"cF1": 75.67, "username": "tanakayu"},
                "top_3": {"cF1": 73.45, "username": "yamadak"}
            }
        },
        "rus": {
            "restaurant": {
                "top_1": {"cF1": 65.43, "username": "nchellwig"},
                "top_2": {"cF1": 63.21, "username": "petr_petrov"},
                "top_3": {"cF1": 61.09, "username": "maria_s"}
            }
        },
        "tat": {
            "restaurant": {
                "top_1": {"cF1": 55.67, "username": "tatar_pro"},
                "top_2": {"cF1": 53.45, "username": "nlp_kazan"},
                "top_3": {"cF1": 51.23, "username": "bert_tat"}
            }
        },
        "ukr": {
            "restaurant": {
                "top_1": {"cF1": 62.12, "username": "nchellwig"},
                "top_2": {"cF1": 60.45, "username": "kyiv_ml"},
                "top_3": {"cF1": 58.78, "username": "stepan_b"}
            }
        }
    },
    "subtask_3": {
        "eng": {
            "laptop": {
                "top_1": {"cF1": 78.56, "username": "mlmaster"},
                "top_2": {"cF1": 76.34, "username": "aibot"},
                "top_3": {"cF1": 74.12, "username": "deeplearner"}
            },
            "restaurant": {
                "top_1": {"cF1": 82.45, "username": "nchellwig"},
                "top_2": {"cF1": 80.67, "username": "aienthusiast"},
                "top_3": {"cF1": 78.90, "username": "techlover"}
            }
        },
        "zho": {
            "laptop": {
                "top_1": {"cF1": 70.12, "username": "lianghua"},
                "top_2": {"cF1": 68.34, "username": "nchellwig"},
                "top_3": {"cF1": 66.78, "username": "sunwei"}
            },
            "restaurant": {
                "top_1": {"cF1": 74.56, "username": "guojun"},
                "top_2": {"cF1": 72.89, "username": "fengli"},
                "top_3": {"cF1": 71.23, "username": "huaming"}
            }
        },
        "jpn": {
            "hotel": {
                "top_1": {"cF1": 75.23, "username": "nchellwig"},
                "top_2": {"cF1": 73.45, "username": "osaka_nlp"},
                "top_3": {"cF1": 71.67, "username": "kyoto_ai"}
            }
        },
        "rus": {
            "restaurant": {
                "top_1": {"cF1": 63.89, "username": "moscow_data"},
                "top_2": {"cF1": 61.56, "username": "siberia_nlp"},
                "top_3": {"cF1": 59.23, "username": "ural_ml"}
            }
        },
        "tat": {
            "restaurant": {
                "top_1": {"cF1": 54.12, "username": "nchellwig"},
                "top_2": {"cF1": 52.34, "username": "tatar_nlp"},
                "top_3": {"cF1": 50.56, "username": "volga_ml"}
            }
        },
        "ukr": {
            "restaurant": {
                "top_1": {"cF1": 60.78, "username": "lviv_data"},
                "top_2": {"cF1": 58.90, "username": "odessa_nlp"},
                "top_3": {"cF1": 57.12, "username": "dnipro_ml"}
            }
        }
    }
}

In [7]:
def format_paper_val(entry, highlight_user="nchellwig"):
    """Render one leaderboard entry as a LaTeX table cell.

    Args:
        entry: Mapping with the keys "username" and "cF1", or a falsy value
            (None or an empty dict) when there is no entry for that rank.
        highlight_user: Raw (unescaped) username whose cell is set in bold.

    Returns:
        "" for a missing entry; otherwise "<username> (<cF1>)" with the score
        rounded to two decimals and underscores in the username escaped for
        LaTeX, wrapped in ``\\textbf{}`` when the entry belongs to
        ``highlight_user``.
    """
    if not entry:
        return ""
    username = entry["username"]
    text = "{} ({:.2f})".format(username.replace("_", "\\_"), entry["cF1"])
    # Compare the raw name, not the escaped one, so that a highlighted
    # username containing "_" still matches.
    if username == highlight_user:
        return f"\\textbf{{{text}}}"
    return text

paper_rows = []

# Distinct (language, domain) pairs taken from VALID_COMBINATIONS (to stay
# consistent with the other tables), sorted by language code and domain.
unique_combos = sorted({(lang, dom) for _, lang, dom in VALID_COMBINATIONS})

for lang, dom in unique_combos:
    row = {
        ("Dataset", "Language"): lang_map[lang],
        ("Dataset", "Domain"): dom.capitalize(),
    }

    for st in (2, 3):
        # Look up this dataset's entries; any missing level falls back to an
        # empty dict, which produces empty cells below.
        st_data = performance_papers.get(f"subtask_{st}", {}).get(lang, {}).get(dom, {})

        for rank in ("top_1", "top_2", "top_3"):
            # "top_1" -> "Top 1"
            header = (f"Subtask {st}", rank.replace("_", " ").capitalize())
            row[header] = format_paper_val(st_data.get(rank))

    paper_rows.append(row)

df_papers = pd.DataFrame(paper_rows)
df_papers.columns = pd.MultiIndex.from_tuples(df_papers.columns)

display(df_papers)

# Read the LaTeX template whose cells are marked with "xxxx" placeholders.
with open("plots/muster/competition_full.txt") as f:
    muster_table = f.read()

# Flatten the table row by row into one list of cell strings.
flat_results = [str(item) for _, table_row in df_papers.iterrows() for item in table_row]

# Fill the placeholders in order: each cell replaces the first remaining "xxxx".
for result in flat_results:
    muster_table = muster_table.replace("xxxx", result, 1)

print(muster_table)

Unnamed: 0_level_0,Dataset,Dataset,Subtask 2,Subtask 2,Subtask 2,Subtask 3,Subtask 3,Subtask 3
Unnamed: 0_level_1,Language,Domain,Top 1,Top 2,Top 3,Top 1,Top 2,Top 3
0,English,Laptop,\textbf{nchellwig (75.34)},janedoe456 (73.21),alice789 (70.89),mlmaster (78.56),aibot (76.34),deeplearner (74.12)
1,English,Restaurant,\textbf{nchellwig (80.12)},datawizard (78.45),mlenthusiast (76.78),\textbf{nchellwig (82.45)},aienthusiast (80.67),techlover (78.90)
2,Japanese,Hotel,suzukitomo (77.89),tanakayu (75.67),yamadak (73.45),\textbf{nchellwig (75.23)},osaka\_nlp (73.45),kyoto\_ai (71.67)
3,Russian,Restaurant,\textbf{nchellwig (65.43)},petr\_petrov (63.21),maria\_s (61.09),moscow\_data (63.89),siberia\_nlp (61.56),ural\_ml (59.23)
4,Tatar,Restaurant,tatar\_pro (55.67),nlp\_kazan (53.45),bert\_tat (51.23),\textbf{nchellwig (54.12)},tatar\_nlp (52.34),volga\_ml (50.56)
5,Ukrainian,Restaurant,\textbf{nchellwig (62.12)},kyiv\_ml (60.45),stepan\_b (58.78),lviv\_data (60.78),odessa\_nlp (58.90),dnipro\_ml (57.12)
6,Chinese,Laptop,zhangwei (68.90),lihua (66.75),wangming (64.32),lianghua (70.12),\textbf{nchellwig (68.34)},sunwei (66.78)
7,Chinese,Restaurant,\textbf{nchellwig (72.34)},yangli (70.21),zhaojun (68.45),guojun (74.56),fengli (72.89),huaming (71.23)


\midrule
English & Laptop & \textbf{nchellwig (75.34)} & janedoe456 (73.21) & alice789 (70.89) & mlmaster (78.56) & aibot (76.34) & deeplearner (74.12) \\
English & Restaurant & \textbf{nchellwig (80.12)} & datawizard (78.45) & mlenthusiast (76.78) & \textbf{nchellwig (82.45)} & aienthusiast (80.67) & techlover (78.90) \\
Japanese & Hotel & suzukitomo (77.89) & tanakayu (75.67) & yamadak (73.45) & \textbf{nchellwig (75.23)} & osaka\_nlp (73.45) & kyoto\_ai (71.67) \\
Russian & Restaurant & \textbf{nchellwig (65.43)} & petr\_petrov (63.21) & maria\_s (61.09) & moscow\_data (63.89) & siberia\_nlp (61.56) & ural\_ml (59.23) \\
Tatar & Restaurant & tatar\_pro (55.67) & nlp\_kazan (53.45) & bert\_tat (51.23) & \textbf{nchellwig (54.12)} & tatar\_nlp (52.34) & volga\_ml (50.56) \\
Ukrainian & Restaurant & \textbf{nchellwig (62.12)} & kyiv\_ml (60.45) & stepan\_b (58.78) & lviv\_data (60.78) & odessa\_nlp (58.90) & dnipro\_ml (57.12) \\
Chinese & Laptop & zhangwei (68.90) & lihua (66.75) & wa