In [28]:
import sys
import os
import pandas as pd
import numpy as np
import json

# Add paths for custom modules
sys.path.append(os.path.abspath("../../zero-shot-absa-quad"))
sys.path.append(os.path.abspath("../../zero-shot-absa-quad/plots"))

In [29]:
# from collections import Counter
from performance_helper import compute_f1_scores_quad, compute_scores_single, merge_aspect_lists
from table_tool import insert_line, display_table, round_numbers, minimize, bolden, bolden_column, bolden_column_header
import pandas as pd
import numpy as np
# import itertools
# import shutil
# import io, re
import pandas as pd
import json

In [30]:
N_SEEDS = 5
TASKS = ["tasd", "asqp"]
DATASETS = ["rest15", "rest16", "flightabsa", "coursera", "hotels"]
METHODS = ["dlo", "llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit"]
AUG_TECHNIQUES = ["eda", "qaie"]

raw_dataset_to_formatted = {"rest16": "Rest16", "rest15": "Rest15", "flightabsa": "FlightABSA", "coursera": "OATS Coursera", "hotels": "OATS Hotels"}
format_dataset_to_raw = {"Rest16": "rest16", "Rest15": "rest15", "FlightABSA": "flightabsa", "coursera": "OATS Coursera", "OATS Hotels": "hotels"}
raw_method_to_formatted = {"llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit": "FT-LLama-3-8B", "dlo": "DLO \citep{hu2022improving}"}
format_method_to_raw = {"FT-LLama-3-8B": "llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit", "DLO \citep{hu2022improving}": "dlo"}
raw_aug_to_formatted = {"eda": "EDA", "QAIE": "QAIE", "llm_annotator": "LLM-Annotator"}
format_aug_to_raw = {"EDA": "eda", "-": "-", "LLM-Annotator": "llm_annotator"}

In [31]:
def add_element_scores(loaded_json, task):
    labels = loaded_json["all_labels"]
    preds = loaded_json["all_preds"]
    seed_scores = compute_f1_scores_quad(preds, labels)
    seed_scores_ac = compute_scores_single(preds, labels, "single_ac")
    seed_scores_at = compute_scores_single(preds, labels, "single_at")
    seed_scores_pol = compute_scores_single(preds, labels, "single_pol")

    seed_scores["ac"] = seed_scores_ac
    seed_scores["at"] = seed_scores_at
    seed_scores["pol"] = seed_scores_pol
    if task == "asqp":
        seed_scores_ot = compute_scores_single(preds, labels, "single_ot")
        seed_scores["ot"] = seed_scores_ot
    return seed_scores

In [32]:
def calc_mean(scores):
    averages = {}
    for key in scores[0].keys():
        if isinstance(scores[0][key], dict):  # Falls geschachtelte Dicts vorhanden sind
            averages[key] = {subkey: np.mean([s[key][subkey] for s in scores]) for subkey in scores[0][key]}
        else:
            averages[key] = np.mean([s[key] for s in scores])
    return averages

In [33]:
# 1. Load LLM-annotated fine-tuned scores
scores_llm_ann_train = {}

for dataset in DATASETS:
    for task in TASKS:
        for method in METHODS:
            for fs in [0, 10, 50]:
                for n_ann_ex in ["full"]:

                    scores = []
                    for seed in range(N_SEEDS):
                        with open(
                            f"../_out_fine_tunings/01_llm_annotate_train/{method}_{n_ann_ex}_{task}_{fs}_{dataset}_{seed}.json"
                        ) as f:
                            loaded_json = json.load(f)
                            seed_scores = add_element_scores(loaded_json, task)
                            scores.append(seed_scores)
                    scores_llm_ann_train[
                        f"{method}_{n_ann_ex}_{task}_{fs}_{dataset}"
                    ] = calc_mean(scores)

In [34]:
scores_llm_ann_train.keys(
)

dict_keys(['dlo_full_tasd_0_rest15', 'dlo_full_tasd_10_rest15', 'dlo_full_tasd_50_rest15', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_tasd_0_rest15', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_tasd_10_rest15', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_tasd_50_rest15', 'dlo_full_asqp_0_rest15', 'dlo_full_asqp_10_rest15', 'dlo_full_asqp_50_rest15', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_asqp_0_rest15', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_asqp_10_rest15', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_asqp_50_rest15', 'dlo_full_tasd_0_rest16', 'dlo_full_tasd_10_rest16', 'dlo_full_tasd_50_rest16', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_tasd_0_rest16', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_tasd_10_rest16', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_tasd_50_rest16', 'dlo_full_asqp_0_rest16', 'dlo_full_asqp_10_rest16', 'dlo_full_asqp_50_rest16', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_full_asqp_0_rest16', 'llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit_fu

In [35]:
# 2. Load Augmented fine-tuned scores
scores_traditional_aug = {}

for dataset in DATASETS:
    for task in TASKS:
        for fs in [10, 50]:
                scores = []
                for seed in range(N_SEEDS):
                    path = f"../../QAIE-ABSA-2025-adaption/03_results/{task}_{dataset}_fs_{fs}_{seed}.json"

                    with open(path) as f:
                        loaded_json = json.load(f)
                        seed_scores = add_element_scores(loaded_json, task)
                        scores.append(seed_scores)
                scores_traditional_aug[
                    f"qaie_{task}_{fs}_{dataset}"
                ] = calc_mean(scores)

In [36]:
for dataset in DATASETS:
    for task in TASKS:
        for aug in ["eda"]:
            for method in METHODS:
                for fs in [10, 50]:
                    for n_ann_ex in [2, 5, 10]:
                        scores = []
                        for seed in range(N_SEEDS):
                            path = f"../_out_fine_tunings/03_traditional_augmentation/{method}_{aug}_{n_ann_ex}_{task}_{fs}_{dataset}_{seed}.json"
                            with open(
                                path
                            ) as f:
                                loaded_json = json.load(f)
                                seed_scores = add_element_scores(loaded_json, task)
                                scores.append(seed_scores)
                        scores_traditional_aug[
                            f"{method}_{aug}_{n_ann_ex}_{task}_{fs}_{dataset}"
                        ] = calc_mean(scores)

In [37]:
# 4. Load methods baselines
scores_00_baseline = {}

with open("../../zero-shot-absa-quad/plots/past_results.json") as f:
    past_results = json.load(f)

for dataset in DATASETS:
    for task in TASKS:
        for method in METHODS:
            for n_ann_ex in [10, 50, "full"]:

                scores = []
                for seed in range(N_SEEDS):
                    if n_ann_ex == "full":
                        file_path = f"../../zero-shot-absa-quad/generations/00_baselines/training_{task}_{dataset}_seed-{seed}_n-train_{method}.json"
                    else:
                        file_path = f"../../zero-shot-absa-quad/generations/00_baselines/training_{task}_{dataset}_seed-{seed}_n-train_{method}_{n_ann_ex}.json"
                    with open(file_path) as f:
                        loaded_json = json.load(f)
                        seed_scores = add_element_scores(loaded_json, task)
                        scores.append(seed_scores)
                scores_mean = calc_mean(scores)

                scores_00_baseline[f"{method}_{n_ann_ex}_{task}_{dataset}"] = (
                    scores_mean
                )

                for metric in ["f1", "precision", "recall"]:
                    if n_ann_ex == "full":
                        try:
                            scores_00_baseline[f"{method}_{n_ann_ex}_{task}_{dataset}"][
                                metric
                            ] = past_results[task][method][dataset][metric]
                        except:
                            pass

In [38]:
# Todo: Nachtr√§glicher Filter zero/few shot

In [39]:
# 5. Load zero-shot scores
scores_zeroshot = {}

for dataset in DATASETS:
    for task in TASKS:
        for fs in [0, 10, 20, 30, 40, 50]:
                scores = []
                for seed in range(N_SEEDS):
                    with open(
                        f"../../zero-shot-absa-quad/generations/zeroshot/{task}_{dataset}_test_gemma3:27b_{seed}_label_{fs}.json"
                    ) as f:
                        loaded_json_raw = json.load(f)

                        loaded_json = {
                            "all_preds": [j["pred_label"] for j in loaded_json_raw],
                            "all_labels": [j["tuple_list"] for j in loaded_json_raw],
                        }

                        seed_scores = add_element_scores(loaded_json, task)

                        scores.append(seed_scores)
                scores_zeroshot[f"{task}_{fs}_{dataset}"] = calc_mean(scores)

# WITH SELF-Consistency
for dataset in DATASETS:
    for task in TASKS:
        for fs in [0, 10, 20, 30, 40, 50]:
                all_example_data = []
                for seed in range(N_SEEDS):
                    with open(
                        f"../../zero-shot-absa-quad/generations/zeroshot/{task}_{dataset}_test_gemma3:27b_{seed}_label_{fs}.json"
                    ) as f:
                        loaded_json_raw = json.load(f)

                        loaded_json = {
                            "all_preds": [j["pred_label"] for j in loaded_json_raw],
                            "all_labels": [j["tuple_list"] for j in loaded_json_raw],
                        }

                        all_example_data.append(loaded_json)

                all_labels = all_example_data[0]["all_labels"]
                all_preds = [[] for _ in range(len(all_labels))]
                for seed in range(0, N_SEEDS):
                    for idx in range(len(all_labels)):
                        all_preds[idx].append(all_example_data[seed]["all_preds"][idx])
                        if seed == N_SEEDS - 1:
                            all_preds[idx] = merge_aspect_lists(all_preds[idx])
                            all_preds[idx] = [list(p) for p in all_preds[idx]]

                loaded_json = {
                    "all_preds": all_preds,
                    "all_labels": all_labels,
                }

                scores = add_element_scores(loaded_json, task)
                scores_zeroshot[f"{task}_{fs}_{dataset}_sc"] = scores

In [40]:
def get_n_train_qaie(task="tasd", dataset="rest16", fs=2):
    path = f"../../QAIE-ABSA-2025-adaption/01_augmentations/fs_examples/{task}/{dataset}/fs_{fs}/aug.txt"
    # count number of lines in the file
    with open(path, "r") as f:
        lines = f.readlines()
        n_train = len(lines)
    return n_train

In [41]:
print(scores_zeroshot.keys())
print(scores_00_baseline.keys())
print(scores_llm_ann_train.keys())
print(scores_traditional_aug.keys())

dict_keys(['tasd_0_rest15', 'tasd_10_rest15', 'tasd_20_rest15', 'tasd_30_rest15', 'tasd_40_rest15', 'tasd_50_rest15', 'asqp_0_rest15', 'asqp_10_rest15', 'asqp_20_rest15', 'asqp_30_rest15', 'asqp_40_rest15', 'asqp_50_rest15', 'tasd_0_rest16', 'tasd_10_rest16', 'tasd_20_rest16', 'tasd_30_rest16', 'tasd_40_rest16', 'tasd_50_rest16', 'asqp_0_rest16', 'asqp_10_rest16', 'asqp_20_rest16', 'asqp_30_rest16', 'asqp_40_rest16', 'asqp_50_rest16', 'tasd_0_flightabsa', 'tasd_10_flightabsa', 'tasd_20_flightabsa', 'tasd_30_flightabsa', 'tasd_40_flightabsa', 'tasd_50_flightabsa', 'asqp_0_flightabsa', 'asqp_10_flightabsa', 'asqp_20_flightabsa', 'asqp_30_flightabsa', 'asqp_40_flightabsa', 'asqp_50_flightabsa', 'tasd_0_coursera', 'tasd_10_coursera', 'tasd_20_coursera', 'tasd_30_coursera', 'tasd_40_coursera', 'tasd_50_coursera', 'asqp_0_coursera', 'asqp_10_coursera', 'asqp_20_coursera', 'asqp_30_coursera', 'asqp_40_coursera', 'asqp_50_coursera', 'tasd_0_hotels', 'tasd_10_hotels', 'tasd_20_hotels', 'tasd_30

In [42]:
FT_APPROACHES = ["llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit", "DLO"]  # , "Llama-3-8B FT"]
FT_ENCODING = {"FT-LLama-3-8B": "llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit", "DLO": "dlo", "Llama-3-8B FT": "llama"}
FT_ENCODING_REVERSE = {v: k for k, v in FT_ENCODING.items()}

N_TRAIN_EDA = [2, 5, 10]
N_SHOTS = [10, 50]


def create_f1_plot(tasks=["tasd"], metrics=["f1"]):

    n_annotated_example_column = (
        [10]
        * (len(FT_APPROACHES) * 2 + len(N_TRAIN_EDA) * len(FT_APPROACHES) + 1)
        + [50]
        * (len(FT_APPROACHES) * 2 + len(N_TRAIN_EDA) * len(FT_APPROACHES) + 1) + ["full" for _ in range(len(FT_APPROACHES))] + [0, 10, 50]
    )

    approaches_column = (
        FT_APPROACHES
        + ["LLMA \\textbackslash w " + approach for approach in FT_APPROACHES]
        + [
            "EDA \\textbackslash w " + approach
            for approach in FT_APPROACHES
            for _ in range(len(N_TRAIN_EDA))
        ]
        + ["QAIE"]
    ) * 2 + [f"{approach}" for approach in FT_APPROACHES] + ["Gemma-3-27B (Prompting)" for _ in range(len(N_SHOTS) + 1)]

    n_train_column = []

    for fs in N_SHOTS:
        n_train_column += (
            [fs] * len(FT_APPROACHES)
            + ["full"] * len(FT_APPROACHES)
            + [fs + n * fs for _ in FT_APPROACHES for n in N_TRAIN_EDA]
            + [
                " / ".join(
                    [
                        str(
                            np.round(
                                np.mean(
                                    [
                                        get_n_train_qaie(task=task, dataset=ds, fs=fs)
                                        for ds in DATASETS
                                    ]
                                ),
                                1,
                            )
                        )
                        for task in tasks
                    ]
                )
            ]
        ) 
    
    n_train_column += ["full" for _ in FT_APPROACHES]
    n_train_column += [0, 10, 50]

    performance_scores = {
        dataset: { task: {metric: [] for metric in metrics} for task in tasks }
        for dataset in DATASETS 
    }
    
    print(len(n_annotated_example_column), len(approaches_column), len(n_train_column))
    

    for dataset in DATASETS:
        for task in tasks:
            for metric in metrics:

                for fs in [10, 50]:
                    performance_scores[dataset][task][metric].extend(
                        [
                            scores_00_baseline[f"{method}_{fs}_{task}_{dataset}"][
                                metric
                            ]
                            for method in METHODS
                        ]
                    )
                    performance_scores[dataset][task][metric].extend(
                        [
                            scores_llm_ann_train[
                                f"{method}_full_{task}_{fs}_{dataset}"
                            ][metric]
                            for method in METHODS
                        ]
                    )
                    performance_scores[dataset][task][metric].extend(
                        [
                            scores_traditional_aug[
                                f"{method}_eda_{n_train}_{task}_{fs}_{dataset}"
                            ][metric]
                            for method in METHODS
                            for n_train in N_TRAIN_EDA
                        ]
                    )
                    performance_scores[dataset][task][metric].append(
                        scores_traditional_aug[f"qaie_{task}_{fs}_{dataset}"][metric]
                    )
                    
                for method in METHODS:
                   performance_scores[dataset][task][metric].append(
                        scores_00_baseline[f"{method}_full_{task}_{dataset}"][metric]
                   )
                for fs in [0, 10, 50]:
                   performance_scores[dataset][task][metric].append(
                    scores_zeroshot[f"{task}_{fs}_{dataset}_sc"][metric]
                   )

    # Create DataFrame
    print(len(n_annotated_example_column), len(approaches_column), len(n_train_column))
    df = pd.DataFrame(
        {
            "\# Annotated examples": n_annotated_example_column,
            "Approach": approaches_column,
            "\# Train": n_train_column,
            **{
                f"{dataset}_{task}_{metric}": performance_scores[
                    dataset
                ][task][metric]
                for dataset in DATASETS
                for task in tasks
                for metric in metrics
            },
        }
    )

    bolden_column = [
        f"{dataset}_{task}_{metric}"
        for dataset in DATASETS
        for task in tasks
        for metric in metrics
    ]

    df = round_numbers(df, bolden_column, n_rest=2)
    # df = minimize(df, ["\# Annotated examples"])
    # df = bolden(df, bolden_column, "\# Annotated examples")

    # for i in range(len(df.columns)):
    #     print(df.columns[i])
    #     df.rename(columns={df.columns[i]: "\\textbf{" + df.columns[i] + "}"}, inplace=True)

    column_format = (
        "p{5cm}p{6cm}" + "".join("r" for i in range(len(df.columns) - 3)) + "r"
    )

    latex_tabelle = df.to_latex(index=False, escape=False, column_format=column_format)

    # latex_tabelle = insert_line(latex_tabelle, 5, "black", 1, 8, double_line=False)
    # latex_tabelle = insert_line(latex_tabelle, 2, "gray!80", 2, 8, double_line=False)
    # display_table(latex_tabelle)

    return df


table_out = create_f1_plot(
    tasks=["tasd", "asqp"], metrics=["f1"]
)
table_out

27 27 27
27 27 27


Unnamed: 0,\# Annotated examples,Approach,\# Train,rest15_tasd_f1,rest15_asqp_f1,rest16_tasd_f1,rest16_asqp_f1,flightabsa_tasd_f1,flightabsa_asqp_f1,coursera_tasd_f1,coursera_asqp_f1,hotels_tasd_f1,hotels_asqp_f1
0,10,llm_Meta-Llama-3.1-8B-Instruct-bnb-4bit,10,15.84,4.37,13.59,5.18,16.07,4.87,22.93,4.47,18.07,3.53
1,10,DLO,10,31.79,15.38,36.26,15.63,36.62,18.5,21.41,8.2,31.31,16.55
2,10,LLMA \textbackslash w llm_Meta-Llama-3.1-8B-In...,full,49.23,37.19,62.37,46.2,61.4,46.47,39.22,23.37,55.27,31.44
3,10,LLMA \textbackslash w DLO,full,53.22,37.47,63.02,43.84,61.73,46.01,38.46,23.41,55.07,32.42
4,10,EDA \textbackslash w llm_Meta-Llama-3.1-8B-Ins...,30,24.58,6.74,16.97,9.27,21.18,12.13,28.14,9.75,22.73,5.98
5,10,EDA \textbackslash w llm_Meta-Llama-3.1-8B-Ins...,60,26.65,8.68,18.06,10.7,21.66,12.4,28.25,11.73,23.95,7.51
6,10,EDA \textbackslash w llm_Meta-Llama-3.1-8B-Ins...,110,29.31,9.22,19.16,11.24,22.66,11.77,28.48,13.0,24.57,7.44
7,10,EDA \textbackslash w DLO,30,35.96,16.24,47.23,19.62,35.36,19.21,22.76,8.9,28.15,17.32
8,10,EDA \textbackslash w DLO,60,39.59,16.85,41.92,21.59,32.69,22.23,30.03,14.18,33.52,19.29
9,10,EDA \textbackslash w DLO,110,40.1,13.69,40.2,21.22,31.38,21.35,31.79,12.72,31.15,18.49


In [43]:
'''
\begin{table*}[t]
\centering
\small
\setlength{\tabcolsep}{4pt}
\resizebox{2.0\columnwidth}{!}{%
\begin{tabular}{@{}llccccccccccc@{}}
\toprule
\multirow{2}{*}{\textbf{Approach}} & \multicolumn{2}{c}{\textbf{\# Train}} & \multicolumn{2}{c}{\textbf{Rest15}} & \multicolumn{2}{c}{\textbf{Rest16}} & \multicolumn{2}{c}{\textbf{Coursera}} & \multicolumn{2}{c}{\textbf{Hotels}} & \multicolumn{2}{c}{\textbf{FlightABSA}} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9} \cmidrule(lr){10-11} \cmidrule(l){12-13}
& TASD & ASQP & TASD & ASQP & TASD & ASQP & TASD & ASQP & TASD & ASQP & TASD & ASQP  \\
\midrule
Gemma-3-27B (Prompting) & 0 & 0 & 30.36 & 24.73 & 45.51 & 31.62 & 34.12 & 13.36 & 38.97 & 23.02 & 41.25 & 33.18 \\
Gemma-3-27B (Prompting) & 10 & 10 & 54.47 & 39.95 & 66.75 & 48.24 & 42.16 & 22.31 & 56.51 & 31.41 & 58.73 & 42.29 \\
\midrule
\multicolumn{13}{@{}l}{\textit{SOTA approaches trained on 10 examples or all training examples annotated by Gemma-3-27B, LLM-as-a-annotator (LLMA)}} \\
\midrule
Paraphrase & 10 & 10 & 8.75 & 1.32 & 6.66 & 3.27 & 19.35 & 4.75 & 14.91 & 2.63 & 19.82 & 7.45 \\
DLO & 10 & 10 & 15.84 & 4.37 & 13.59 & 6.85 & 25.61 & 4.47 & 18.07 & 3.53 & 22.14 & 9.26 \\
LLMA w/ Paraphrase & full & full & 49.09 & 35.04 & 62.74 & 45.32 & 40.25 & 22.47 & 55.69 & 32.83 & 54.31 & 41.56 \\
LLMA w/ DLO & full & full & 49.23 & 37.19 & 62.37 & 44.98 & 41.37 & 23.37 & 55.27 & 31.44 & 55.09 & 43.87 \\
\midrule
\multicolumn{13}{@{}l}{\textit{SOTA approaches trained on 10 examples + EDA- or QAIE-augmented examples}} \\
\midrule
EDA w/ Paraphrase & 30 & 30 & 13.15 & 1.74 & 7.67 & 3.95 & 22.48 & 6.61 & 22.05 & 3.47 & 24.37 & 10.23 \\
EDA w/ Paraphrase & 60 & 60 & 23.20 & 10.42 & 16.35 & 8.76 & 27.59 & 9.47 & 25.85 & 6.15 & 28.91 & 15.74 \\
EDA w/ Paraphrase & 110 & 110 & 26.07 & 10.52 & 16.82 & 9.05 & 30.17 & 10.35 & 24.95 & 5.84 & 29.85 & 16.47 \\
EDA w/ DLO & 30 & 30 & 24.58 & 6.74 & 16.97 & 8.33 & 29.85 & 9.75 & 22.73 & 5.98 & 28.44 & 13.92 \\
EDA w/ DLO & 60 & 60 & 26.65 & 8.68 & 18.06 & 9.14 & 31.36 & 11.73 & 23.95 & 7.51 & 30.79 & 17.36 \\
EDA w/ DLO & 110 & 110 & 29.31 & 9.22 & 19.16 & 10.43 & 32.87 & 13.00 & 24.57 & 7.44 & 32.68 & 18.25 \\
QAIE & 26.4 & 45.2 & 20.11 & 9.96 & 12.37 & 7.15 & 28.40 & 17.54 & 21.69 & 9.86 & 25.83 & 12.51 \\
\midrule
\midrule
Gemma-3-27B (Prompting) & 50 & 50 & 62.12 & 41.74 & 68.53 & 49.87 & 45.87 & 25.86 & 62.97 & 43.83 & 67.45 & 53.21 \\
\midrule
\multicolumn{13}{@{}l}{\textit{SOTA approaches trained on 50 examples or all training examples annotated by Gemma-3-27B, LLM-as-a-annotator (LLMA)}} \\
\midrule
Paraphrase & 50 & 50 & 36.92 & 25.55 & 35.87 & 24.12 & 35.78 & 19.38 & 40.10 & 23.09 & 39.65 & 30.76 \\
DLO & 50 & 50 & 39.54 & 26.63 & 43.95 & 28.44 & 38.46 & 19.08 & 44.72 & 27.20 & 43.88 & 33.47 \\
LLMA w/ Paraphrase & full & full & 56.21 & 37.61 & 62.20 & 44.73 & 43.46 & 25.71 & 60.58 & 43.19 & 62.94 & 51.85 \\
LLMA w/ DLO & full & full & 58.40 & 40.38 & 62.03 & 45.02 & 44.52 & 25.69 & 61.43 & 44.24 & 64.76 & 54.37 \\
\midrule
\multicolumn{13}{@{}l}{\textit{SOTA approaches trained on 50 examples + EDA- or QAIE-augmented examples}} \\
\midrule
EDA w/ Paraphrase & 150 & 150 & 42.70 & 27.55 & 41.59 & 27.85 & 39.73 & 19.90 & 43.44 & 27.96 & 45.28 & 34.60 \\
EDA w/ Paraphrase & 300 & 300 & 42.29 & 28.96 & 42.57 & 28.62 & 40.25 & 21.08 & 44.67 & 28.04 & 46.12 & 35.97 \\
EDA w/ Paraphrase & 550 & 550 & 41.83 & 28.70 & 43.01 & 28.77 & 40.09 & 21.37 & 42.70 & 27.97 & 45.76 & 35.42 \\
EDA w/ DLO & 150 & 150 & 43.86 & 30.41 & 46.79 & 31.43 & 40.94 & 23.53 & 45.89 & 31.39 & 48.35 & 38.72 \\
EDA w/ DLO & 300 & 300 & 44.76 & 30.90 & 46.99 & 31.99 & 41.53 & 23.53 & 45.20 & 32.55 & 49.03 & 39.87 \\
EDA w/ DLO & 550 & 550 & 44.14 & 30.93 & 45.49 & 31.25 & 41.34 & 24.57 & 45.13 & 32.51 & 48.66 & 39.45 \\
QAIE & 141.8 & 248.8 & 45.01 & 33.87 & 45.09 & 30.79 & 41.05 & 22.45 & 50.51 & 35.95 & 50.37 & 41.92 \\
\bottomrule
\end{tabular}
}
\caption{Performance comparison of different approaches on aspect-based sentiment analysis tasks across five datasets. Results are reported for two main metrics: TASD F1 (Target Aspect Sentiment Detection) and ASQP F1 (Aspect Sentiment Quad Prediction).}
\label{tab:results}
\end{table*}
'''

'\n\x08egin{table*}[t]\n\\centering\n\\small\n\\setlength{\tabcolsep}{4pt}\n\resizebox{2.0\\columnwidth}{!}{%\n\x08egin{tabular}{@{}llccccccccccc@{}}\n\toprule\n\\multirow{2}{*}{\textbf{Approach}} & \\multicolumn{2}{c}{\textbf{\\# Train}} & \\multicolumn{2}{c}{\textbf{Rest15}} & \\multicolumn{2}{c}{\textbf{Rest16}} & \\multicolumn{2}{c}{\textbf{Coursera}} & \\multicolumn{2}{c}{\textbf{Hotels}} & \\multicolumn{2}{c}{\textbf{FlightABSA}} \\\n\\cmidrule(lr){2-3} \\cmidrule(lr){4-5} \\cmidrule(lr){6-7} \\cmidrule(lr){8-9} \\cmidrule(lr){10-11} \\cmidrule(l){12-13}\n& TASD & ASQP & TASD & ASQP & TASD & ASQP & TASD & ASQP & TASD & ASQP & TASD & ASQP  \\\n\\midrule\nGemma-3-27B (Prompting) & 0 & 0 & 30.36 & 24.73 & 45.51 & 31.62 & 34.12 & 13.36 & 38.97 & 23.02 & 41.25 & 33.18 \\\nGemma-3-27B (Prompting) & 10 & 10 & 54.47 & 39.95 & 66.75 & 48.24 & 42.16 & 22.31 & 56.51 & 31.41 & 58.73 & 42.29 \\\n\\midrule\n\\multicolumn{13}{@{}l}{\textit{SOTA approaches trained on 10 examples or all training 