# Notebook: Analyse Language


## Packages


In [1]:
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from spacy.lang.de.stop_words import STOP_WORDS
from collections import Counter
import Levenshtein
import numpy as np
import random
import string
import spacy
import nltk
import json

## Constants


In [2]:
# Aspect categories; iterated by the per-category aspect term tables.
ASPECT_CATEGORIES = ["GENERAL-IMPRESSION",
                     "FOOD", "SERVICE", "AMBIENCE", "PRICE"]
# Keys of the LLMs whose synthetic data is loaded (also used as directory names).
LLMS = ["Llama70B", "GPT-3"]
# Few-shot conditions under which the synthetic data was generated (also directory names).
FS_CONDITIONS = ["fixed", "random"]
# Human-readable labels for the few-shot conditions.
# NOTE(review): same mapping as ENCODE_CONDITION in the next cell; the tables use ENCODE_CONDITION.
PROMPTING_ENCODING = {"fixed": "25 fixed examples",
                      "random": "25 random examples"}
# Number of folds requested from MultilabelStratifiedKFold.
N_FOLDS = 3
# Key read from every tag when the one-hot stratification labels are built.
CRITERIA_RS = "tag_with_polarity"
POLARITIES = ["POSITIVE", "NEGATIVE", "NEUTRAL"]
# NOTE(review): not referenced anywhere in the visible part of this notebook.
MENTIONING_TYPE = ["implicit", "explicit"]
# All "<ASPECT>-<POLARITY>" labels; their order is the column order of the
# one-hot vectors built by get_one_hot. The aspect order here intentionally or
# accidentally differs from ASPECT_CATEGORIES -- do not reorder without checking.
COMBINATIONS = [f"{aspect}-{polarity}" for aspect in ["SERVICE", "FOOD",
                                                      "GENERAL-IMPRESSION", "AMBIENCE", "PRICE"] for polarity in POLARITIES]
# Seed for Python's `random` module and base seed of the stratified splitter.
RANDOM_STATE = 43

In [3]:
# Display names used in the LaTeX tables for the keys in LLMS.
LLMS_ENCODED = {"GPT-3": "GPT-3.5-turbo", "Llama70B":"Llama-2-70B"}
# Display labels used in the LaTeX tables for the keys in FS_CONDITIONS.
ENCODE_CONDITION = {"fixed": "25 fixed examples", "random": "25 random examples"}

## Settings


In [4]:
# spaCy pipeline applied to every example text when the datasets are loaded.
nlp = spacy.load("de_core_news_lg")
# NOTE(review): no nltk tokenizer call is visible in this notebook -- confirm the download is still needed.
nltk.download('punkt')
# Seeds Python's `random` module only (used by get_avg_unique_words_in_k_words);
# numpy's global RNG is not seeded here.
random.seed(RANDOM_STATE)

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/nils_hellwig/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Code


### Helper


In [5]:
def count_unique_sentences(sentences):
    """Return the number of distinct items in `sentences`."""
    return len({sentence for sentence in sentences})


def count_unique_tokens(tokens):
    """Return the number of distinct `.text` values among `tokens`."""
    surface_forms = set()
    for token in tokens:
        surface_forms.add(token.text)
    return len(surface_forms)


def count_unique_lemmas(tokens):
    """Return the number of distinct `.lemma_` values among `tokens`."""
    return len({token.lemma_ for token in tokens})


def get_avg_unique_words_in_k_words(tokens, n_selection=100, n_repetitions=1000):
    """Estimate how many distinct items a random draw of `n_selection` items contains.

    Draws `n_selection` positions from `tokens` without replacement (using the
    global `random` state), counts the distinct items at those positions, and
    repeats this `n_repetitions` times.

    Returns:
        The mean of the distinct-item counts over all repetitions.

    Raises:
        ValueError: Propagated from `random.sample` when `tokens` holds fewer
            than `n_selection` items.
    """
    population_size = len(tokens)
    distinct_counts = []
    for _ in range(n_repetitions):
        drawn_positions = random.sample(range(population_size), n_selection)
        distinct_counts.append(len({tokens[position] for position in drawn_positions}))
    return np.mean(distinct_counts)


def average_word_level_levenshtein_distance(docs, norm=False):
    """Return the mean token-level Levenshtein distance over all pairs of documents.

    Args:
        docs: Sequence of dicts. The "tokenized_text" entry of each dict is
            iterated and the `.text` attribute of every item is one token.
        norm: If True, each pairwise distance is divided by the token count of
            the longer document of the pair before averaging.

    Returns:
        The average pairwise distance, or 0 when there are fewer than two
        documents (no pairs to compare).
    """
    tokenized_texts = [
        [token.text for token in doc["tokenized_text"]] for doc in docs]

    total_distance = 0
    pair_count = 0

    n_docs = len(tokenized_texts)
    for i in range(n_docs):
        tokens1 = tokenized_texts[i]
        for j in range(i + 1, n_docs):
            tokens2 = tokenized_texts[j]

            distance = Levenshtein.distance(tokens1, tokens2)
            if norm:
                max_tokens = max(len(tokens1), len(tokens2))
                # Two token-less documents have distance 0; skip the division
                # instead of raising ZeroDivisionError on 0 / 0.
                if max_tokens > 0:
                    distance = distance / max_tokens
            total_distance += distance
            pair_count += 1

    return total_distance / pair_count if pair_count > 0 else 0

In [6]:
def add_thousand_dots(n_sample):
    """Format a number with "," as the thousands separator (e.g. 1500 -> "1,500")."""
    return format(n_sample, ",")

### Load Datasets


In [7]:
dataset = {"synth": {}, "real": []}

# Synthetic data: five JSON splits per (LLM, few-shot condition). Every
# example gets its spaCy-processed text stored under "tokenized_text".
for llm in LLMS:
    dataset["synth"][llm] = {}
    for prompting in FS_CONDITIONS:
        dataset["synth"][llm][prompting] = []
        for split in range(5):
            synth_path = f"../07 train models/synth/{llm}/{prompting}/split_{split}.json"
            with open(synth_path, 'r', encoding='utf-8') as json_file:
                split_data = json.load(json_file)
            for example in split_data:
                example["tokenized_text"] = nlp(example["text"])
            dataset["synth"][llm][prompting].append(split_data)

# Real data: six JSON splits, processed the same way.
for split in range(6):
    real_path = f"../07 train models/real/split_{split}.json"
    with open(real_path, 'r', encoding='utf-8') as json_file:
        split_data = json.load(json_file)
    for example in split_data:
        example["tokenized_text"] = nlp(example["text"])
    dataset["real"].append(split_data)

In [8]:
def get_one_hot(subset):
    """Build one multi-hot label vector per example for stratification.

    For every example, the `CRITERIA_RS` value of each of its tags is
    collected; the resulting vector has one position per entry of
    `COMBINATIONS`, set to 1 if that combination occurs in the example.

    Returns:
        A list of numpy arrays, one per example, in the order of `subset`.
    """
    encoded_rows = []
    for example in subset:
        present = {tag[CRITERIA_RS] for tag in example["tags"]}
        encoded_rows.append(
            np.array([1 if combination in present else 0 for combination in COMBINATIONS]))
    return encoded_rows

In [9]:
# Re-split every synthetic iteration into N_FOLDS label-stratified folds of
# exactly FOLD_SIZE examples each. MultilabelStratifiedKFold does not guarantee
# equal fold sizes, so the split is retried with an incremented seed until all
# folds have the required size.
# NOTE: this cell replaces dataset["synth"][...][iteration] in place, so it must
# be run exactly once after the loading cell.
FOLD_SIZE = 500

for llm in LLMS:
    for few_shot_condition in FS_CONDITIONS:
        for iteration in range(5):
            if few_shot_condition == "random":
                subset = dataset["synth"][llm][few_shot_condition][iteration]
            else:
                # NOTE(review): the first 475 examples of the "fixed" splits are
                # skipped; the reason is not visible in this notebook -- confirm.
                subset = dataset["synth"][llm][few_shot_condition][iteration][475:]

            # Equal folds are only possible for exactly N_FOLDS * FOLD_SIZE
            # examples; otherwise the retry loop below would never terminate.
            if len(subset) != N_FOLDS * FOLD_SIZE:
                raise ValueError(
                    f"Expected {N_FOLDS * FOLD_SIZE} examples for {llm}/{few_shot_condition}/"
                    f"iteration {iteration}, got {len(subset)}; was this cell already run?")

            # The labels do not change between retries, so encode them once.
            labels_one_hot = get_one_hot(subset)

            found_5_split = False
            restart_idx = 0
            while not found_5_split:
                mskf = MultilabelStratifiedKFold(
                    n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE+restart_idx)
                # Only the held-out part of each split is kept as a fold.
                section = [[subset[i] for i in test_index]
                           for _, test_index in mskf.split(subset, labels_one_hot)]

                found_5_split = all(len(fold) == FOLD_SIZE for fold in section)
                restart_idx += 1

            dataset["synth"][llm][few_shot_condition][iteration] = section

In [10]:
# For each of the six real splits, collect that split and the two following
# ones (wrapping around after the last split), then store these six 3-split
# groups back under dataset["real"].
real_examples = [
    [dataset["real"][(start + offset) % 6] for offset in range(3)]
    for start in range(6)
]
dataset["real"] = real_examples

### Document Analysis


#### Synthetic Data


In [12]:
# LaTeX rows for the synthetic data: per LLM, few-shot condition and sample
# size, the number of unique sentences, tokens and lemmas, averaged over the
# 5 iterations.
for idx_llm, llm in enumerate(LLMS):
    for idx_fsc, few_shot_condition in enumerate(FS_CONDITIONS):
        for idx_sample, n_sample in enumerate([500, 1000, 1500]):
            iterations_n_unique_tokens = []
            iterations_n_lemmas = []
            iterations_avg_unique_sentences = []
            for it in range(5):
                # Concatenate the first n_sample / 500 folds of this iteration.
                samples = [item for k in range(
                    int(n_sample / 500)) for item in dataset["synth"][llm][few_shot_condition][it][k]]
                # Flatten the tokens once; both counters read the same list.
                all_tokens = [
                    token for example in samples for token in example["tokenized_text"]]
                n_unique_tokens = count_unique_tokens(all_tokens)
                n_unique_lemmas = count_unique_lemmas(all_tokens)
                n_unique_sentences = count_unique_sentences(
                    [example["text"] for example in samples])

                iterations_n_unique_tokens.append(n_unique_tokens)
                iterations_n_lemmas.append(n_unique_lemmas)
                iterations_avg_unique_sentences.append(n_unique_sentences)

            # The multirow cells are only emitted on the first row they span.
            # Backslashes are escaped explicitly ("\m" is an invalid escape sequence).
            llm_print = ("\\multirow{6}{*}{\\textbf{" + LLMS_ENCODED[llm] + "}}"
                         if idx_sample == 0 and idx_fsc == 0 else "")
            fs_condition_print = ("\\multirow{3}{*}{" + ENCODE_CONDITION[few_shot_condition] + "}"
                                  if idx_sample == 0 else "")

            print(llm_print, "&", fs_condition_print, "&", add_thousand_dots(n_sample), "&",
                  add_thousand_dots(
                      round(np.mean(iterations_avg_unique_sentences), 2)), "&",
                  add_thousand_dots(
                      round(np.mean(iterations_n_unique_tokens), 2)), "&",
                  add_thousand_dots(round(np.mean(iterations_n_lemmas), 2)), "\\\\")
        # Thin gray rule between the conditions of one LLM, full rule after the LLM.
        if idx_fsc == 0:
            print("\\arrayrulecolor{gray}\\cline{2-6}\\arrayrulecolor{black}")
        else:
            print("\\hline")

\multirow{6}{*}{\textbf{Llama-2-70B}} & \multirow{3}{*}{25 fixed examples} & 500 & 480.0 & 694.4 & 536.6 \\
 &  & 1,000 & 934.0 & 1,023.8 & 787.0 \\
 &  & 1,500 & 1,383.2 & 1,269.2 & 973.0 \\
\arrayrulecolor{gray}\cline{2-6}\arrayrulecolor{black}
 & \multirow{3}{*}{25 random examples} & 500 & 485.4 & 751.8 & 580.2 \\
 &  & 1,000 & 949.4 & 1,103.0 & 846.8 \\
 &  & 1,500 & 1,400.0 & 1,380.4 & 1,054.0 \\
\hline
\multirow{6}{*}{\textbf{GPT-3.5-turbo}} & \multirow{3}{*}{25 fixed examples} & 500 & 307.6 & 289.6 & 208.2 \\
 &  & 1,000 & 549.0 & 369.6 & 264.4 \\
 &  & 1,500 & 769.8 & 428.4 & 307.0 \\
\arrayrulecolor{gray}\cline{2-6}\arrayrulecolor{black}
 & \multirow{3}{*}{25 random examples} & 500 & 317.0 & 295.4 & 217.0 \\
 &  & 1,000 & 561.2 & 389.4 & 281.2 \\
 &  & 1,500 & 782.2 & 456.6 & 328.4 \\
\hline


In [14]:
# LaTeX rows for the synthetic data: per LLM, few-shot condition and sample
# size, the mean number of unique words among 100 random words and the mean
# pairwise word-level Levenshtein distance (raw and normalized), averaged over
# the 5 iterations.
for idx_llm, llm in enumerate(LLMS):
    for idx_fsc, few_shot_condition in enumerate(FS_CONDITIONS):
        for idx_sample, n_sample in enumerate([500, 1000, 1500]):
            iterations_avg_unique_words_in_k_words = []
            iterations_avg_levenshtein_distance = []
            iterations_avg_levenshtein_distance_norm = []
            for it in range(5):
                # Concatenate the first n_sample / 500 folds of this iteration.
                samples = [item for k in range(
                    int(n_sample / 500)) for item in dataset["synth"][llm][few_shot_condition][it][k]]
                n_unique_words_in_k_words = get_avg_unique_words_in_k_words(
                    [token.text for example in samples for token in example["tokenized_text"]])
                # Both calls compare every pair of documents (quadratic in len(samples)).
                avg_levenshtein_distance = average_word_level_levenshtein_distance(
                    samples)
                avg_levenshtein_distance_norm = average_word_level_levenshtein_distance(
                    samples, norm=True)

                iterations_avg_unique_words_in_k_words.append(
                    n_unique_words_in_k_words)
                iterations_avg_levenshtein_distance.append(
                    avg_levenshtein_distance)
                iterations_avg_levenshtein_distance_norm.append(
                    avg_levenshtein_distance_norm)

            # The multirow cells are only emitted on the first row they span.
            # Backslashes are escaped explicitly ("\m" is an invalid escape sequence).
            llm_print = ("\\multirow{6}{*}{\\textbf{" + LLMS_ENCODED[llm] + "}}"
                         if idx_sample == 0 and idx_fsc == 0 else "")
            fs_condition_print = ("\\multirow{3}{*}{" + ENCODE_CONDITION[few_shot_condition] + "}"
                                  if idx_sample == 0 else "")

            # Every row ends with the LaTeX row terminator "\\", like the other tables.
            print(llm_print, "&", fs_condition_print, "&", add_thousand_dots(n_sample), "&",
                  add_thousand_dots(round(np.mean(iterations_avg_unique_words_in_k_words), 2)), "&",
                  add_thousand_dots(round(np.mean(iterations_avg_levenshtein_distance), 2)), "&",
                  add_thousand_dots(round(np.mean(iterations_avg_levenshtein_distance_norm), 2)), "\\\\")

        # Thin gray rule between the conditions of one LLM, full rule after the LLM.
        if idx_fsc == 0:
            print("\\arrayrulecolor{gray}\\cline{2-6}\\arrayrulecolor{black}")
        else:
            print("\\hline")

\multirow{6}{*}{\textbf{Llama-2-70B}} & \multirow{3}{*}{25 fixed examples} & 500 & 59.2 & 11.08 & 0.85
 &  & 1,000 & 59.67 & 11.09 & 0.85
 &  & 1,500 & 59.63 & 11.06 & 0.85
\arrayrulecolor{gray}\cline{2-6}\arrayrulecolor{black}
 & \multirow{3}{*}{25 random examples} & 500 & 61.46 & 11.06 & 0.86
 &  & 1,000 & 61.33 & 11.04 & 0.86
 &  & 1,500 & 61.26 & 10.98 & 0.86
\hline
\multirow{6}{*}{\textbf{GPT-3.5-turbo}} & \multirow{3}{*}{25 fixed examples} & 500 & 47.82 & 9.79 & 0.79
 &  & 1,000 & 47.86 & 9.79 & 0.79
 &  & 1,500 & 48.01 & 9.8 & 0.79
\arrayrulecolor{gray}\cline{2-6}\arrayrulecolor{black}
 & \multirow{3}{*}{25 random examples} & 500 & 48.11 & 8.81 & 0.77
 &  & 1,000 & 48.24 & 8.84 & 0.77
 &  & 1,500 & 48.25 & 8.85 & 0.77
\hline


#### Real Data


In [None]:
# LaTeX rows for the real data: per sample size, the number of unique
# sentences, tokens and lemmas, averaged over 5 iterations.
for idx_sample, n_sample in enumerate([500, 1000, 1500]):
    iterations_n_unique_tokens = []
    iterations_n_lemmas = []
    iterations_avg_unique_sentences = []

    # NOTE(review): dataset["real"] holds 6 split groups but only the first 5
    # are used here -- presumably to match the 5 synthetic iterations; confirm.
    for it in range(5):
        # Concatenate the first n_sample / 500 splits of this group.
        samples = [item for k in range(
            int(n_sample / 500)) for item in dataset["real"][it][k]]
        # Flatten the tokens once; both counters read the same list.
        all_tokens = [
            token for example in samples for token in example["tokenized_text"]]
        n_unique_tokens = count_unique_tokens(all_tokens)
        n_unique_lemmas = count_unique_lemmas(all_tokens)
        n_unique_sentences = count_unique_sentences(
            [example["text"] for example in samples])

        iterations_n_unique_tokens.append(n_unique_tokens)
        iterations_n_lemmas.append(n_unique_lemmas)
        iterations_avg_unique_sentences.append(n_unique_sentences)

    # The multirow cells are only emitted on the first of the three rows.
    # Backslashes are escaped explicitly ("\m" is an invalid escape sequence).
    data_source_print = "\\multirow{3}{*}{\\textbf{Real Examples}}" if idx_sample == 0 else ""

    fs_condition_print = "\\multirow{3}{*}{-}" if idx_sample == 0 else ""

    print(data_source_print, "&", fs_condition_print, "&", add_thousand_dots(n_sample), "&",
          add_thousand_dots(round(np.mean(iterations_avg_unique_sentences), 2)), "&",
          add_thousand_dots(round(np.mean(iterations_n_unique_tokens), 2)), "&",
          add_thousand_dots(round(np.mean(iterations_n_lemmas), 2)), "\\\\")
print("\\hline")

\multirow{3}{*}{\textbf{Real Examples}} & \multirow{3}{*}{-} & 500 & 496.4 & 1,914.8 & 1,492.8 \\
 &  & 1,000 & 988.8 & 3,064.2 & 2,352.4 \\
 &  & 1,500 & 1,480.8 & 3,998.6 & 3,041.0 \\
\hline


In [None]:
# LaTeX rows for the real data: per sample size, the mean number of unique
# words among 100 random words and the mean pairwise word-level Levenshtein
# distance (raw and normalized), averaged over 5 iterations.
for idx_sample, n_sample in enumerate([500, 1000, 1500]):
    iterations_avg_unique_words_in_k_words = []
    iterations_avg_levenshtein_distance = []
    iterations_avg_levenshtein_distance_norm = []
    for it in range(5):
        # Concatenate the first n_sample / 500 splits of this group.
        samples = [item for k in range(
            int(n_sample / 500)) for item in dataset["real"][it][k]]

        n_unique_words_in_k_words = get_avg_unique_words_in_k_words(
            [token.text for example in samples for token in example["tokenized_text"]])
        # Both calls compare every pair of documents (quadratic in len(samples)).
        avg_levenshtein_distance = average_word_level_levenshtein_distance(
            samples)
        avg_levenshtein_distance_norm = average_word_level_levenshtein_distance(
            samples, norm=True)

        iterations_avg_unique_words_in_k_words.append(
            n_unique_words_in_k_words)
        iterations_avg_levenshtein_distance.append(
            avg_levenshtein_distance)
        iterations_avg_levenshtein_distance_norm.append(
            avg_levenshtein_distance_norm)

    # The multirow cells are only emitted on the first of the three rows.
    # Backslashes are escaped explicitly ("\m" is an invalid escape sequence).
    data_source_print = "\\multirow{3}{*}{\\textbf{Real Examples}}" if idx_sample == 0 else ""
    fs_condition_print = "\\multirow{3}{*}{-}" if idx_sample == 0 else ""

    print(data_source_print, "&", fs_condition_print, "&", add_thousand_dots(n_sample), "&",
          add_thousand_dots(
              round(np.mean(iterations_avg_unique_words_in_k_words), 2)), "&",
          add_thousand_dots(
              round(np.mean(iterations_avg_levenshtein_distance), 2)), "&",
          add_thousand_dots(round(np.mean(iterations_avg_levenshtein_distance_norm), 2)), "\\\\")
print("\\hline")

\multirow{3}{*}{\textbf{Real Examples}} & \multirow{3}{*}{-} & 500 & 78.32 & 16.37 & 0.93 \\
 &  & 1,000 & 78.35 & 16.42 & 0.93 \\
 &  & 1,500 & 78.22 & 16.39 & 0.93 \\
\hline


### Aspect Term Analysis

#### Synthetic Data

In [15]:
# LaTeX rows for the synthetic data: per LLM, few-shot condition and sample
# size, the aspect counts, the share of implicit aspects (with SD across
# iterations) and the diversity of explicit aspect terms, averaged over the
# 5 iterations.
for idx_llm, llm in enumerate(LLMS):
    for idx_fsc, few_shot_condition in enumerate(FS_CONDITIONS):
        for idx_sample, n_sample in enumerate([500, 1000, 1500]):
            tags_from_splits_count = []
            tags_from_splits_count_implicit = []
            tags_from_splits_count_explicit = []
            count_unique_aspect_terms_in_split = []
            count_unique_aspect_terms_in_k_aspect_terms = []
            for it in range(5):
                # All tags of the first n_sample / 500 folds of this iteration.
                tags = [tag for k in range(
                    int(n_sample / 500)) for example in dataset["synth"][llm][few_shot_condition][it][k] for tag in example["tags"]]
                # Texts of the explicit aspect terms (one entry per explicit tag).
                tags_explicit = [tag["text"]
                                 for tag in tags if tag["type"] == "label-explicit"]
                tags_from_splits_count.append(len(tags))
                tags_from_splits_count_explicit.append(len(tags_explicit))
                tags_from_splits_count_implicit.append(
                    len([tag for tag in tags if tag["type"] == "label-implicit"]))

                unique_tags = len(set(tags_explicit))

                # Mean number of distinct terms among 100 randomly drawn explicit aspect terms
                count_unique_aspect_terms_in_k_aspect_terms.append(
                    get_avg_unique_words_in_k_words(tags_explicit))

                count_unique_aspect_terms_in_split.append(unique_tags)

            # The multirow cells are only emitted on the first row they span.
            # Backslashes are escaped explicitly ("\m" is an invalid escape sequence).
            llm_print = ("\\multirow{6}{*}{\\textbf{" + LLMS_ENCODED[llm] + "}}"
                         if idx_sample == 0 and idx_fsc == 0 else "")
            fs_condition_print = ("\\multirow{3}{*}{" + ENCODE_CONDITION[few_shot_condition] + "}"
                                  if idx_sample == 0 else "")

            print(llm_print, "&", fs_condition_print,
                  "&", add_thousand_dots(n_sample),  # n samples
                  "&", add_thousand_dots(
                      round(np.mean(tags_from_splits_count), 2)),  # n aspects
                  "&", add_thousand_dots(
                      round(np.mean(tags_from_splits_count_implicit) / np.mean(tags_from_splits_count) * 100, 1)) + " \\%",  # % implicit
                  "\\textit{(SD = " + add_thousand_dots(round(np.std([a / b * 100 for a, b in zip(
                      tags_from_splits_count_implicit, tags_from_splits_count)]), 2)) + ")}",
                  "&", add_thousand_dots(
                      round(np.mean(tags_from_splits_count_explicit), 2)),  # n explicit aspects
                  "&", add_thousand_dots(
                      round(np.mean(count_unique_aspect_terms_in_split), 2)),  # n unique
                  "&", add_thousand_dots(round(np.mean(count_unique_aspect_terms_in_k_aspect_terms), 1)), "\\\\")

        # Thin gray rule between the conditions of one LLM, full rule after the LLM.
        if idx_fsc == 0:
            print("\\arrayrulecolor{gray}\\cline{2-8}\\arrayrulecolor{black}")
        else:
            print("\\hline")

\multirow{6}{*}{\textbf{Llama-2-70B}} & \multirow{3}{*}{25 fixed examples} & 500 & 795.6 & 27.2 \% \textit{(SD = 4.25)} & 578.8 & 104.8 & 34.1 \\
 &  & 1,000 & 1,591.4 & 27.8 \% \textit{(SD = 4.24)} & 1,149.4 & 170.0 & 33.9 \\
 &  & 1,500 & 2,388.0 & 28.0 \% \textit{(SD = 4.22)} & 1,720.4 & 220.0 & 33.4 \\
\arrayrulecolor{gray}\cline{2-8}\arrayrulecolor{black}
 & \multirow{3}{*}{25 random examples} & 500 & 704.6 & 26.6 \% \textit{(SD = 1.99)} & 517.2 & 116.8 & 38.0 \\
 &  & 1,000 & 1,407.8 & 26.3 \% \textit{(SD = 1.77)} & 1,037.4 & 194.4 & 39.0 \\
 &  & 1,500 & 2,109.6 & 26.5 \% \textit{(SD = 1.14)} & 1,550.0 & 256.6 & 38.9 \\
\hline
\multirow{6}{*}{\textbf{GPT-3.5-turbo}} & \multirow{3}{*}{25 fixed examples} & 500 & 795.6 & 35.1 \% \textit{(SD = 9.18)} & 516.2 & 38.8 & 17.9 \\
 &  & 1,000 & 1,591.4 & 35.1 \% \textit{(SD = 9.28)} & 1,032.4 & 50.6 & 17.5 \\
 &  & 1,500 & 2,388.0 & 35.1 \% \textit{(SD = 8.77)} & 1,550.4 & 61.8 & 17.4 \\
\arrayrulecolor{gray}\cline{2-8}\arrayrulecolor{bla

#### Real Data

In [None]:
# LaTeX rows for the real data: per sample size, the aspect counts, the share
# of implicit aspects (with SD across iterations) and the diversity of explicit
# aspect terms, averaged over 5 iterations.
for idx_sample, n_sample in enumerate([500, 1000, 1500]):
    tags_from_splits_count = []
    tags_from_splits_count_implicit = []
    tags_from_splits_count_explicit = []
    count_unique_aspect_terms_in_split = []
    count_unique_aspect_terms_in_k_aspect_terms = []
    for it in range(5):
        # All tags of the first n_sample / 500 splits of this group.
        tags = [tag for k in range(int(n_sample / 500))
                for example in dataset["real"][it][k] for tag in example["tags"]]
        # Texts of the explicit aspect terms (one entry per explicit tag).
        tags_explicit = [tag["text"]
                         for tag in tags if tag["type"] == "label-explicit"]
        tags_from_splits_count.append(len(tags))
        tags_from_splits_count_explicit.append(len(tags_explicit))
        tags_from_splits_count_implicit.append(
            len([tag for tag in tags if tag["type"] == "label-implicit"]))

        unique_tags = len(set(tags_explicit))

        # Mean number of distinct terms among 100 randomly drawn explicit aspect terms
        count_unique_aspect_terms_in_k_aspect_terms.append(
            get_avg_unique_words_in_k_words(tags_explicit))

        count_unique_aspect_terms_in_split.append(unique_tags)

    # The multirow cells are only emitted on the first of the three rows.
    # Backslashes are escaped explicitly ("\m" is an invalid escape sequence).
    data_source_print = "\\multirow{3}{*}{\\textbf{Real Examples}}" if idx_sample == 0 else ""
    fs_condition_print = "\\multirow{3}{*}{-}" if idx_sample == 0 else ""

    print(data_source_print, "&", fs_condition_print,
          "&", add_thousand_dots(n_sample),  # n samples
          "&", add_thousand_dots(
              round(np.mean(tags_from_splits_count), 2)),  # n aspects
          "&", add_thousand_dots(
              round(np.mean(tags_from_splits_count_implicit) / np.mean(tags_from_splits_count) * 100, 1)) + " \\%",  # % implicit
          "\\textit{(SD = " + add_thousand_dots(round(np.std([a / b * 100 for a, b in zip(
              tags_from_splits_count_implicit, tags_from_splits_count)]), 2)) + ")}",
          "&", add_thousand_dots(
              round(np.mean(tags_from_splits_count_explicit), 2)),  # n explicit aspects
          "&", add_thousand_dots(
              round(np.mean(count_unique_aspect_terms_in_split), 2)),  # n unique
          "&", add_thousand_dots(round(np.mean(count_unique_aspect_terms_in_k_aspect_terms), 1)), "\\\\")

\multirow{3}{*}{\textbf{Real Examples}} & \multirow{3}{*}{-} & 500 & 703.2 & 26.5 \% \textit{(SD = 1.06)} & 517.2 & 256.4 & 68.3 \\
 &  & 1,000 & 1,403.8 & 26.6 \% \textit{(SD = 0.56)} & 1,029.8 & 438.4 & 68.2 \\
 &  & 1,500 & 2,107.2 & 26.8 \% \textit{(SD = 0.19)} & 1,542.4 & 595.6 & 68.0 \\


### Aspect Term Analysis (With Aspect Category)

#### Synth Examples

In [None]:
# LaTeX rows for the synthetic data, broken down by aspect category: per LLM,
# few-shot condition, category and sample size, the tag count, the share of
# implicit tags (with SD across iterations) and the diversity of explicit
# aspect terms, averaged over the 5 iterations.
for llm in LLMS:
    for few_shot_condition in FS_CONDITIONS:
        for aspect_category in ASPECT_CATEGORIES:
            for n_sample in [500, 1000, 1500]:
                tags_from_splits_count = []
                tags_from_splits_count_implicit = []
                tags_from_splits_count_explicit = []
                count_unique_aspect_terms_in_split = []
                count_unique_aspect_terms_in_k_aspect_terms = []
                for it in range(5):
                    # Tags of this category in the first n_sample / 500 folds.
                    tags = []
                    for k in range(int(n_sample / 500)):
                        for example in dataset["synth"][llm][few_shot_condition][it][k]:
                            tags.extend(
                                tag for tag in example["tags"] if tag["label"] == aspect_category)
                    tags_explicit = [
                        tag["text"] for tag in tags if tag["type"] == "label-explicit"]
                    n_implicit = sum(
                        1 for tag in tags if tag["type"] == "label-implicit")

                    tags_from_splits_count.append(len(tags))
                    tags_from_splits_count_explicit.append(len(tags_explicit))
                    tags_from_splits_count_implicit.append(n_implicit)

                    # Mean number of distinct terms among 10 randomly drawn explicit aspect terms
                    count_unique_aspect_terms_in_k_aspect_terms.append(
                        get_avg_unique_words_in_k_words(tags_explicit, n_selection=10))

                    count_unique_aspect_terms_in_split.append(
                        len(set(tags_explicit)))

                implicit_share = round(
                    np.mean(tags_from_splits_count_implicit) / np.mean(tags_from_splits_count) * 100, 1)
                implicit_share_sd = round(np.std([a / b * 100 for a, b in zip(
                    tags_from_splits_count_implicit, tags_from_splits_count)]), 2)
                row = [
                    "\\textbf{"+LLMS_ENCODED[llm]+"}", "&", ENCODE_CONDITION[few_shot_condition], "&", aspect_category,
                    "&", add_thousand_dots(n_sample),
                    "&", add_thousand_dots(round(np.mean(tags_from_splits_count), 2)),
                    "&", add_thousand_dots(implicit_share) + " \\%",  # % implicit
                    "\\textit{(SD = " + add_thousand_dots(implicit_share_sd) + ")}",
                    "&", add_thousand_dots(round(np.mean(count_unique_aspect_terms_in_split), 2)),
                    "&", add_thousand_dots(round(np.mean(count_unique_aspect_terms_in_k_aspect_terms), 2)),
                    "\\\\",
                ]
                print(*row)
        print("\\hline")

\textbf{Llama-2-70B} & 25 fixed examples & GENERAL-IMPRESSION & 500 & 155.4 & 43.0 \% \textit{(SD = 4.8)} & 34.6 & 7.51 \\
\textbf{Llama-2-70B} & 25 fixed examples & GENERAL-IMPRESSION & 1,000 & 312.2 & 44.1 \% \textit{(SD = 5.24)} & 57.0 & 7.5 \\
\textbf{Llama-2-70B} & 25 fixed examples & GENERAL-IMPRESSION & 1,500 & 468.0 & 42.6 \% \textit{(SD = 6.39)} & 78.4 & 7.52 \\
\textbf{Llama-2-70B} & 25 fixed examples & FOOD & 500 & 157.8 & 21.3 \% \textit{(SD = 4.72)} & 33.6 & 5.29 \\
\textbf{Llama-2-70B} & 25 fixed examples & FOOD & 1,000 & 316.4 & 22.4 \% \textit{(SD = 5.52)} & 56.0 & 5.2 \\
\textbf{Llama-2-70B} & 25 fixed examples & FOOD & 1,500 & 474.0 & 22.9 \% \textit{(SD = 5.09)} & 71.0 & 5.14 \\
\textbf{Llama-2-70B} & 25 fixed examples & SERVICE & 500 & 158.8 & 17.5 \% \textit{(SD = 3.74)} & 18.4 & 4.57 \\
\textbf{Llama-2-70B} & 25 fixed examples & SERVICE & 1,000 & 316.2 & 18.3 \% \textit{(SD = 4.48)} & 26.4 & 4.55 \\
\textbf{Llama-2-70B} & 25 fixed examples & SERVICE & 1,500 & 474.

#### Real Examples

In [None]:
# LaTeX rows for the real data, broken down by aspect category: per category
# and sample size, the total, implicit and explicit tag counts and the
# diversity of explicit aspect terms, averaged over 5 iterations.
for aspect_category in ASPECT_CATEGORIES:
    for n_sample in [500, 1000, 1500]:
        tags_from_splits_count = []
        tags_from_splits_count_implicit = []
        tags_from_splits_count_explicit = []
        count_unique_aspect_terms_in_split = []
        count_unique_aspect_terms_in_k_aspect_terms = []
        for it in range(5):
            # Tags of this category in the first n_sample / 500 splits.
            tags = []
            for k in range(int(n_sample / 500)):
                for example in dataset["real"][it][k]:
                    tags.extend(
                        tag for tag in example["tags"] if tag["label"] == aspect_category)
            tags_explicit = [
                tag["text"] for tag in tags if tag["type"] == "label-explicit"]
            n_implicit = sum(
                1 for tag in tags if tag["type"] == "label-implicit")

            tags_from_splits_count.append(len(tags))
            tags_from_splits_count_explicit.append(len(tags_explicit))
            tags_from_splits_count_implicit.append(n_implicit)

            # Mean number of distinct terms among 10 randomly drawn explicit aspect terms
            count_unique_aspect_terms_in_k_aspect_terms.append(
                get_avg_unique_words_in_k_words(tags_explicit, n_selection=10))

            count_unique_aspect_terms_in_split.append(len(set(tags_explicit)))

        row = [
            "\\textbf{Real Examples} &", aspect_category,
            "&", add_thousand_dots(n_sample),
            "&", add_thousand_dots(round(np.mean(tags_from_splits_count), 2)),
            "&", add_thousand_dots(round(np.mean(tags_from_splits_count_implicit), 2)),
            "&", add_thousand_dots(round(np.mean(tags_from_splits_count_explicit), 2)),
            "&", add_thousand_dots(round(np.mean(count_unique_aspect_terms_in_split), 2)),
            "&", add_thousand_dots(round(np.mean(count_unique_aspect_terms_in_k_aspect_terms), 2)),
            "\\\\",
        ]
        print(*row)

\textbf{Real Examples} & GENERAL-IMPRESSION & 500 & 124.6 & 96.8 & 27.8 & 14.0 & 6.92 \\
\textbf{Real Examples} & GENERAL-IMPRESSION & 1,000 & 249.2 & 194.0 & 55.2 & 24.0 & 7.07 \\
\textbf{Real Examples} & GENERAL-IMPRESSION & 1,500 & 376.8 & 294.8 & 82.0 & 32.4 & 7.05 \\
\textbf{Real Examples} & FOOD & 500 & 281.2 & 30.8 & 250.4 & 144.8 & 8.71 \\
\textbf{Real Examples} & FOOD & 1,000 & 561.2 & 62.6 & 498.6 & 249.6 & 8.7 \\
\textbf{Real Examples} & FOOD & 1,500 & 839.4 & 94.6 & 744.8 & 339.8 & 8.69 \\
\textbf{Real Examples} & SERVICE & 500 & 174.8 & 39.6 & 135.2 & 51.4 & 7.17 \\
\textbf{Real Examples} & SERVICE & 1,000 & 349.8 & 79.6 & 270.2 & 87.2 & 7.14 \\
\textbf{Real Examples} & SERVICE & 1,500 & 525.8 & 118.8 & 407.0 & 116.8 & 7.13 \\
\textbf{Real Examples} & AMBIENCE & 500 & 80.0 & 12.0 & 68.0 & 38.0 & 7.79 \\
\textbf{Real Examples} & AMBIENCE & 1,000 & 159.0 & 23.4 & 135.6 & 66.4 & 7.81 \\
\textbf{Real Examples} & AMBIENCE & 1,500 & 238.8 & 34.4 & 204.4 & 93.2 & 7.8 \\
\textbf{R