# Notebook: Create LaTeX Table for Language Analysis

This notebook builds the rows of a LaTeX table that presents the language analysis statistics.

**TODO:** Format decimal numbers correctly (original note: "Kommazahl korrekt").

## Packages

In [1]:
import json

## Code

In [2]:
# Load the precomputed language statistics: a dict with one entry per data
# condition. JSON is UTF-8 by specification, so the encoding is passed
# explicitly instead of relying on the platform's locale default (the file
# holds German lemmas, which could be mis-decoded otherwise).
with open("language_statistics.json", "r", encoding="utf-8") as json_file:
    statistics = json.load(json_file)

In [3]:
def round_number(number, decimal_places=1):
    """Return ``number`` rounded to ``decimal_places`` decimals (default: 1).

    Thin wrapper around the built-in ``round``.
    """
    return round(number, decimal_places)

### Real Examples

In [4]:
# Build and print the LaTeX table rows for the real-data-only conditions.
# Cells per row, in order: n_real, rounded 'unique_lemmas_avg', rounded
# 'avg_number_of_tokens_in_example_text', one 'n_unique_aspect_terms_<label>'
# value per aspect label, and 'n_unique_explicit_aspects_total'.
aspect_labels = ("GENERAL-IMPRESSION", "FOOD", "SERVICE", "AMBIENCE", "PRICE")

row_strings = []
for n_real in (500, 1000, 1500):
    stats = statistics[f"{n_real}_0_only_real"]
    cells = [
        n_real,
        round_number(stats["unique_lemmas_avg"]),
        round_number(stats["avg_number_of_tokens_in_example_text"]),
    ]
    cells.extend(stats[f"n_unique_aspect_terms_{label}"] for label in aspect_labels)
    cells.append(stats["n_unique_explicit_aspects_total"])
    # Cells are joined with " & "; each row ends with the LaTeX row break.
    row_strings.append(" & ".join(str(cell) for cell in cells) + "\\\\")

# All rows are emitted on a single output line, without newlines in between.
rows = "".join(row_strings)
print(rows)


500 & 1514.4 & 13.1 & 15.0 & 141.6 & 56.2 & 37.2 & 18.6 & 255.0\\1000 & 2417.0 & 13.1 & 21.6 & 246.4 & 93.6 & 66.0 & 37.8 & 438.8\\1500 & 3130.6 & 13.1 & 32.0 & 333.8 & 119.8 & 94.0 & 52.8 & 593.6\\


### Llama-2-70B

In [5]:
# Print the LaTeX table rows for the Llama-2-70B conditions: a header line
# with the few-shot condition, then all rows of that condition on one line.
aspect_labels = ("GENERAL-IMPRESSION", "FOOD", "SERVICE", "AMBIENCE", "PRICE")

for few_shot_condition in ("fixed", "random"):
    print(few_shot_condition, ":")
    row_strings = []
    for n_synth in (500, 1000, 1500):
        stats = statistics[f"0_{n_synth}_Llama70B_{few_shot_condition}"]
        cells = [
            n_synth,
            round_number(stats["unique_lemmas_avg"]),
            round_number(stats["avg_number_of_tokens_in_example_text"]),
        ]
        cells.extend(stats[f"n_unique_aspect_terms_{label}"] for label in aspect_labels)
        cells.append(stats["n_unique_explicit_aspects_total"])
        # Cells are joined with " & "; each row ends with the LaTeX row break.
        row_strings.append(" & ".join(str(cell) for cell in cells) + "\\\\")
    rows = "".join(row_strings)
    print(rows)

fixed :
500 & 537.4 & 10.2 & 36.0 & 29.4 & 19.8 & 28.0 & 25.2 & 112.6\\1000 & 792.2 & 10.2 & 62.2 & 55.6 & 31.4 & 46.2 & 39.2 & 192.0\\1500 & 997.4 & 10.3 & 88.4 & 74.2 & 39.0 & 63.0 & 51.2 & 256.6\\
random :
500 & 596.4 & 10.1 & 38.8 & 34.4 & 23.0 & 29.8 & 27.6 & 131.6\\1000 & 876.2 & 10.1 & 64.2 & 60.6 & 38.6 & 52.0 & 41.8 & 220.8\\1500 & 1084.6 & 10.1 & 86.8 & 80.8 & 48.4 & 69.8 & 55.4 & 291.6\\


### GPT-3

In [6]:
# Print the LaTeX table rows for the GPT-3 conditions: a header line with the
# few-shot condition, then all rows of that condition on one output line.
for few_shot_condition in ["random", "fixed"]:
    print(few_shot_condition, ":")
    rows = ""
    for n_synth in [500, 1000, 1500]:
        key = f"0_{n_synth}_GPT-3_{few_shot_condition}"
        if key not in statistics:
            # The recorded run of this cell died with a bare KeyError for this
            # key pattern. Fail with the GPT-related keys that do exist so the
            # pattern can be corrected without dumping the whole dict.
            # NOTE(review): the correct key spelling is not known here - fix
            # the f-string above once the real keys are confirmed.
            gpt_keys = sorted(k for k in statistics if "gpt" in k.lower())
            raise KeyError(
                f"{key} is missing from statistics; "
                f"GPT-related keys present: {gpt_keys or 'none'}"
            )
        st = statistics[key]
        # Leading cells: sample size, then the two rounded averages.
        rows += f"{n_synth} & {round_number(st['unique_lemmas_avg'])} & {round_number(st['avg_number_of_tokens_in_example_text'])}"
        # One cell per aspect label, followed by the overall total.
        for label in ["GENERAL-IMPRESSION", "FOOD", "SERVICE", "AMBIENCE", "PRICE"]:
            rows += f" & {st[f'n_unique_aspect_terms_{label}']}"
        rows += f' & {st["n_unique_explicit_aspects_total"]}'
        # LaTeX row break.
        rows += "\\\\"
    print(rows)

random :


KeyError: '0_500_GPT-3_random'

In [7]:
# Display the whole loaded statistics dict (bare last expression of the cell).
# NOTE(review): presumably a debugging aid to look up the real key names after
# the KeyError in the GPT-3 cell - confirm; showing only the keys would keep
# the output short.
statistics

{'500_0_only_real': {'n_real': 500,
  'n_synth': 0,
  'llm': None,
  'few-shot examples': 'random',
  'top_n_lemmas': 'essen, LOC, Service, freundlich, Bedienung',
  'unique_lemmas_avg': 1514.4,
  'avg_number_of_tokens_in_example_text': 13.082,
  'sd_number_of_tokens_in_example_text': 8.604700808279159,
  'n_unique_explicit_aspects_total': 255.0,
  'total_llm_invalid_xml_schema': 0,
  'total_llm_invalid_xml_tags': 0,
  'total_llm_aspect_polarity_in_text_but_not_in_label': 0,
  'total_llm_more_than_one_sentences': 0,
  'total_llm_retries': 0,
  'n_unique_aspect_terms_FOOD': 141.6,
  'n_unique_aspect_terms_SERVICE': 56.2,
  'n_unique_aspect_terms_PRICE': 18.6,
  'n_unique_aspect_terms_GENERAL-IMPRESSION': 15.0,
  'n_unique_aspect_terms_AMBIENCE': 37.2},
 '1000_0_only_real': {'n_real': 1000,
  'n_synth': 0,
  'llm': None,
  'few-shot examples': 'random',
  'top_n_lemmas': 'essen, LOC, Service, freundlich, Bedienung',
  'unique_lemmas_avg': 2417.0,
  'avg_number_of_tokens_in_example_text':