# Notebook: Create Examples for Prompts

## Packages

In [2]:
from helper_synthesis import get_examples_as_text
from itertools import cycle, islice
import numpy as np
import tiktoken
import random
import json

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/nils_hellwig/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Parameters

In [3]:
SEED = 42
random.seed(SEED)
N_RETRY_SETS = 25

In [4]:
# Setup Classes/Polarities for Synthesis
CLASSES  = ["GENERAL-IMPRESSION", "FOOD", "SERVICE", "AMBIENCE", "PRICE"]
POLARITIES = ["POSITIVE", "NEUTRAL", "NEGATIVE"]
COMBINATIONS = [(aspect, polarity) for polarity in POLARITIES for aspect in CLASSES]

## Code

### Code to Count Prompt Size

In [7]:
with open('../prompt_template.txt', 'r') as file:
    PROMPT_TEMPLATE = file.read()

In [8]:
with open(f"../07 train models/real/split_4.json", 'r', encoding='utf-8') as json_file:
    dataset = json.load(json_file)

In [9]:
label = [('GENERAL-IMPRESSION', 'POSITIVE')]

In [10]:
examples = [example["id"] for example in dataset[:40]]

In [11]:
random.shuffle(dataset)

In [12]:
def get_n_examples_for_class(n_examples=5, aspect_category="FOOD", examples=[]):
    found_examples = []
    i = 0
    while len(found_examples) < n_examples:
        if aspect_category in [tag["label"] for tag in dataset[i]["tags"]] and dataset[i]["id"] not in examples:
            found_examples.append(dataset[i])
        i += 1
    return examples + [example["id"] for example in found_examples]


In [13]:
examples = []
for ac in CLASSES:
    examples = get_n_examples_for_class(n_examples=5, aspect_category=ac, examples=examples)
random.shuffle(examples)
        

In [14]:
few_shot_examples = [entry for entry in dataset if entry['id'] in examples]

examples_text = get_examples_as_text(few_shot_examples)
prompt_footer = f'\nLabel:{str(label)}\nPrediction:'
prompt = PROMPT_TEMPLATE + examples_text + prompt_footer
print(prompt)

Erzeuge genau einen Satz einer Restaurant-Bewertung, die für das Training eines Modells für die Aspekt-basierte Sentiment Analyse verwendet werden kann.
Gegeben ist ein Label in Form eines Arrays, wobei ein oder mehrere Tupel (Aspekt-Kategorie, Sentiment-Polarität) gegeben sind.
Für ein Label wird dann eine deutschsprachige Prediction erzeugt, wobei die Prediction ausschließlich die in dem Label definierten Kombinationen aus Aspekt-Kategorie und Aspekt-Polarität adressiert.

Folgende Aspekt-Kategorien werden betrachtet: 

* "FOOD" - Aspekte in Bezug auf das Essen im allgemeinen oder bestimmte Speisen und Getränke
* "SERVICE" - Aspekte in Bezug auf den Service im allgemeinen oder Einstellung und Professionalität des Personals, die Wartezeiten oder Service-Dienstleistungen wie Speisenmitnahme
* "PRICE" - Aspekte in Bezug auf den Preis im allgemeinen oder Speisen, Getränke oder andere Leistungen des Restaurants, deren Preis bewertet wird.
* "AMBIENCE" - Aspekte in Bezug auf das Ambiente i

### See Examples

In [213]:
# TWO ASPECTS EXAMPLE: 0d6a451c-e730-4bee-82bf-906bbf552b10
idx = 12
dataset[idx]["id"], dataset[idx]["text"], dataset[idx]["tags"]

('4dad8de4-9f51-40bf-aeb8-998a8df599b6',
 'Da passt das unverschämte Servicepersonal ideal dazu.',
 [{'end': 41,
   'start': 26,
   'tag_with_polarity': 'SERVICE-NEGATIVE',
   'tag_with_polarity_and_type': 'SERVICE-NEGATIVE-explicit',
   'text': 'Servicepersonal',
   'type': 'label-explicit',
   'label': 'SERVICE',
   'polarity': 'NEGATIVE'}])

In [214]:
# GENERAL-IMPRESSION:: 7942cf45-690f-4100-98f6-f26b5b9177af
print([(example["id"], example["text"], example["tags"])
      for example in dataset if example["id"] == "7942cf45-690f-4100-98f6-f26b5b9177af"][0])

('7942cf45-690f-4100-98f6-f26b5b9177af', 'Sehr schönes Restaurant.', [{'end': 23, 'start': 13, 'tag_with_polarity': 'GENERAL-IMPRESSION-POSITIVE', 'tag_with_polarity_and_type': 'GENERAL-IMPRESSION-POSITIVE-explicit', 'text': 'Restaurant', 'type': 'label-explicit', 'label': 'GENERAL-IMPRESSION', 'polarity': 'POSITIVE'}])


In [215]:
# FOOD:: a05d0240-9c5d-4ba4-9059-7108a6f1c5a9
print([(example["id"], example["text"], example["tags"])
      for example in dataset if example["id"] == "a05d0240-9c5d-4ba4-9059-7108a6f1c5a9"][0])

('a05d0240-9c5d-4ba4-9059-7108a6f1c5a9', 'Die Bratwurst war unglaublich lecker und perfekt gewürzt.', [{'end': 13, 'start': 4, 'tag_with_polarity': 'FOOD-POSITIVE', 'tag_with_polarity_and_type': 'FOOD-POSITIVE-explicit', 'text': 'Bratwurst', 'type': 'label-explicit', 'label': 'FOOD', 'polarity': 'POSITIVE'}])


In [216]:
# SERVICE:: 0428a331-ebcf-4410-87ee-a140c0364d38 
print([(example["id"], example["text"], example["tags"])
      for example in dataset if example["id"] == "0428a331-ebcf-4410-87ee-a140c0364d38"][0])

('0428a331-ebcf-4410-87ee-a140c0364d38', 'Bedienung leider nicht aufmerksam.', [{'end': 9, 'start': 0, 'tag_with_polarity': 'SERVICE-NEGATIVE', 'tag_with_polarity_and_type': 'SERVICE-NEGATIVE-explicit', 'text': 'Bedienung', 'type': 'label-explicit', 'label': 'SERVICE', 'polarity': 'NEGATIVE'}])


In [217]:
# PRICE:: 30526068-ebb3-4640-b4a7-6b303620af81
print([(example["id"], example["text"], example["tags"])
      for example in dataset if example["id"] == "30526068-ebb3-4640-b4a7-6b303620af81"][0])

('30526068-ebb3-4640-b4a7-6b303620af81', 'Die Preise sind unangemessen hoch, da lässt sich wohl der Chef seinen Namen bezahlen.', [{'end': 10, 'start': 4, 'tag_with_polarity': 'PRICE-NEGATIVE', 'tag_with_polarity_and_type': 'PRICE-NEGATIVE-explicit', 'text': 'Preise', 'type': 'label-explicit', 'label': 'PRICE', 'polarity': 'NEGATIVE'}])


In [218]:
# AMBIENCE:: 15764c1d-9397-4cae-81a0-7e911ba2150d
print([(example["id"], example["text"], example["tags"])
      for example in dataset if example["id"] == "15764c1d-9397-4cae-81a0-7e911ba2150d"][0])

('15764c1d-9397-4cae-81a0-7e911ba2150d', 'Es war viel zu laut, wie im Club.', [{'end': 0, 'start': 0, 'tag_with_polarity': 'AMBIENCE-NEGATIVE', 'tag_with_polarity_and_type': 'AMBIENCE-NEGATIVE-no-phrase-implicit', 'text': 'NULL', 'type': 'label-implicit', 'label': 'AMBIENCE', 'polarity': 'NEGATIVE'}])


In [219]:
# FOOD/PRICE:: 0d6a451c-e730-4bee-82bf-906bbf552b10
print([(example["id"], example["text"], example["tags"])
      for example in dataset if example["id"] == "0d6a451c-e730-4bee-82bf-906bbf552b10"][0])

('0d6a451c-e730-4bee-82bf-906bbf552b10', 'Essen lecker, jedoch überteuert.', [{'end': 5, 'start': 0, 'tag_with_polarity': 'FOOD-POSITIVE', 'tag_with_polarity_and_type': 'FOOD-POSITIVE-explicit', 'text': 'Essen', 'type': 'label-explicit', 'label': 'FOOD', 'polarity': 'POSITIVE'}, {'end': 0, 'start': 0, 'tag_with_polarity': 'PRICE-NEGATIVE', 'tag_with_polarity_and_type': 'PRICE-NEGATIVE-no-phrase-implicit', 'text': 'NULL', 'type': 'label-implicit', 'label': 'PRICE', 'polarity': 'NEGATIVE'}])


In [220]:
# SERVICE/AMBIENCE/FOOD:: 40bbda81-2d31-446f-b48f-b0c4a09a9334
print([(example["id"], example["text"], example["tags"])
      for example in dataset if example["id"] == "40bbda81-2d31-446f-b48f-b0c4a09a9334"][0])

('40bbda81-2d31-446f-b48f-b0c4a09a9334', 'Tolles Essen, tolle Atmosphäre und ganz netter und aufmerksamer Service!', [{'end': 12, 'start': 7, 'tag_with_polarity': 'FOOD-POSITIVE', 'tag_with_polarity_and_type': 'FOOD-POSITIVE-explicit', 'text': 'Essen', 'type': 'label-explicit', 'label': 'FOOD', 'polarity': 'POSITIVE'}, {'end': 30, 'start': 20, 'tag_with_polarity': 'AMBIENCE-POSITIVE', 'tag_with_polarity_and_type': 'AMBIENCE-POSITIVE-explicit', 'text': 'Atmosphäre', 'type': 'label-explicit', 'label': 'AMBIENCE', 'polarity': 'POSITIVE'}, {'end': 71, 'start': 64, 'tag_with_polarity': 'SERVICE-POSITIVE', 'tag_with_polarity_and_type': 'SERVICE-POSITIVE-explicit', 'text': 'Service', 'type': 'label-explicit', 'label': 'SERVICE', 'polarity': 'POSITIVE'}])


In [221]:
# Get Examples
end = False
k = 0
i = 0
while end == False:
    if "NEUTRAL" in [tag["polarity"] for tag in dataset[k]["tags"]]:
        print(dataset[k]["id"], "\n", dataset[k]
              ["text"], "\n", dataset[k]["tags"], "\n")
        i += 1
    if i == 100:
        end = True
    k += 1

7354bf60-4628-4464-8c4b-46724ca9601c 
 Qualität des Essens Durchschnitt. 
 [{'end': 19, 'start': 13, 'tag_with_polarity': 'FOOD-NEUTRAL', 'tag_with_polarity_and_type': 'FOOD-NEUTRAL-explicit', 'text': 'Essens', 'type': 'label-explicit', 'label': 'FOOD', 'polarity': 'NEUTRAL'}] 

eaa23be2-0512-41d5-91c1-5c576b14cb39 
 Die Qualität der Speisen war ordentlich mit ein paar kleinen Abstrichen. 
 [{'end': 24, 'start': 17, 'tag_with_polarity': 'FOOD-NEUTRAL', 'tag_with_polarity_and_type': 'FOOD-NEUTRAL-explicit', 'text': 'Speisen', 'type': 'label-explicit', 'label': 'FOOD', 'polarity': 'NEUTRAL'}] 

b2eff795-49e7-49f3-96b2-dc5099855143 
 Gestresste überhebliche Kellner und das Essen nur noch Mittelmass. 
 [{'end': 31, 'start': 24, 'tag_with_polarity': 'SERVICE-NEGATIVE', 'tag_with_polarity_and_type': 'SERVICE-NEGATIVE-explicit', 'text': 'Kellner', 'type': 'label-explicit', 'label': 'SERVICE', 'polarity': 'NEGATIVE'}, {'end': 45, 'start': 40, 'tag_with_polarity': 'FOOD-NEUTRAL', 'tag_with_pola

IndexError: list index out of range