# Notebook: Create Confirmation Subsets

## Packages

In [34]:
import json
from pathlib import Path

import pandas as pd

## Settings

In [35]:
# Number of consecutive parts the dataset is split into; one CSV file is written per part.
N_PARTS = 2

In [36]:
# CSV file with the first annotator's annotations (read with pd.read_csv below).
INPUT_PATH = "annotations_nils_total_dataset.csv"
# Prefix of the generated files; each part is written to "<prefix>_<part index>.csv".
OUTPUT_PATH_PREFIX = "annotation_confirmation/annotation_confirmation_real"

## Code

In [37]:
# Prerequisite: the raw annotations must have been exported from Label Studio
# as a CSV file and placed at INPUT_PATH.
# The (still empty) comment column for the second annotator is added right away.
df = pd.read_csv(INPUT_PATH).assign(second_annotator_comment=None)

In [38]:
# Sanity check: list the distinct values of the multi-sentence flag column.
df.loc[:, "two_or_more_sentences"].unique()

array([nan, 'Two or more sentences'], dtype=object)

In [39]:
def _parse_label_cell(cell):
    """Decode one JSON label cell; non-string cells (e.g. NaN) mean "no labels"."""
    return json.loads(cell) if isinstance(cell, str) else []


def _split_label(label):
    """Split a label at its last hyphen and return ``(head, tail)``.

    A label without any hyphen yields the whole label for both parts.
    """
    parts = label.rsplit("-", 1)
    return parts[0], parts[-1]


def merge_labels_implicit_to_explicit(dataframe):
    """Merge implicit and explicit labels into one ``annotation`` column.

    For every row the JSON lists in ``label-implicit`` and ``label-explicit``
    are decoded and rendered as one tuple per line, implicit labels first:

    * implicit: ``(<head>, <tail>, 'NULL')`` where the label is the first
      entry of ``labels`` with a trailing ``-no-phrase`` removed,
    * explicit: ``(<head>, <tail>, <text>)`` using the entry's ``text``.

    In addition, per row:

    * ``aspect_available_without_judgement`` becomes ``"✅"`` when it holds
      the matching choice text, otherwise ``""``,
    * ``annotation`` is replaced by a warning when ``two_or_more_sentences``
      is flagged,
    * ``second_annotator_comment`` is reset to ``""``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns ``label-explicit``, ``label-implicit``,
        ``aspect_available_without_judgement`` and ``two_or_more_sentences``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns described above; the input frame is
        not modified.
    """
    implicit_suffix = "-no-phrase"
    aspect_without_judgement = (
        "An aspect is addressed in the text without an explicit mention by the author"
    )

    # ``apply`` on an empty frame never builds the new columns, so add them
    # explicitly to keep the output schema stable for callers selecting them.
    if dataframe.empty:
        missing = [
            column
            for column in ("annotation", "second_annotator_comment")
            if column not in dataframe.columns
        ]
        return dataframe.reindex(columns=[*dataframe.columns, *missing])

    def merge_labels(row):
        # Work on a copy so the row object handed in by ``apply`` is never mutated.
        row = row.copy()

        triples = []
        for annotation in _parse_label_cell(row["label-implicit"]):
            label = annotation["labels"][0]
            # Only strip the suffix when it is really there; a blind slice
            # would cut off part of a label that lacks it.
            if label.endswith(implicit_suffix):
                label = label[: -len(implicit_suffix)]
            triples.append(_split_label(label) + ("NULL",))

        for annotation in _parse_label_cell(row["label-explicit"]):
            triples.append(_split_label(annotation["labels"][0]) + (annotation["text"],))

        row["annotation"] = "".join(str(triple) + "\n" for triple in triples)

        # Special cases.
        # Note from the original author: the wording of this choice is a bit
        # misleading. It marks examples that mention an aspect towards which
        # the author of the text expressed no sentiment.
        if row["aspect_available_without_judgement"] == aspect_without_judgement:
            row["aspect_available_without_judgement"] = "✅"
        else:
            row["aspect_available_without_judgement"] = ""

        # The warning text (including its spelling) is kept byte-identical so
        # that it matches files that were already exported.
        if row["two_or_more_sentences"] == "Two or more sentences":
            row["annotation"] = "⚠️ Mehr als 1 Satz im Text identifiziert von Annoator A"

        row["second_annotator_comment"] = ""
        return row

    return dataframe.apply(merge_labels, axis=1)

In [40]:
# Split the dataset into N_PARTS consecutive chunks and write one
# confirmation CSV per chunk.
total_rows = len(df)
rows_per_part = total_rows // N_PARTS

for idx in range(N_PARTS):
    start_idx = idx * rows_per_part
    # The last chunk takes the remainder when total_rows is not divisible by N_PARTS.
    end_idx = (idx + 1) * rows_per_part if idx < N_PARTS - 1 else total_rows
    part_df = df.iloc[start_idx:end_idx]

    filename = f"{OUTPUT_PATH_PREFIX}_{idx}.csv"
    # to_csv does not create missing folders, so make sure the target directory exists.
    Path(filename).parent.mkdir(parents=True, exist_ok=True)

    part_df = merge_labels_implicit_to_explicit(part_df)
    # Keep only the columns written to the confirmation file.
    part_df = part_df[["id", "second_annotator_comment", "annotation", "aspect_available_without_judgement", "text"]]

    part_df.to_csv(filename, index=False)

    print(f"Part {idx} saved to '{filename}'.")


Part 0 saved to 'annotation_confirmation/annotation_confirmation_real_0.csv'.
Part 1 saved to 'annotation_confirmation/annotation_confirmation_real_1.csv'.


In [41]:
# Preview the first rows of the part exported last (left over from the loop above).
part_df.head(20)

Unnamed: 0,id,second_annotator_comment,annotation,aspect_available_without_judgement,text
2000,acb9c5da-b48e-452c-aef5-1c01601aa251,,⚠️ Mehr als 1 Satz im Text identifiziert von A...,,"Man muss sich nicht wundern, wenn sich die pot..."
2001,87903ffa-a10a-44cf-968f-a2c38f82706d,,"('FOOD', 'NEGATIVE', 'Garnelen')\n('FOOD', 'NE...",,Sowohl Hähnchenfleisch als auch Garnelen waren...
2002,99e6780c-eb6d-45ec-afe3-7c20d8500ac6,,,,Waren gestern mit 10 Personen dort.
2003,2f886ca9-59c8-4789-a8a7-484fcca42b5f,,"('SERVICE', 'POSITIVE', 'Kellner*innen')\n('AM...",,Die gute Laune der Kellner*innen und die Musik...
2004,77b108e0-690c-4764-bf41-599bf1db9f8d,,"('SERVICE', 'POSITIVE', 'Service')\n",,Netter und freundlicher Service.
2005,93ece0ca-e705-4039-a10c-de15f39210b4,,"('SERVICE', 'NEGATIVE', 'Kellner')\n",,Wir wurden erst bestimmt eine Stunde komplett ...
2006,e80e7023-2d6d-4447-9cdf-47ddf6b6d7f3,,,✅,"Als wir dem Kellner dies mitteilten, bot diese..."
2007,727344a6-6c89-4b90-a399-88566e39b560,,⚠️ Mehr als 1 Satz im Text identifiziert von A...,,"Die Lage, bzw das Restaurant selber entspreche..."
2008,ec460bbc-5255-4f9d-89b9-397f35f75aad,,"('FOOD', 'NEGATIVE', 'Schnitzel')\n('FOOD', 'N...",,"Nichts tolles, nichts übetkandiedeltes, sonder..."
2009,b711b3ff-4451-419c-8f1d-7a75bfe049af,,⚠️ Mehr als 1 Satz im Text identifiziert von A...,,Für die Vegetarier gab es immer passend zum ak...
