# Notebook: Create Confirmation Subsets

## Packages

In [1]:
import pandas as pd
import json

## Settings

In [2]:
N_PARTS = 2

In [3]:
INPUT_PATH = "synth_annotations_nils.csv"
OUTPUT_PATH_PREFIX = "annotation_confirmation/annotation_confirmation_synth"

## Code

In [4]:
# Zuvor muss ich meine Annotationen hochladen roh, aus label studio als csv
df = pd.read_csv(INPUT_PATH)
df["annotation_confirmation"] = None

In [5]:
def merge_labels_implicit_to_explicit(dataframe):
    # Benutzerdefinierte Funktion, um label-implicit zu label-explicit hinzuzufügen und 'type' hinzufügen
    def merge_labels(row):
        labels_explicit = row["label-explicit"]
        labels_implicit = row["label-implicit"]

        if isinstance(labels_explicit, str) == False:
            labels_explicit = []
        else:
            labels_explicit = json.loads(labels_explicit)

        if isinstance(labels_implicit, str) == False:
            labels_implicit = []
        else:
            labels_implicit = json.loads(labels_implicit)

        labels_total = []
        for annotation in labels_implicit:
            tuple = ('"'+annotation["labels"][0].rsplit('-', 1)[0]+'"',
                     '"'+annotation["labels"][0].rsplit('-', 1)[1]+'"', '"NULL"')
            labels_total.append(tuple)

        for annotation in labels_explicit:
            tuple = ('"'+annotation["labels"][0].rsplit('-', 1)[0]+'"', '"' +
                     annotation["labels"][0].rsplit('-', 1)[1]+'"', '"'+annotation["text"]+'"')
            labels_total.append(tuple)

        row["annotation"] = labels_total
        row["annotation_confirmation"] = ""
        return row

    dataframe = dataframe.apply(merge_labels, axis=1)

    return dataframe

In [6]:
total_rows = len(df)
rows_per_part = total_rows // N_PARTS

for idx in range(N_PARTS):
    start_idx = idx * rows_per_part
    end_idx = (idx + 1) * rows_per_part if idx < N_PARTS - 1 else total_rows
    part_df = df.iloc[start_idx:end_idx]

    filename = f"{OUTPUT_PATH_PREFIX}_{idx}.csv"

    part_df = merge_labels_implicit_to_explicit(part_df)
    part_df = part_df[["id", "annotation_confirmation", "annotation", "text"]]

    part_df.to_csv(filename, index=False)

    print(f"Part {idx} saved to '{filename}'.")


Part 0 saved to 'annotation_confirmation/annotation_confirmation_synth_0.csv'.
Part 1 saved to 'annotation_confirmation/annotation_confirmation_synth_1.csv'.


In [7]:
part_df

Unnamed: 0,id,annotation_confirmation,annotation,text
12,139eabec-943a-4440-a92a-848a32d4a42c,,"[(""GENERAL-IMPRESSION-NEGATIVE-no"", ""phrase"", ...",: - es war einfach schlecht.
13,5343d859-dd5f-4389-a1d8-d204eaa5c16a,,"[(""PRICE"", ""NEGATIVE"", ""Rechnung"")]","Die Rechnung war nicht unbedingt billig, aber ..."
14,aff88518-8c00-435d-bc0f-38503d14d551,,"[(""SERVICE"", ""POSITIVE"", ""Mitarbeiter"")]",> Mitarbeiter freundlich und hilfsbereit.
15,cfae65f5-8d8f-40c8-94eb-a13e238d87c1,,[],"``Gib mir ein Hähnchen, ich bin hungrig`` - na..."
16,95c2cd44-a246-4a74-9c0c-f4d4a3407173,,"[(""AMBIENCE"", ""POSITIVE"", ""Ambiente""), (""FOOD""...","Die Fisch war nicht gut zubereitet, aber das A..."
17,66aeb036-23b2-4ba7-b81b-e4cab8fe4110,,"[(""AMBIENCE"", ""POSITIVE"", ""Ambiente"")]",Ein Restaurant mit einem sehr schönen Ambiente.
18,9f287461-6bfd-4b7c-a1b8-c1d629aaf798,,"[(""AMBIENCE"", ""POSITIVE"", ""Atmosphäre"")]",":""Eine sehr gemütliche Atmosphäre."
19,2ef8c80d-3e3f-472b-bb0b-ab729b98076e,,"[(""AMBIENCE"", ""POSITIVE"", ""Einrichtung""), (""PR...","Die Einrichtung war sehr schön, die Preise war..."
20,22f08343-ab92-4219-96db-08fe7e3a6a18,,"[(""SERVICE"", ""POSITIVE"", ""Bedienung"")]",Die Bedienung war sehr freundlich.
21,80f92a54-7eb3-4a56-b5a8-b73e2dd37a5a,,"[(""FOOD"", ""NEGATIVE"", ""Tortellini"")]",Die Tortellini schmeckten schlecht.
