# Notebook: Create Confirmation Subsets

## Packages

In [71]:
import json
from pathlib import Path

import pandas as pd

## Settings

In [72]:
# Number of parts the loaded dataset is split into; one CSV file is written per part.
N_PARTS = 2

In [73]:
# CSV file with the raw annotations; it is read with pd.read_csv in the first code cell.
INPUT_PATH = "annotations_nils_total_dataset.csv"
# Common prefix of every output file; "_<part index>.csv" is appended for each part.
OUTPUT_PATH_PREFIX = "annotation_confirmation/annotation_confirmation_total_dataset"

## Code

In [74]:
# Prerequisite: the raw annotations must first be exported from Label Studio as CSV and uploaded.
df = pd.read_csv(INPUT_PATH)
# Placeholder column (None for every row); merge_labels_implicit_to_explicit
# later replaces the value with an empty string.
df["annotation_confirmation"] = None

In [75]:
def merge_labels_implicit_to_explicit(dataframe):
    """Merge the implicit and explicit label columns into one ``annotation`` column.

    For every row the JSON strings stored in ``label-implicit`` and
    ``label-explicit`` are decoded. The first label of each annotation is split
    at its last ``-`` (e.g. ``FOOD-NEGATIVE`` -> ``FOOD`` / ``NEGATIVE``) and the
    annotation is turned into a tuple of three strings, each wrapped in literal
    double quotes: the two label parts and the annotated text. Implicit
    annotations carry no text and get ``"NULL"`` instead; they are listed
    before the explicit ones.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing the columns ``label-explicit`` and ``label-implicit``.
        A cell that is not a ``str`` (for example the NaN pandas produces for
        an empty CSV field) is treated as "no annotations".

    Returns
    -------
    pandas.DataFrame
        A new frame with all input columns plus ``annotation`` (a list of
        tuples per row) and ``annotation_confirmation`` set to ``""``. The
        input frame is not modified. Both columns are also present when the
        input has zero rows.

    Raises
    ------
    KeyError
        If one of the two label columns is missing.
    IndexError
        If the first label of an annotation contains no ``-``.
    json.JSONDecodeError
        If a label cell is a string that is not valid JSON.
    """

    def parse_labels(cell):
        # Only strings hold JSON; anything else (NaN, None) means no labels.
        return json.loads(cell) if isinstance(cell, str) else []

    def quoted(value):
        # The exported values carry their own literal double quotes.
        return '"' + value + '"'

    def to_triple(annotation, phrase):
        # rsplit keeps hyphens inside the first part intact and only splits
        # off the trailing segment.
        parts = annotation["labels"][0].rsplit("-", 1)
        return (quoted(parts[0]), quoted(parts[1]), quoted(phrase))

    merged = [
        [to_triple(annotation, "NULL") for annotation in parse_labels(implicit)]
        + [to_triple(annotation, annotation["text"]) for annotation in parse_labels(explicit)]
        for explicit, implicit in zip(dataframe["label-explicit"], dataframe["label-implicit"])
    ]

    # Building the column explicitly (instead of a row-wise apply) guarantees
    # that "annotation" exists even for an empty frame, where apply never
    # calls the row function and therefore adds no column.
    annotation_column = pd.Series(merged, index=dataframe.index, dtype=object)

    return dataframe.assign(annotation=annotation_column, annotation_confirmation="")

In [76]:
# Split the loaded annotations into N_PARTS consecutive slices and write each
# slice to its own CSV file.
total_rows = len(df)
# Floor division: every part gets this many rows; the last part additionally
# takes whatever remainder is left.
rows_per_part = total_rows // N_PARTS

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before the first part is written.
Path(OUTPUT_PATH_PREFIX).parent.mkdir(parents=True, exist_ok=True)

for idx in range(N_PARTS):
    start_idx = idx * rows_per_part
    # The final part runs to the end of the frame so no rows are dropped.
    end_idx = (idx + 1) * rows_per_part if idx < N_PARTS - 1 else total_rows
    part_df = df.iloc[start_idx:end_idx]

    filename = f"{OUTPUT_PATH_PREFIX}_{idx}.csv"

    part_df = merge_labels_implicit_to_explicit(part_df)
    # Keep only the columns written to the confirmation file, in this order.
    part_df = part_df[["id", "annotation_confirmation", "annotation", "text"]]

    part_df.to_csv(filename, index=False)

    print(f"Part {idx} saved to '{filename}'.")


Part 0 saved to 'annotation_confirmation/annotation_confirmation_total_dataset_0.csv'.
Part 1 saved to 'annotation_confirmation/annotation_confirmation_total_dataset_1.csv'.


In [77]:
# Display the frame left in `part_df` by the export loop, i.e. the last part that was written.
part_df

Unnamed: 0,id,annotation_confirmation,annotation,text
2000,acb9c5da-b48e-452c-aef5-1c01601aa251,,[],"Man muss sich nicht wundern, wenn sich die pot..."
2001,87903ffa-a10a-44cf-968f-a2c38f82706d,,"[(""FOOD"", ""NEGATIVE"", ""Garnelen""), (""FOOD"", ""N...",Sowohl Hähnchenfleisch als auch Garnelen waren...
2002,99e6780c-eb6d-45ec-afe3-7c20d8500ac6,,[],Waren gestern mit 10 Personen dort.
2003,2f886ca9-59c8-4789-a8a7-484fcca42b5f,,"[(""SERVICE"", ""POSITIVE"", ""Kellner*innen""), (""A...",Die gute Laune der Kellner*innen und die Musik...
2004,77b108e0-690c-4764-bf41-599bf1db9f8d,,"[(""SERVICE"", ""POSITIVE"", ""Service"")]",Netter und freundlicher Service.
...,...,...,...,...
3996,5da31017-77d5-4f8b-b7b8-b846af0a68db,,[],und Convenience-Produkte.
3997,48f9c295-f1a5-488f-8b98-e950bc0dd26b,,"[(""FOOD"", ""NEUTRAL"", ""Essen"")]","Essen war ok, aber sowas verdirbt einem nur di..."
3998,395f3ce3-8953-49ab-9789-73296025a148,,"[(""SERVICE"", ""POSITIVE"", ""Service"")]","Der Service war schnell, freundlich und Aufmer..."
3999,08912473-cc39-4993-9aa8-faab203e1be1,,"[(""AMBIENCE"", ""POSITIVE"", ""Ambiente"")]",Das Ambiente ist natürlich besonders.
