# Notebook: Create Confirmation Subsets

## Packages

In [288]:
import json
import os

import pandas as pd

## Settings

In [289]:
# Number of consecutive parts (one CSV file each) the annotations are split into.
N_PARTS = 3

## Code

In [290]:
# Read the annotations CSV; the confirmation column starts out empty (None).
df = pd.read_csv("annotations_nils.csv").assign(annotation_confirmation=None)

In [291]:
def merge_labels_implicit_to_explicit(dataframe):
    """Combine each row's implicit and explicit labels into one "annotation" column.

    The "label-implicit" and "label-explicit" cells are expected to hold JSON
    strings encoding a list of annotation objects. Any non-string cell (e.g.
    NaN/None for a missing value) is treated as "no annotations".

    Each implicit annotation becomes ``[text, labels, "implicit"]`` and each
    explicit annotation becomes
    ``[text, labels, "explicit", "start: <start>", "end: <end>"]``. Within a
    row, implicit entries are listed before explicit ones.

    Args:
        dataframe: DataFrame with "label-explicit" and "label-implicit" columns.

    Returns:
        A new DataFrame with the input's columns plus "annotation" (the list of
        entries described above) and "annotation_confirmation" (set to "" in
        every row, overwriting any existing values). The input frame is not
        modified. An empty input yields an empty frame that still carries both
        columns, so callers can select them unconditionally.

    Raises:
        KeyError: if a label column is missing or an annotation lacks a
            required key ("text", "labels", and for explicit ones "start"/"end").
        json.JSONDecodeError: if a string cell is not valid JSON.
    """

    def parse_labels(cell):
        # Missing values arrive as non-strings (NaN/None): no annotations.
        return json.loads(cell) if isinstance(cell, str) else []

    def merge_labels(cell_explicit, cell_implicit):
        merged = [
            [annotation["text"], annotation["labels"], "implicit"]
            for annotation in parse_labels(cell_implicit)
        ]
        merged.extend(
            [
                annotation["text"],
                annotation["labels"],
                "explicit",
                "start: " + str(annotation["start"]),
                "end: " + str(annotation["end"]),
            ]
            for annotation in parse_labels(cell_explicit)
        )
        return merged

    annotations = [
        merge_labels(cell_explicit, cell_implicit)
        for cell_explicit, cell_implicit in zip(
            dataframe["label-explicit"], dataframe["label-implicit"]
        )
    ]

    # Build the columns explicitly instead of a row-wise DataFrame.apply: apply
    # never calls the row function on an empty frame (so the new columns would
    # be missing) and it may change the dtypes of untouched columns.
    result = dataframe.copy()
    # dtype=object keeps every per-row list as a single cell value.
    result["annotation"] = pd.Series(annotations, index=result.index, dtype=object)
    result["annotation_confirmation"] = ""
    return result


In [292]:
# Split the annotations into N_PARTS consecutive chunks and write each one to
# its own CSV file inside the output directory.
output_dir = "annotation_confirmation"
# DataFrame.to_csv does not create missing parent directories, so make sure the
# target directory exists before writing (no-op when it is already there).
os.makedirs(output_dir, exist_ok=True)

total_rows = len(df)
rows_per_part = total_rows // N_PARTS

for idx in range(N_PARTS):
    start_idx = idx * rows_per_part
    # The last part also takes the rows left over by the integer division.
    end_idx = (idx + 1) * rows_per_part if idx < N_PARTS - 1 else total_rows
    part_df = df.iloc[start_idx:end_idx]

    filename = f"{output_dir}/annotation_confirmation_{idx}.csv"

    part_df = merge_labels_implicit_to_explicit(part_df)
    # Keep only the columns written to the confirmation file.
    part_df = part_df[["id", "annotation_confirmation", "annotation", "text"]]

    part_df.to_csv(filename, index=False)

    print(f"Part {idx} saved to '{filename}'.")


Part 0 saved to 'annotation_confirmation/annotation_confirmation_0.csv'.
Part 1 saved to 'annotation_confirmation/annotation_confirmation_1.csv'.
Part 2 saved to 'annotation_confirmation/annotation_confirmation_2.csv'.


In [293]:
# Preview the "label-explicit" column for the first ten rows.
df[["label-explicit"]].head(10)

Unnamed: 0,label-explicit
0,"[{""end"":42,""text"":""Service"",""start"":35,""labels..."
1,"[{""end"":86,""text"":""Kellner"",""start"":79,""labels..."
2,"[{""end"":25,""text"":""Personal"",""start"":17,""label..."
3,
4,"[{""end"":31,""text"":""Kuchen"",""start"":25,""labels""..."
5,
6,
7,
8,
9,


In [294]:
# Settings > Annotations > Show the Data Manager to annotators