In [62]:
# Mount Google Drive inside the Colab runtime; this cell only works on Colab.
from google.colab import drive
drive.mount('/content/drive')
# Switch the working directory to the project folder: the later cells read and
# write files through paths relative to it (e.g. "./annotations/final").
%cd /content/drive/MyDrive/Colab Notebooks/Feger/suger-pie-honey-pot

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks/Feger/suger-pie-honey-pot


# Requirements

In [63]:
import os
import numpy as np
import pandas as pd
from nltk.metrics.agreement import AnnotationTask

# Evaluation

In [64]:
path = "./annotations/final"
# Workbooks that every annotator folder has to contain.
expected_files = {"prague.xlsx", "nl2bash.xlsx", "halle.xlsx"}
# Number of rows each annotator has to contribute over all of their workbooks.
expected_samples = 1406

# Collect all annotations. The frames are gathered in a list and concatenated
# once, instead of re-copying the accumulated frame on every loop iteration.
frames = []
for folder in os.listdir(path):
    if not folder.startswith("annotator_"):
        continue
    files = os.listdir(f"{path}/{folder}")
    assert len(files) == 3, f"{folder}: expected 3 files, found {files}"
    assert expected_files.issubset(files), f"{folder}: expected {sorted(expected_files)}, found {files}"
    for annotation in files:
        df = pd.read_excel(f"{path}/{folder}/{annotation}")
        # Remember where every row came from.
        df["annotator"] = folder
        df["dataset"] = annotation.replace('.xlsx', '')
        frames.append(df)
assert frames, f"no annotator_* folders found in {path}"
dfs = pd.concat(frames)
# NOTE(review): this drops the last three columns purely by position; which
# columns these are depends on the workbook layouts - confirm they are unused.
dfs = dfs.iloc[:, :-3]
annotators = dfs["annotator"].unique()

# Check if all annotators preserved the sample size
for annotator in annotators:
    n_rows = dfs[dfs["annotator"] == annotator].shape[0]
    assert n_rows == expected_samples, f"{annotator}: {n_rows} rows, expected {expected_samples}"

# Check if all annotators annotated the same samples. Equality is transitive,
# so comparing everyone against the first annotator covers every pair.
request_ids = {
    annotator: sorted(dfs.loc[dfs["annotator"] == annotator, "request-id"])
    for annotator in annotators
}
for annotator in annotators[1:]:
    assert request_ids[annotator] == request_ids[annotators[0]], (
        f"{annotator} and {annotators[0]} annotated different request-ids"
    )

# Set badly formatted answers to the default value 0: both the literal string
# "ANSWER" and missing cells. The result is assigned back to the column because
# a chained `dfs[col].fillna(..., inplace=True)` only modifies a temporary
# object once pandas Copy-on-Write is active.
dfs["convincing"] = dfs["convincing"].replace({"ANSWER": 0}).fillna(0)

# Summarise the label distribution as absolute counts and percentages.
convincing_counts_after = dfs["convincing"].value_counts()
percentages = (convincing_counts_after / dfs.shape[0] * 100).round(2)
convincing_counts_after = pd.DataFrame({"Counts": convincing_counts_after, "Percentages": percentages})
print(convincing_counts_after)

            Counts  Percentages
convincing                     
1.0           3676        52.29
0.0           3354        47.71


In [65]:
def krippendorff(df, dataset=None, annotators=None):
    """Compute Krippendorff's alpha over the ``convincing`` labels in ``df``.

    Args:
        df: DataFrame with the columns ``annotator``, ``request-id`` and
            ``convincing`` (plus ``dataset`` when ``dataset`` is given).
        dataset: If not None, only rows whose ``dataset`` column equals this
            value are used.
        annotators: If not None, only rows whose ``annotator`` is contained in
            this collection are used. Lists, tuples and NumPy arrays all work.

    Returns:
        The value returned by ``AnnotationTask.alpha()`` for the selected rows.

    Raises:
        ValueError: If a ``convincing`` value cannot be converted with ``int``
            (e.g. NaN).
    """
    # `is not None` instead of `!= None`: comparing a NumPy array with `!=`
    # yields an element-wise array whose truth value is ambiguous.
    if dataset is not None:
        df = df[df["dataset"] == dataset]

    if annotators is not None:
        df = df[df["annotator"].isin(annotators)]

    # AnnotationTask consumes (coder, item, label) triples.
    annotations = [
        (coder, item, int(label))
        for coder, item, label in zip(df["annotator"], df["request-id"], df["convincing"])
    ]

    task = AnnotationTask(data=annotations)
    return task.alpha()

In [66]:
# Report Krippendorff's alpha (as a percentage) for all data and per dataset:
# first over every annotator, then for each pair of annotators.
for dataset in [None, "halle", "nl2bash", "prague"]:
    overall_alpha = krippendorff(dfs, dataset=dataset)
    print(f"{dataset if dataset else 'All'}", round(100 * overall_alpha, 2), "K-Alpha")
    for i, annotator_a in enumerate(annotators[:-1]):
        # Only look at annotators after `annotator_a` so each pair is printed once.
        for annotator_b in annotators[i + 1:]:
            pair_alpha = krippendorff(dfs, dataset=dataset, annotators=[annotator_a, annotator_b])
            print(f"{annotator_a} + {annotator_b}", round(100 * pair_alpha, 2), "K-Alpha")
    print()

All 57.38 K-Alpha
annotator_stefan + annotator_benedikt 40.85 K-Alpha
annotator_stefan + annotator_simon 60.86 K-Alpha
annotator_stefan + annotator_1 50.41 K-Alpha
annotator_stefan + annotator_2 63.43 K-Alpha
annotator_benedikt + annotator_simon 62.36 K-Alpha
annotator_benedikt + annotator_1 47.0 K-Alpha
annotator_benedikt + annotator_2 46.9 K-Alpha
annotator_simon + annotator_1 72.05 K-Alpha
annotator_simon + annotator_2 70.85 K-Alpha
annotator_1 + annotator_2 53.91 K-Alpha

halle 63.46 K-Alpha
annotator_stefan + annotator_benedikt 45.12 K-Alpha
annotator_stefan + annotator_simon 66.3 K-Alpha
annotator_stefan + annotator_1 59.29 K-Alpha
annotator_stefan + annotator_2 65.52 K-Alpha
annotator_benedikt + annotator_simon 66.21 K-Alpha
annotator_benedikt + annotator_1 53.43 K-Alpha
annotator_benedikt + annotator_2 53.0 K-Alpha
annotator_simon + annotator_1 80.08 K-Alpha
annotator_simon + annotator_2 78.53 K-Alpha
annotator_1 + annotator_2 63.08 K-Alpha

nl2bash 43.9 K-Alpha
annotator_stefa

# Majority Votes

In [67]:
# Make sure the labels are numeric before they are aggregated.
dfs['convincing'] = pd.to_numeric(dfs['convincing'])


def _most_frequent_label(votes):
    """Return the first entry of ``votes.mode()``, i.e. a most frequent label."""
    return votes.mode()[0]


# One row per request-id holding the label most annotators agreed on.
votes_by_request = dfs.groupby(['request-id'])['convincing']
majority_vote = votes_by_request.apply(_most_frequent_label).reset_index()

In [69]:
# Store the majority votes next to the annotator folders. The frame's index is
# written as well (pandas default), so the file starts with an unnamed column.
majority_vote.to_csv(path_or_buf='./annotations/final/majority_votes.csv')