In [3]:
import pandas as pd
import numpy as np
import sqlite3
import krippendorff


# Load every row of the `cdm_postedits` table into a DataFrame.
conn = sqlite3.connect("../data/cdm_postedits.db")
try:
    df = pd.read_sql_query("SELECT * FROM cdm_postedits", conn)
finally:
    # Release the database handle even if the query raises.
    conn.close()

# Cast `id` to string. `np.str` was only a deprecated alias of the builtin
# `str` and was removed in NumPy 1.24, so use the builtin directly.
all_rows = df.astype({"id": str})

def calculate_alphas(df):
    """Print Krippendorff's alpha for each rated metric in *df*.

    Every distinct (``id``, ``system``) pair is treated as one unit and every
    distinct ``annotator_id`` as one coder. For each of the metrics
    ``overall``, ``redundancy`` and ``grammar`` an annotators-by-units matrix
    is built, with ``np.nan`` where an annotator did not rate a unit, and
    passed to ``krippendorff.alpha`` as ``reliability_data``.

    Args:
        df: DataFrame with the columns ``id``, ``system``, ``annotator_id``,
            ``overall``, ``redundancy`` and ``grammar``.

    Returns:
        None. The number of distinct units and one alpha per metric are
        printed to stdout.
    """
    metrics = ["overall", "redundancy", "grammar"]

    # Key units by the (id, system) tuple rather than by string concatenation:
    # "1" + "12" and "11" + "2" would otherwise collapse into the same unit.
    unit_keys = list(zip(df["id"], df["system"]))
    unit_index = {key: ind for ind, key in enumerate(dict.fromkeys(unit_keys))}
    annotator_index = {
        annotator: ind
        for ind, annotator in enumerate(dict.fromkeys(df["annotator_id"]))
    }

    system_summary_size = len(unit_index)
    print(system_summary_size)

    # The matrix position of each annotation is the same for every metric, so
    # resolve it once instead of re-scanning the frame per annotator/metric.
    rows = [annotator_index[annotator] for annotator in df["annotator_id"]]
    cols = [unit_index[key] for key in unit_keys]

    metric_alphas = []
    for metric in metrics:
        krippendorff_input = [
            [np.nan] * system_summary_size for _ in annotator_index
        ]
        # Filled in frame order, so if an annotator rated the same unit more
        # than once the last rating wins.
        for row, col, score in zip(rows, cols, df[metric]):
            krippendorff_input[row][col] = score
        metric_alphas.append(
            krippendorff.alpha(reliability_data=krippendorff_input)
        )

    for metric, alpha in zip(metrics, metric_alphas):
        print("system %s krippendorff alpha %0.3f" % (metric, alpha))
    
# Report inter-annotator agreement over all rows loaded from the database.
print("Alphas for entire dataset:")
calculate_alphas(all_rows)

Alphas for entire dataset:
1997
system overall krippendorff alpha 0.331
system redundancy krippendorff alpha 0.524
system grammar krippendorff alpha 0.237
