In [None]:
import numpy as np
import pandas as pd
import re
import krippendorff
from collections import defaultdict
from itertools import combinations

In [None]:
df_mikkel = pd.read_json("annotations_mikkel.json")

#Fix of dataframe
def parse_label_string(label_str):
    return re.findall(r"'(.*?)'", label_str)

df_mikkel['original_label'] = df_mikkel['original_label'].apply(parse_label_string)

#Check:
df_mikkel["original_label"].iloc[0][0]

In [None]:
num_encoder = {'None': 0,
               'Minimal': 1,
               'Basic': 2,
               'Good': 3,
               'Excellent': 4}

N = len(df_mikkel)
score_data = []

for i in range(len(df_mikkel)):
    labels = [df_mikkel["our_label"].iloc[i]] + df_mikkel["original_label"].iloc[i]
    scores = [num_encoder[label] for label in labels]
    score_data.append(scores) #list of lists

In [33]:
R = 5 #number of ratings

SD_list = []

for scores in score_data[1:]:
    c = len(scores)
    SD = (1 / (c * (c - 1))) * sum(
        ((scores[j] - scores[k]) / (R - 1)) ** 2
        for j in range(c)
        for k in range(c)
        if j != k
    )
    SD_list.append(SD)

MSD = np.mean(SD_list)
MSD

np.float64(0.08975168350168351)

In [None]:
R = 5 #number of ratings

SD_list = []

for scores in score_data:
    c = len(scores)
    #The Squared Difference between unordered distinct pairs for one text:
    SD = (2 / (c * (c - 1))) * sum((
        (scores[j] - scores[k]) / (R - 1)) ** 2 
        for j in range(c) 
        for k in range(j+1,c))
    SD_list.append(SD)

MSD = np.mean(SD_list)
MSD


53.3125

In [37]:
reliability_data = np.array(score_data).T 
alpha = krippendorff.alpha(reliability_data=reliability_data, level_of_measurement='ordinal')
alpha

np.float64(0.382356809230209)

In [None]:
# Step 1: Define ordinal scale and distance
ordinal_values = sorted(set(r for ratings in score_data for r in ratings))
R = len(ordinal_values)

def ordinal_distance(a, b):
    return ((a - b) / (R - 1)) ** 2

# Distance lookup table
delta = {
    (a, b): ordinal_distance(a, b)
    for a in ordinal_values
    for b in ordinal_values
}

# ---------------------
# Step 2: Build the coincidence matrix
coincidence = defaultdict(float)
label_totals = defaultdict(float)

for ratings in score_data:
    n = len(ratings)
    if n < 2:
        continue  # skip items with fewer than 2 ratings
    weight = 1 / (n - 1)
    for i in range(n):
        label_totals[ratings[i]] += 1
        for j in range(n):
            if i != j:
                a, b = ratings[i], ratings[j]
                coincidence[(a, b)] += weight

# ---------------------
# Step 3: Observed disagreement Dₒ
Do = 0.0
for (a, b), count in coincidence.items():
    Do += count * delta[(a, b)]

# ---------------------
# Step 4: Expected disagreement Dₑ
total_labels = sum(label_totals.values())
De = 0.0

for a in ordinal_values:
    for b in ordinal_values:
        expected = (label_totals[a] * label_totals[b]) / (total_labels - 1)
        De += expected * delta[(a, b)]

# ---------------------
# Step 5: Krippendorff's alpha
alpha = 1 - (Do / De) if De != 0 else float('nan')

# ---------------------
# Output
print(f"Observed disagreement Dₒ: {Do:.4f}")
print(f"Expected disagreement Dₑ: {De:.4f}")
print(f"Krippendorff’s alpha (ordinal): {alpha:.4f}")

Observed disagreement Dₒ: 35.5417
Expected disagreement Dₑ: 57.1551
Krippendorff’s alpha (ordinal): 0.3782


In [21]:
# Collect all annotation scores for each text
all_scores = []
within_ss = 0
N = 0
C = None

for i in range(len(df_mikkel)):
    labels = [df_mikkel["our_label"].iloc[i]] + df_mikkel["original_label"].iloc[i]
    scores = [num_encoder[label] for label in labels]
    all_scores.extend(scores)
    mean_i = np.mean(scores)
    within_ss += sum((x - mean_i) ** 2 for x in scores)
    N += 1
    if C is None:
        C = len(scores)

# Grand mean
grand_mean = np.mean(all_scores)

# Total sum of squares
total_ss = sum((x - grand_mean) ** 2 for x in all_scores)

# Degrees of freedom
df_within = N * (C - 1)
df_total = N * C - 1

# Mean squares
ms_within = within_ss / df_within
ms_total = total_ss / df_total

# Krippendorff's alpha
alpha = 1 - (ms_within / ms_total)
alpha

np.float64(0.3781537938708267)

In [5]:
SD_list = []

for scores in score_data:
    #Calculating Squared difference:
    c = len(scores)
    SD = (1 / (c * (c - 1))) * sum((scores[j] - scores[k]) ** 2 for j in range(c) for k in range(c))

    SD_list.append(SD)

MSD = np.mean(SD_list)
MSD

np.float64(1.4216666666666669)