In [4]:
# ▸ 1. Install & import -------------------------------------------------------
!pip install --quiet krippendorff pandas

import pandas as pd, numpy as np, ast, krippendorff

# ---------------------------------------------------------------------------
# Inputs: one CSV per annotator, plus the column names they all share.
files = [
    "annotations.csv",
    "annotations-2.csv",
    "annotations-3.csv",
]
key_col = "id"        # column the annotators' files are joined on
label_col = "label"   # column holding each annotator's label(s)
# ---------------------------------------------------------------------------

def tidy_labels(cell):
    """
    Canonicalise one label cell into a single '|'-joined string.

    Cell looks like "['None', 'Minimal']" → 'Minimal|None'
    (labels are de-duplicated and sorted, so order in the cell is irrelevant).

    Also accepted:
      * a plain label string such as "Minimal" or "None" → returned stripped
        of surrounding whitespace, as a single label;
      * an already-parsed list/tuple/set of labels → joined like a list cell;
      * any other scalar (e.g. a number) → its ``str()`` form.

    Returns:
        str: the sorted, de-duplicated labels joined with '|'.
    """
    containers = (list, tuple, set, frozenset)

    if isinstance(cell, str):
        try:
            parsed = ast.literal_eval(cell)
        except (ValueError, SyntaxError):
            # Not a Python literal at all (e.g. the bare word Minimal).
            parsed = None
        # Only a real container counts as "several labels". Strings such as
        # "None" or "3" do parse as literals, but they are single labels, so
        # the original text is kept instead of the parsed value.
        lst = parsed if isinstance(parsed, containers) else [cell.strip()]
    elif isinstance(cell, containers):
        lst = cell
    else:
        lst = [cell]

    return "|".join(sorted(set(map(str, lst))))

# Load each annotator's CSV and keep only the join key plus that annotator's
# canonicalised label, renamed to a1, a2, ... so the columns don't collide
# when the frames are merged.
dfs = []
for idx, path in enumerate(files, start=1):
    df = pd.read_csv(path, dtype={key_col: str})
    df[label_col] = df[label_col].apply(tidy_labels)
    dfs.append(df[[key_col, label_col]].rename(columns={label_col: f"a{idx}"}))

# Inner join ➜ keep only items every annotator labeled.
# validate="one_to_one" makes pandas raise MergeError when an id occurs more
# than once on either side. Without it, a repeated id silently multiplies rows,
# which inflates the item count and distorts alpha.
merged = dfs[0]
for df in dfs[1:]:
    merged = merged.merge(df, on=key_col, how="inner", validate="one_to_one")
if merged.empty:
    raise ValueError("No items have labels from all annotators.")

# Matrix: rows = annotators, cols = items, dtype=str
data = merged[[f"a{i}" for i in range(1, len(dfs)+1)]].to_numpy(dtype=str).T

# Explicit value domain (unique set of labels); the labels are strings, so the
# domain is passed to krippendorff.alpha instead of being left unset.
value_domain = pd.unique(data.flatten())

# "nominal" is spelled out on purpose: krippendorff.alpha's own default is
# "interval", which is not meaningful for unordered category labels.
alpha = krippendorff.alpha(reliability_data=data,
                           value_domain=value_domain,
                           level_of_measurement="nominal")

print(f"Krippendorff's alpha: {alpha:.3f}  "
      f"({data.shape[0]} annotators, {data.shape[1]} items)")


Krippendorff's alpha: 0.280  (3 annotators, 50 items)


### PAIRWISE CONFUSION MATRICES

In [6]:
# ▸ 0.  INSTALL / IMPORT (run once per kernel) --------------------------------
!pip install --quiet pandas datasets krippendorff
import pandas as pd, numpy as np, ast, itertools, textwrap
from datasets import load_dataset

# ---------------------------------------------------------------------------
#  Prerequisite: this cell calls canonical, pairwise_confusion and
#  display_matrices, none of which are defined in this cell. Run the cell that
#  defines them first (or re-paste those helper functions here) before running
#  this cell on a fresh kernel.
# ---------------------------------------------------------------------------

# ▸ 1.  Local CSVs – one file per annotator -----------------------------------
csv_files = ["annotations.csv", "annotations-2.csv", "annotations-3.csv"]

# Build one long-format frame per file with columns id / label / annotator,
# tagging the rows of the N-th file with annotator "A<N>".
dfs = []
for idx, path in enumerate(csv_files, start=1):
    raw = pd.read_csv(path, dtype={"id": str})
    dfs.append(
        raw[["id"]].assign(
            label=raw["label"].apply(canonical),
            annotator=f"A{idx}",
        )
    )

# Stack all annotators, then hand the long frame to the confusion helpers.
csv_df = pd.concat(dfs, ignore_index=True)
csv_mats = pairwise_confusion(csv_df)
display_matrices("Local CSV files (≈50 items)", csv_mats)

# ▸ 2.  FineWeb-C Danish -------------------------------------------------------
fw_ds = load_dataset(
    "data-is-better-together/fineweb-c",
    name="dan_Latn",
    split="train",
)

# Explode each record into one row per (annotator id, label) pair, pairing the
# record's annotator_ids with its educational_value_labels position by position.
rows = [
    {
        "id": str(rec["id"]),
        "annotator": f"FW{annot_id}",   # "FW" prefix marks FineWeb annotators
        "label": canonical(lab),
    }
    for rec in fw_ds
    for annot_id, lab in zip(rec["annotator_ids"],
                             rec["educational_value_labels"])
]

fw_df = pd.DataFrame(rows)
# min_overlap=5: require ≥5 shared docs per annotator pair
fw_mats = pairwise_confusion(fw_df, min_overlap=5)
display_matrices("FineWeb-C Danish (~1 k rows after exploding)", fw_mats)




LOCAL CSV FILES (≈50 ITEMS)
 Pair: A1  vs.  A2   (n = 50)


lab_b,Basic,Excellent,Good,Minimal,Minimal|None,None,Problematic
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Basic,0,0,0,1,0,1,0
Excellent,1,0,1,0,0,0,0
Good,1,0,0,0,0,1,0
Minimal,1,1,0,8,0,7,0
,2,0,0,4,1,14,2
Problematic,0,0,0,0,0,0,4


 Pair: A1  vs.  A3   (n = 50)


lab_b,Basic,Good,Minimal,None,Problematic
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,0,0,0,2,0
Excellent,1,1,0,0,0
Good,0,0,1,1,0
Minimal,0,0,4,13,0
,1,0,1,21,0
Problematic,0,0,0,1,3


 Pair: A2  vs.  A3   (n = 50)


lab_b,Basic,Good,Minimal,None,Problematic
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,0,1,1,3,0
Excellent,0,0,0,1,0
Good,1,0,0,0,0
Minimal,1,0,4,8,0
Minimal|None,0,0,0,1,0
,0,0,1,22,0
Problematic,0,0,0,3,3



FINEWEB-C DANISH (~1 K ROWS AFTER EXPLODING)
 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW85ac8d54-89c5-4473-95c4-797366f03cd0   (n = 978)


lab_b,Basic,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,16,1,38,23,0
Good,2,0,8,0,0
Minimal,10,0,133,131,2
,6,0,103,355,5
❗ Problematic Content ❗,0,0,6,33,106


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW9987848b-debb-4ed3-a97b-14eb9b3c4322   (n = 200)


lab_b,Basic,Excellent,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Basic,5,1,4,5,1,3
Good,0,1,1,0,0,2
Minimal,14,1,3,18,17,6
,10,0,2,20,43,12
❗ Problematic Content ❗,0,0,0,0,0,31


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW95e4f810-6fe7-4bbd-8be6-a11b0d069c11   (n = 33)


lab_b,Basic,Excellent,Good,Minimal,None
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,2,1,1,0,0
Minimal,1,0,0,0,4
,3,0,0,4,16
❗ Problematic Content ❗,0,0,0,0,1


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW2668aa57-cdc3-4cac-8fa8-24b0a2475acd   (n = 44)


lab_b,Basic,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,1,1,2,0,0
Good,1,0,0,0,0
Minimal,0,0,2,2,0
,0,0,6,21,0
❗ Problematic Content ❗,0,0,1,3,4


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FWb98b0144-391d-4e70-bae0-743ce94e6314   (n = 80)


lab_b,Basic,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Basic,0,0,4,0
Minimal,1,11,14,1
,1,1,28,4
❗ Problematic Content ❗,0,0,1,14


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW4e0a264e-6445-495f-ae54-8e0755b8ebd0   (n = 72)


lab_b,Basic,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Basic,1,1,2,1
Minimal,6,11,1,1
,6,8,16,8
❗ Problematic Content ❗,0,0,2,8


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FWe24d6253-5c08-4caa-85e5-03fb51ae84d1   (n = 31)


lab_b,Basic,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Basic,0,1,1,1
Minimal,3,3,5,0
,0,0,12,3
❗ Problematic Content ❗,0,0,0,2


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FWeb97a6ec-c5b8-4724-99b6-5c7ed857a6af   (n = 8)


lab_b,Excellent,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,0,0,1,0,0
Good,1,0,0,0,0
Minimal,0,1,0,0,0
,0,1,1,1,0
❗ Problematic Content ❗,0,0,0,0,2


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FWe9f72b47-2af5-4b06-90f2-7163de147a1d   (n = 22)


lab_b,Basic,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,0,0,0,1,0
Minimal,0,1,0,5,0
,1,0,1,10,0
❗ Problematic Content ❗,0,0,0,0,3


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW82197ecd-6d0b-400a-834a-703da28164ae   (n = 16)


lab_b,Basic,Excellent,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Basic,0,1,0,2,0,0
Minimal,1,1,2,2,0,0
,0,0,1,2,1,0
❗ Problematic Content ❗,1,0,0,0,0,2


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW14af86f2-cf7e-4ad3-a002-1e6ece983846   (n = 7)


lab_b,Basic,Excellent,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Basic,1,0,0,0,0,0
Minimal,1,1,0,0,0,0
,0,0,1,1,0,0
❗ Problematic Content ❗,0,0,0,0,1,1


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW740270b9-61bf-4d85-a495-9e37270f7257   (n = 12)


lab_b,Minimal,None
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1
Basic,1,3
Minimal,1,2
,1,3
❗ Problematic Content ❗,0,1


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FWec88c396-7fa8-4746-a92a-a91784e57b7b   (n = 33)


lab_b,Basic,Excellent,Good,Minimal,None
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,0,2,1,0,0
Minimal,5,0,0,2,0
,1,0,2,7,6
❗ Problematic Content ❗,1,0,0,1,5


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.
FW29b60230-70be-4917-aadd-994555f53aed   (n = 28)


lab_b,Basic,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Basic,0,0,1,0
Good,0,1,0,0
Minimal,2,5,6,0
,0,1,10,0
❗ Problematic Content ❗,0,0,0,2


 Pair: FWa0585a5c-b72f-4c3a-a2a3-17e8e0b4ea4f  vs.  FW288ddcda-
ec4c-4fe3-8866-8ae5058dc15b   (n = 5)


lab_b,None
lab_a,Unnamed: 1_level_1
Minimal,2
,2
❗ Problematic Content ❗,1


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FW9987848b-debb-4ed3-a97b-14eb9b3c4322   (n = 189)


lab_b,Basic,Excellent,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Basic,2,2,1,2,0,0
Minimal,13,1,5,19,16,9
,14,0,2,20,41,18
❗ Problematic Content ❗,0,0,0,0,1,23


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FW95e4f810-6fe7-4bbd-8be6-a11b0d069c11   (n = 27)


lab_b,Basic,Good,Minimal,None
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Basic,1,0,0,1
Minimal,3,0,1,4
,2,1,2,11
❗ Problematic Content ❗,0,0,0,1


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FW2668aa57-cdc3-4cac-8fa8-24b0a2475acd   (n = 44)


lab_b,Basic,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Basic,1,0,1,2,0
Minimal,1,1,5,5,0
,0,0,5,18,0
❗ Problematic Content ❗,0,0,0,1,4


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FWb98b0144-391d-4e70-bae0-743ce94e6314   (n = 68)


lab_b,Basic,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Basic,0,2,0,0
Minimal,1,4,12,1
,0,3,27,5
❗ Problematic Content ❗,0,0,2,11


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FW4e0a264e-6445-495f-ae54-8e0755b8ebd0   (n = 72)


lab_b,Basic,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Basic,1,0,1,0
Minimal,8,6,5,1
,4,14,15,11
❗ Problematic Content ❗,0,0,0,6


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FWe24d6253-5c08-4caa-85e5-03fb51ae84d1   (n = 31)


lab_b,Basic,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Minimal,1,4,7,0
,2,0,11,4
❗ Problematic Content ❗,0,0,0,2


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FWe9f72b47-2af5-4b06-90f2-7163de147a1d   (n = 22)


lab_b,Basic,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Minimal,0,1,0,3,0
,1,0,1,13,1
❗ Problematic Content ❗,0,0,0,0,2


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FW82197ecd-6d0b-400a-834a-703da28164ae   (n = 11)


lab_b,Basic,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Minimal,1,1,3,0,0
,0,1,1,1,0
❗ Problematic Content ❗,1,0,0,0,2


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FW14af86f2-cf7e-4ad3-a002-1e6ece983846   (n = 7)


lab_b,Basic,Excellent,Good,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Minimal,0,1,1,0,0,0
,2,0,0,1,1,0
❗ Problematic Content ❗,0,0,0,0,0,1


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FW740270b9-61bf-4d85-a495-9e37270f7257   (n = 10)


lab_b,Minimal,None
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1
Minimal,2,1
,1,5
❗ Problematic Content ❗,0,1


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FWec88c396-7fa8-4746-a92a-a91784e57b7b   (n = 33)


lab_b,Basic,Excellent,Good,Minimal,None
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Minimal,5,2,2,3,0
,2,0,1,7,5
❗ Problematic Content ❗,0,0,0,0,6


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.
FW29b60230-70be-4917-aadd-994555f53aed   (n = 25)


lab_b,Basic,Minimal,None,❗ Problematic Content ❗
lab_a,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Basic,1,1,0,0
Minimal,0,2,4,0
,1,2,12,0
❗ Problematic Content ❗,0,0,0,2


 Pair: FW85ac8d54-89c5-4473-95c4-797366f03cd0  vs.  FW288ddcda-
ec4c-4fe3-8866-8ae5058dc15b   (n = 5)


lab_b,None
lab_a,Unnamed: 1_level_1
Minimal,2
,2
❗ Problematic Content ❗,1
