In [1]:
import re
from typing import List, Dict, Optional


# Matches an annotation header line such as
#   "[836-1053] Coherence, File: paper_16.txt"
# capturing the character span, the label and the source file name.
HEADER_RE = re.compile(
    r"^\s*\[(?P<span>\d+-\d+)\]\s*(?P<label>[^,]+),\s*File:\s*(?P<filename>[^,]+)",
    flags=re.IGNORECASE
)

def extract_ann_text(path: str, txt_path, encoding: str = "utf-8") -> List[Dict[str, object]]:
    """
    Parse an annotation export and return one dict per annotation.

    The file at `path` is scanned for header lines matching `HEADER_RE`.
    Everything between one header and the next (or EOF) is the annotation body.

    Args:
        path: annotation file to parse.
        txt_path: directory holding the annotated source texts; the file named
            in each header is read from here.
        encoding: encoding used for BOTH the annotation file and the source
            texts, so that character offsets refer to the same decoding.

    Returns:
        A list (empty when no header is found) of dicts shaped like:
          {
            "filename": "paper_16.txt",
            "start": 836,            # int, first number of the header span
            "end": 1053,             # int, second number of the header span
            "label": "Coherence",
            "text": "body after the header up to the next header, or None if empty",
            "full_text": "entire content of <txt_path>/<filename>"
          }

    Raises:
        OSError: if `path` or a referenced source text cannot be opened.
    """
    with open(path, "r", encoding=encoding) as fh:
        lines = fh.readlines()

    # Locate every header line together with its regex match.
    headers = []  # list of tuples (line index, match object)
    for i, line in enumerate(lines):
        m = HEADER_RE.match(line)
        if m:
            headers.append((i, m))

    results: List[Dict[str, object]] = []
    if not headers:
        return results

    # Many annotations point at the same paper; read each paper only once.
    full_text_cache: Dict[str, str] = {}

    for idx, (line_idx, match) in enumerate(headers):
        next_idx = headers[idx + 1][0] if (idx + 1) < len(headers) else len(lines)

        # Body: lines after the header line up to the next header (or EOF).
        body_lines = [ln.rstrip("\n") for ln in lines[line_idx + 1 : next_idx]]
        body = "\n".join(body_lines).strip() or None

        # All three groups are mandatory in HEADER_RE, so they are never None here.
        start_text, end_text = match.group("span").strip().split("-")
        filename = match.group("filename").strip()
        label = match.group("label").strip()

        if filename not in full_text_cache:
            with open(f"{txt_path}/{filename}", "r", encoding=encoding) as f:
                full_text_cache[filename] = f.read()

        results.append({
            "filename": filename,
            "start": int(start_text),
            "end": int(end_text),
            "label": label,
            "text": body,
            "full_text": full_text_cache[filename],
        })

    return results

In [None]:
# Directory with the source texts the annotation offsets refer to.
txt_path = "../../to_annotate"

# Raw annotation export of each annotator.
ann1_file = "../../annotated_data/annotations/ann1/ann_edd_1.txt"
ann2_file = "../../annotated_data/annotations/ann2/ann_iman_2.txt"
ann3_file = "../../annotated_data/annotations/ann3/ann_Ekaterina_2.txt"
ann4_file = "../../annotated_data/annotations/ann4/kaushal_annotations.txt"

# Parse every export into a list of annotation dicts, in annotator order.
ann1, ann2, ann3, ann4 = [
    extract_ann_text(ann_file, txt_path)
    for ann_file in (ann1_file, ann2_file, ann3_file, ann4_file)
]

In [None]:
# Parse the "first ten agreed" annotation file with the same extractor and the
# same `txt_path` source-text directory used for the per-annotator exports.
agreed_ten = "../../annotated_data/first_ten_agreed.txt"
ann_agrd_ten = extract_ann_text(agreed_ten, txt_path)

In [None]:
import json

path = "../../annotated_data/annotations"

# (output sub-folder / file prefix, parsed annotations) for every source.
# Driving the export from this table keeps the five output paths consistent;
# the hand-written ann2 path previously contained stray spaces
# ("{path}  /ann2/...") and pointed at a directory that does not exist.
annotation_sources = [
    ("ann1", ann1),
    ("ann2", ann2),
    ("ann3", ann3),
    ("ann4", ann4),
    ("first_ten_agreed", ann_agrd_ten),
]

# Split each source into one JSON file per paper (paper_1 .. paper_10).
for i in range(1, 11):
    paper_name = f"paper_{i}.txt"
    for prefix, annotations in annotation_sources:
        # A dedicated name is used here so the `agreed_ten` path string from
        # the previous cell is not overwritten with a list.
        paper_annotations = [d for d in annotations if d.get("filename") == paper_name]

        with open(f"{path}/{prefix}/{prefix}_paper_{i}.json", 'w') as f:
            json.dump(paper_annotations, f, indent=4)

In [4]:
from __future__ import annotations
from collections import defaultdict
from typing import Dict, List, Tuple, Any

Span = Tuple[int, int]  # [start, end)


def intersect(a, b):
    """Return the overlap of two ``(start, end)`` spans, or ``None``.

    The overlap runs from the larger start to the smaller end. Spans that
    merely touch (overlap of length zero) or are disjoint yield ``None``.
    """
    lo = max(a[0], b[0])
    hi = min(a[1], b[1])
    if hi > lo:
        return (lo, hi)
    return None


def group_by_label(items, required_label):
    """Bucket the annotations carrying `required_label` by ``(filename, label)``.

    Annotations with any other label are ignored. The result is a
    ``defaultdict(list)`` whose keys are ``(filename, label)`` tuples and whose
    values keep the annotations in their input order.
    """
    buckets = defaultdict(list)
    for entry in items:
        if entry["label"] != required_label:
            continue
        buckets[(entry["filename"], entry["label"])].append(entry)
    return buckets


def dedup_span_dicts(lst):
    """Drop repeated spans, keeping the first occurrence of each.

    Two spans are considered the same when their filename, integer start,
    integer end and label all match. Input order is preserved.
    """
    first_by_identity = {}
    for span in lst:
        identity = (
            span.get("filename"),
            int(span.get("start")),
            int(span.get("end")),
            span.get("label"),
        )
        # setdefault leaves an already-stored (earlier) span untouched.
        first_by_identity.setdefault(identity, span)
    return list(first_by_identity.values())

def _min_overlap_chars(span, category):
    """Return the minimum overlap (in characters) required for `span` in `category`.

    The requirement is a category-specific fraction of the annotated text
    length: 90% for 'Format', 80% for 'Unsupported claim', 70% otherwise.
    When the span carries no text (``None`` or missing) the length of its
    ``[start, end)`` range is used instead, so the comparison does not crash.
    """
    if category == 'Format':
        ratio = 0.90
    elif category == 'Unsupported claim':
        ratio = 0.8
    else:
        ratio = 0.7
    text = span.get('text')
    length = len(text) if text is not None else int(span["end"]) - int(span["start"])
    return int(length * ratio)


def consensus_overlaps_2(ann1, ann2, category):
    """Find the spans of `category` on which two annotators agree.

    For every pair (a from `ann1`, b from `ann2`) in the same file, the
    character intersection is kept when it is at least as long as the SMALLER
    of the two annotators' minimum-overlap requirements (see
    `_min_overlap_chars`).

    Args:
        ann1, ann2: lists of annotation dicts with at least the keys
            "filename", "label", "start", "end" and "text".
        category: label to compare; other labels are ignored.

    Returns:
        A list of dicts sorted by (filename, label, text, start, end) with keys
        "filename", "label", "text" (annotator A's text), "start"/"end" (the
        intersection) and "spans_a"/"spans_b" (de-duplicated contributing
        annotations of each annotator).
    """
    g1 = group_by_label(ann1, category)
    g2 = group_by_label(ann2, category)

    raw = []

    # Only (filename, label) groups present for BOTH annotators can agree.
    for (fn, lab) in sorted(set(g1.keys()) & set(g2.keys())):
        # Pre-compute B's bounds and thresholds once per group instead of
        # once per (a, b) pair.
        b_entries = [
            ((int(b["start"]), int(b["end"])), _min_overlap_chars(b, category), b)
            for b in g2[(fn, lab)]
        ]

        for a in g1[(fn, lab)]:
            A = (int(a["start"]), int(a["end"]))
            min_overlap_chars_a = _min_overlap_chars(a, category)

            for B, min_overlap_chars_b, b in b_entries:
                # The smaller requirement of A and B decides how many
                # characters the two annotations must share. (B's requirement
                # was previously derived from A's text by mistake.)
                min_overlap_chars = min(min_overlap_chars_a, min_overlap_chars_b)

                ab = intersect(A, B)  # intersection between annotator A and annotator B
                if not ab:
                    continue
                if (ab[1] - ab[0]) < min_overlap_chars:
                    continue

                raw.append(
                    {
                        "filename": fn,
                        "label": a["label"],
                        "text": a.get('text'),
                        "start": ab[0],
                        "end": ab[1],
                        "spans_a": [a],
                        "spans_b": [b],
                    }
                )

    # Merge duplicates: multiple pairs can yield the same intersection.
    merged = {}
    for r in raw:
        k = (r["filename"], r["label"], r['text'], r["start"], r["end"])
        if k not in merged:
            merged[k] = {
                "filename": r["filename"],
                "label": r["label"],
                "text": r['text'],
                "start": r["start"],
                "end": r["end"],
                "spans_a": [],  # spans from annotator A that overlap with span from B
                "spans_b": [],  # spans from annotator B that overlap with span from A
            }
        merged[k]["spans_a"].extend(r["spans_a"])
        merged[k]["spans_b"].extend(r["spans_b"])

    final = []
    for v in merged.values():
        v["spans_a"] = dedup_span_dicts(v["spans_a"])
        v["spans_b"] = dedup_span_dicts(v["spans_b"])
        final.append(v)

    # `or ""` keeps the sort total when an annotation has no text (None).
    final.sort(key=lambda x: (x["filename"], x["label"], x['text'] or "", x["start"], x["end"]))
    return final


In [None]:
def consensus_overlaps_3(
    ann1,
    ann2,
    ann3,
    *,
    category,
) -> List[Dict[str, Any]]:
    """
    Returns consensus overlaps where all three annotators overlap.

    For every triple (a, b, c) of `category` annotations in the same file, the
    intersection a∩b and then (a∩b)∩c must each be at least as long as the
    minimum overlap derived from the FIRST annotator's span: 90% of its text
    length for 'Format', 80% for 'Unsupported claim', 70% otherwise.
    NOTE(review): because only `ann1` sets the threshold, the result depends on
    the order in which the annotators are passed.

    When an annotation of `ann1` has no text (``None`` or missing), the length
    of its ``[start, end)`` range is used for the threshold instead.

    Output dict fields:
      filename, label, start, end,
      spans_a / spans_b / spans_c  (the de-duplicated contributing original spans)

    The result is sorted by (filename, label, start, end).
    """
    g1 = group_by_label(ann1, category)
    g2 = group_by_label(ann2, category)
    g3 = group_by_label(ann3, category)

    # The required fraction depends only on the category, so resolve it once.
    if category == 'Format':
        ratio = 0.90
    elif category == 'Unsupported claim':
        ratio = 0.8
    else:
        ratio = 0.7

    keys = set(g1.keys()) | set(g2.keys()) | set(g3.keys())
    out = []

    for (fn, lab) in sorted(keys):
        s1 = g1.get((fn, lab), [])
        s2 = g2.get((fn, lab), [])
        s3 = g3.get((fn, lab), [])

        # If any annotator has no spans for this file/label, there can't be 3-way overlap
        if not s1 or not s2 or not s3:
            continue

        # Brute force over all triples; annotation counts per file are small.
        for a in s1:
            A = (int(a["start"]), int(a["end"]))
            a_text = a.get('text')
            a_length = len(a_text) if a_text is not None else A[1] - A[0]
            min_overlap_chars = int(a_length * ratio)

            for b in s2:
                B = (int(b["start"]), int(b["end"]))
                ab = intersect(A, B)
                if not ab or (ab[1] - ab[0]) < min_overlap_chars:
                    continue
                for c in s3:
                    C = (int(c["start"]), int(c["end"]))
                    abc = intersect(ab, C)
                    if not abc or (abc[1] - abc[0]) < min_overlap_chars:
                        continue

                    out.append(
                        {
                            "filename": fn,
                            "label": a["label"],
                            "start": abc[0],
                            "end": abc[1],
                            "spans_a": [a],
                            "spans_b": [b],
                            "spans_c": [c],
                        }
                    )

    # Merge duplicates: multiple triples can yield the same intersection range.
    # Merge by (filename, label, start, end) and collect the contributing spans.
    merged: Dict[Tuple[str, str, int, int], Dict[str, Any]] = {}
    for r in out:
        k = (r["filename"], r["label"], r["start"], r["end"])
        if k not in merged:
            merged[k] = {
                "filename": r["filename"],
                "label": r["label"],
                "start": r["start"],
                "end": r["end"],
                "spans_a": [],
                "spans_b": [],
                "spans_c": [],
            }
        merged[k]["spans_a"].extend(r["spans_a"])
        merged[k]["spans_b"].extend(r["spans_b"])
        merged[k]["spans_c"].extend(r["spans_c"])

    # De-duplicate identical contributing spans with the notebook-level
    # `dedup_span_dicts` helper rather than a shadowing local copy of it.
    final = []
    for v in merged.values():
        v["spans_a"] = dedup_span_dicts(v["spans_a"])
        v["spans_b"] = dedup_span_dicts(v["spans_b"])
        v["spans_c"] = dedup_span_dicts(v["spans_c"])
        final.append(v)

    final.sort(key=lambda x: (x["filename"], x["label"], x["start"], x["end"]))
    return final


#### Consensus among:
- annotators 1 & 2
- annotators 1 & 3
- annotators 2 & 3
- annotator 4 and the final agreed annotations for the first ten papers

In [None]:
import json
from tqdm import tqdm 

path = "../../annotated_data/annotations"
categories = ['Unsupported claim', 'Format', 'Coherence', 'Lacks synthesis']


def _load_paper_json(source, paper_idx):
    """Load `<path>/<source>/<source>_paper_<paper_idx>.json`, closing the file afterwards."""
    with open(f"{path}/{source}/{source}_paper_{paper_idx}.json") as f:
        return json.load(f)


total_consensus_12 = []
total_consensus_13 = []
total_consensus_23 = []
total_consensus_ann4 = []

for i in tqdm(range(1,11), desc="Processing annotation consensus"):
    ann1_paper = _load_paper_json("ann1", i)
    ann2_paper = _load_paper_json("ann2", i)
    ann3_paper = _load_paper_json("ann3", i)
    ann4_paper = _load_paper_json("ann4", i)
    first_ten_paper = _load_paper_json("first_ten_agreed", i)

    for category in categories:
        total_consensus_12.extend(
            consensus_overlaps_2(ann1_paper, ann2_paper, category=category)
        )
        total_consensus_13.extend(
            consensus_overlaps_2(ann1_paper, ann3_paper, category=category)
        )
        total_consensus_23.extend(
            consensus_overlaps_2(ann2_paper, ann3_paper, category=category)
        )
        # consensus between 4th annotator and final agreements of first ten papers
        total_consensus_ann4.extend(
            consensus_overlaps_2(first_ten_paper, ann4_paper, category=category)
        )

folder = f"{path}/agreements"

# Output file name -> accumulated consensus list.
consensus_outputs = {
    "consensus_agreement_12.json": total_consensus_12,
    "consensus_agreement_13.json": total_consensus_13,
    "consensus_agreement_23.json": total_consensus_23,
    "consensus_ann4_first_10.json": total_consensus_ann4,
}
for filename, consensus in consensus_outputs.items():
    with open(f"{folder}/{filename}", 'w') as f:
        json.dump(consensus, f, indent=4)

print(f"Consensus agreements saved in folder:{folder}")

Processing annotation consensus: 100%|██████████| 10/10 [00:00<00:00, 42.35it/s]

Consensus agreements saved in folder:../annotated_data/annotations/agreements





### Consensus among all 3 annotators

In [None]:
import json
from tqdm import tqdm 

path = "../../annotated_data/annotations"
categories = ['Unsupported claim', 'Format', 'Coherence', 'Lacks synthesis']

total_consensus = []

for i in tqdm(range(1,11), desc="Processing annotation consensus among all 3"):
    # Context managers close each file right after it is parsed. Annotator 4
    # is not part of the three-way consensus, so its file is not loaded here.
    with open(f"{path}/ann1/ann1_paper_{i}.json") as f:
        ann1_paper = json.load(f)
    with open(f"{path}/ann2/ann2_paper_{i}.json") as f:
        ann2_paper = json.load(f)
    with open(f"{path}/ann3/ann3_paper_{i}.json") as f:
        ann3_paper = json.load(f)

    for category in categories:
        # Argument order matters: the first annotator passed (ann2 here) sets
        # the minimum-overlap threshold inside consensus_overlaps_3.
        consensus = consensus_overlaps_3(
            ann2_paper, ann3_paper, ann1_paper,
            category=category
        )

        # NOTE(review): `append` stores one (possibly empty) list per
        # paper/category, i.e. a nested list, whereas the pairwise cell
        # flattens with `extend`. Kept as-is so the JSON layout does not
        # change; confirm which shape downstream consumers expect.
        total_consensus.append(consensus)

folder = f"{path}/agreements"

filename = "consensus_agreement_all_3.json"
with open(f"{folder}/{filename}", 'w') as f:
    json.dump(total_consensus, f, indent=4)

print(f"Consensus agreements saved in folder:{folder}")

Processing annotation consensus among all 3: 100%|██████████| 10/10 [00:00<00:00, 57.29it/s]

Consensus agreements saved in folder:../annotated_data/annotations/agreements





### Measuring Krippendorff's alpha

In [17]:
from __future__ import annotations
import re
from collections import defaultdict
from typing import Dict, List, Tuple, Any, Optional, Set

Token = Tuple[str, int, int]  # (token_text, start_char, end_char)


def regex_tokenize_with_offsets(text: str) -> List[Token]:
    """Split `text` into word tokens and report where each one sits.

    A token is a maximal run of ``\\w`` characters. Each result is a
    ``(token_text, start_char, end_char)`` tuple with a half-open range.
    """
    tokens: List[Token] = []
    for match in re.finditer(r"\w+", text, flags=re.UNICODE):
        tokens.append((match.group(0), match.start(), match.end()))
    return tokens


def overlaps(a_start: int, a_end: int, b_start: int, b_end: int) -> bool:
    """Tell whether half-open ranges ``[a_start, a_end)`` and ``[b_start, b_end)`` share a position.

    Ranges that only touch at a boundary do not overlap.
    """
    return a_end > b_start and b_end > a_start


def spans_for_label(spans: List[Dict[str, Any]], label: str) -> List[Tuple[int, int]]:
    """Collect the ``(start, end)`` ranges of all spans tagged with `label`.

    Start and end are converted to ``int``; ranges that are empty or inverted
    (end <= start) are dropped. The result is sorted ascending.
    """
    matching = (sp for sp in spans if sp.get("label") == label)
    bounds = ((int(sp["start"]), int(sp["end"])) for sp in matching)
    result: List[Tuple[int, int]] = [rng for rng in bounds if rng[1] > rng[0]]
    result.sort()
    return result


def token_binary_labels_from_spans(tokens: List[Token], pos_spans: List[Tuple[int, int]]) -> List[int]:
    """Mark each token 1 if it overlaps a positive span, else 0.

    `pos_spans` is scanned in the given order for every token. The scan stops
    at the first overlapping span, or at the first span that starts at or
    after the token's end — an early exit that is only exhaustive when
    `pos_spans` is sorted by start.
    """
    flags = [0] * len(tokens)
    if not pos_spans:
        return flags

    def _hits_positive_span(tok_start, tok_end):
        for span_start, span_end in pos_spans:
            if overlaps(tok_start, tok_end, span_start, span_end):
                return True
            if span_start >= tok_end:
                return False
        return False

    for position, (_text, tok_start, tok_end) in enumerate(tokens):
        if _hits_positive_span(tok_start, tok_end):
            flags[position] = 1
    return flags


def char_binary_labels_from_spans(
    text: str,
    pos_spans: List[Tuple[int, int]],
    *,
    ignore_whitespace: bool = False,
) -> Tuple[List[int], List[int]]:
    """
    Label every character unit of `text` as inside (1) or outside (0) the positive spans.

    Spans are treated as half-open ``[start, end)`` ranges, like Python slices.

    Returns:
      - labels: one 0/1 per unit; units are all characters of `text`, or only
        the non-whitespace ones when ignore_whitespace=True
      - char_indices: for each unit, its original character index in `text`
        (keeps unit ids stable when whitespace is skipped)

    An empty `text` yields ``([], [])``.
    """
    total_chars = len(text)
    if total_chars == 0:
        return [], []

    # Decide which character positions count as units.
    char_indices = (
        [pos for pos, ch in enumerate(text) if not ch.isspace()]
        if ignore_whitespace
        else list(range(total_chars))
    )

    labels = [0] * len(char_indices)
    if not pos_spans:
        return labels, char_indices

    # Walk units and sorted spans together; `cursor` always points at the
    # first span that has not yet ended before the current character.
    ordered = sorted(pos_spans)
    n_spans = len(ordered)
    cursor = 0

    for slot, pos in enumerate(char_indices):
        while cursor < n_spans and ordered[cursor][1] <= pos:
            cursor += 1
        if cursor >= n_spans:
            # Every span ends before this character; the rest stay 0.
            break
        begin, stop = ordered[cursor]
        if begin <= pos < stop:
            labels[slot] = 1

    return labels, char_indices


def krippendorff_alpha_nominal(units: Dict[Any, Dict[Any, Optional[int]]]) -> float:
    """Krippendorff's alpha for nominal data, with missing ratings allowed.

    Uses the coincidence-matrix formulation:

        alpha = 1 - (n - 1) * sum_{c != k} o_ck / sum_{c != k} n_c * n_k

    where, for each unit u with m_u >= 2 ratings, every ordered pair of
    ratings contributes 1 / (m_u - 1) to o_ck; n_c is the number of pairable
    ratings with value c; and n is the total number of pairable ratings.
    Units with fewer than two ratings are not pairable and are ignored
    entirely (they count neither toward agreement nor toward the value
    frequencies).

    Args:
        units: mapping unit_id -> {rater_id -> rating}; a rating of ``None``
            means "missing". Ratings must be hashable.

    Returns:
        alpha as a float, or ``nan`` when it is undefined: fewer than two
        pairable ratings, or only one distinct value among them (expected
        disagreement is zero).
    """
    value_totals = defaultdict(int)  # n_c: pairable ratings per value
    observed = 0.0                   # sum over c != k of o_ck
    n_pairable = 0                   # n: total pairable ratings

    for ratings in units.values():
        vals = [v for v in ratings.values() if v is not None]
        m = len(vals)
        if m < 2:
            continue  # a single rating cannot be paired with anything

        unit_counts = defaultdict(int)
        for v in vals:
            unit_counts[v] += 1
        for v, cnt in unit_counts.items():
            value_totals[v] += cnt
        n_pairable += m

        # Ordered pairs of differing ratings in this unit: m^2 - sum(cnt^2),
        # each weighted by 1 / (m - 1).
        disagreeing_pairs = m * m - sum(cnt * cnt for cnt in unit_counts.values())
        observed += disagreeing_pairs / (m - 1)

    if n_pairable < 2:
        return float("nan")

    # sum_{c != k} n_c * n_k  ==  n^2 - sum_c n_c^2
    expected = n_pairable * n_pairable - sum(cnt * cnt for cnt in value_totals.values())
    if expected == 0:
        return float("nan")

    return 1.0 - (n_pairable - 1) * observed / expected


def _unit_ids_and_labeler(doc_id, text, granularity, ignore_whitespace_chars):
    """Return ``(unit_ids, label_units)`` for one document.

    `unit_ids` lists the id of every unit (token or character) of `text`;
    `label_units(pos_spans)` returns the matching 0/1 list for a set of spans.
    """
    if granularity == "token":
        tokens = regex_tokenize_with_offsets(text)
        unit_ids = [f"{doc_id}:tok:{ui}" for ui in range(len(tokens))]

        def label_units(pos_spans):
            return token_binary_labels_from_spans(tokens, pos_spans)
    else:  # granularity == "char"
        # Labelling with no spans is only used to obtain the stable list of
        # character indices that act as units (optionally skipping whitespace).
        _, char_indices = char_binary_labels_from_spans(
            text, [], ignore_whitespace=ignore_whitespace_chars
        )
        # The original char index is part of the id for traceability.
        unit_ids = [f"{doc_id}:ch:{ci}" for ci in char_indices]

        def label_units(pos_spans):
            labels, _ = char_binary_labels_from_spans(
                text, pos_spans, ignore_whitespace=ignore_whitespace_chars
            )
            return labels

    return unit_ids, label_units


def build_units_for_category(
    text_by_doc: Dict[str, str],
    spans_by_doc: Dict[str, Dict[str, List[Dict[str, Any]]]],
    category: str,
    *,
    annotators: Optional[List[str]] = None,
    require_all: bool = False,
    min_annotators_per_unit: int = 2,
    missing_means_no_spans: bool = True,
    granularity: str = "char",  # "char" or "token"
    ignore_whitespace_chars: bool = True,  # only used when granularity="char"
) -> Dict[str, Dict[str, int]]:
    """
    Builds units for ONE category as binary (0/1).

    If granularity="token": tokens are units (coarser; overlap length approximated by token count).
    If granularity="char": characters are units (finer; overlap length weighted by char count).

    Args:
        text_by_doc: doc_id -> full document text.
        spans_by_doc: doc_id -> {annotator -> list of span dicts}.
        category: label whose spans count as positive.
        annotators: annotators to include; defaults to every annotator that
            appears anywhere in `spans_by_doc` (sorted).
        require_all: keep a document's units only if every listed annotator
            contributed a rating.
        min_annotators_per_unit: minimum number of ratings per unit when
            `require_all` is False.
        missing_means_no_spans: if True, an annotator absent from a document is
            treated as having marked nothing (all 0); if False, that annotator
            contributes no rating for the document.
        granularity: "char" or "token".
        ignore_whitespace_chars: skip whitespace characters as units (char
            granularity only).

    Returns:
        unit_id -> {annotator -> 0/1}, where unit_id is "<doc_id>:tok:<n>" or
        "<doc_id>:ch:<char index>".

    Raises:
        ValueError: for `min_annotators_per_unit` < 2, an unknown
            `granularity`, or fewer than two annotators.
    """
    if min_annotators_per_unit < 2:
        raise ValueError("min_annotators_per_unit must be >= 2")
    if granularity not in {"token", "char"}:
        raise ValueError("granularity must be 'token' or 'char'")

    if annotators is None:
        ann_set: Set[str] = set()
        for doc_map in spans_by_doc.values():
            ann_set.update(doc_map.keys())
        annotators = sorted(ann_set)

    if len(annotators) < 2:
        raise ValueError(f"Need at least 2 annotators, got {len(annotators)}: {annotators}")

    units: Dict[str, Dict[str, int]] = {}

    for doc_id, text in text_by_doc.items():
        ann_map = spans_by_doc.get(doc_id, {})

        unit_ids, label_units = _unit_ids_and_labeler(
            doc_id, text, granularity, ignore_whitespace_chars
        )
        if not unit_ids:
            continue  # nothing to rate in this document

        per_ann_labels: Dict[str, List[int]] = {}
        for ann in annotators:
            if ann in ann_map:
                spans = ann_map[ann]
            elif missing_means_no_spans:
                spans = []
            else:
                continue
            per_ann_labels[ann] = label_units(spans_for_label(spans, category))

        # Every unit of a document is rated by the same annotators, so the
        # rater-count requirement is checked once per document, not per unit.
        n_raters = len(per_ann_labels)
        if require_all:
            if n_raters != len(annotators):
                continue
        elif n_raters < min_annotators_per_unit:
            continue

        for ui, unit_id in enumerate(unit_ids):
            units[unit_id] = {ann: labels[ui] for ann, labels in per_ann_labels.items()}

    return units


def krippendorff_alpha_per_category(
    text_by_doc,
    spans_by_doc,
    categories,
    *,
    annotators,
    require_all= False,
    min_annotators_per_unit = 2,
    missing_means_no_spans = True,
    granularity = "char",          # <- set "char" to weight by overlap length
    ignore_whitespace_chars = True):
    """Compute one alpha value per category.

    For every entry of `categories`, the binary units are built with
    `build_units_for_category` (all keyword options are forwarded unchanged)
    and scored with `krippendorff_alpha_nominal`.

    Returns a dict mapping category -> alpha.
    """
    unit_options = dict(
        annotators=annotators,
        require_all=require_all,
        min_annotators_per_unit=min_annotators_per_unit,
        missing_means_no_spans=missing_means_no_spans,
        granularity=granularity,
        ignore_whitespace_chars=ignore_whitespace_chars,
    )
    return {
        cat: krippendorff_alpha_nominal(
            build_units_for_category(text_by_doc, spans_by_doc, cat, **unit_options)
        )
        for cat in categories
    }


Krippendorff's alpha between ann1, ann2, and ann3:

In [None]:
txt_path = "../../to_annotate"
categories = ['Unsupported claim', 'Format', 'Coherence', 'Lacks synthesis']
total = []

# One alpha per category for each of the first ten papers.
for i in range(1, 11):
    paper_name = f"paper_{i}.txt"
    ann1_paper = [d for d in ann1 if d.get("filename") == paper_name]
    ann2_paper = [d for d in ann2 if d.get("filename") == paper_name]
    ann3_paper = [d for d in ann3 if d.get("filename") == paper_name]

    file = f"{txt_path}/{paper_name}"
    with open(file, 'r') as f:
        file_text = f.read()

    text_by_doc = {"doc1": file_text}
    spans_by_doc = {
        "doc1": {
            "annA": ann1_paper,
            "annB": ann2_paper,
            "annC": ann3_paper,
        }
    }

    alphas = krippendorff_alpha_per_category(
        text_by_doc,
        spans_by_doc,
        categories,
        # All three annotators take part; annA (annotator 1) used to be left
        # out, which made this a two-annotator alpha despite the file name.
        annotators=["annA", "annB", "annC"],
        require_all=False,
        min_annotators_per_unit=2,  # each unit needs >= 2 ratings
        missing_means_no_spans=True
    )

    total.append({'filename': paper_name, 'labels': alphas})

# Same agreements folder as the consensus cells ("../../", not "../").
folder = "../../annotated_data/annotations/agreements"
filename = "krippendorf_3_anns.json"
with open(f"{folder}/{filename}", 'w') as f:
    json.dump(total, f, indent=4)

print(f"{filename} saved in folder: {folder}")

krippendorf_3_anns.json saved in folder: ../annotated_data/annotations/agreements


Krippendorff's alpha between ann4 and the final agreed annotations for the first ten papers:

In [None]:
txt_path = "../../to_annotate"
categories = ['Unsupported claim', 'Format', 'Coherence', 'Lacks synthesis']
total = []

# One alpha per category for each of the first ten papers.
for i in range(1, 11):
    paper_name = f"paper_{i}.txt"
    ann4_paper = [d for d in ann4 if d.get("filename") == paper_name]
    # Filter the full agreed set (`ann_agrd_ten`) on every iteration. The
    # previous code filtered `first_ten_paper` from itself, relying on a
    # variable leaked from an earlier cell and leaving it empty after the
    # first iteration.
    first_ten_paper = [d for d in ann_agrd_ten if d.get("filename") == paper_name]

    file = f"{txt_path}/{paper_name}"
    with open(file, 'r') as f:
        file_text = f.read()

    text_by_doc = {"doc1": file_text}
    spans_by_doc = {
        "doc1": {
            "annA": ann4_paper,
            "annB": first_ten_paper,
        }
    }

    alphas = krippendorff_alpha_per_category(
        text_by_doc,
        spans_by_doc,
        categories,
        annotators=["annA", "annB"],
        require_all=False,
        min_annotators_per_unit=2,
        missing_means_no_spans=True
    )

    total.append({'filename': paper_name, 'labels': alphas})

# Written out literally so this cell does not depend on `path` from another cell.
folder = "../../annotated_data/annotations/agreements"
filename = "krippendorf_ann4_first_ten.json"
with open(f"{folder}/{filename}", 'w') as f:
    json.dump(total, f, indent=4)

print(f"{filename} saved in folder: {folder}")

krippendorf_ann4_first_ten.json saved in folder: ../annotated_data/annotations/agreements
