In [None]:
"""
Annotation analysis for inter-annotator agreement and class distribution

This script reads three Excel files containing annotations from three different
annotators, normalises the class labels (to fix misspellings and case
variations), computes inter‑annotator agreement (pairwise Cohen's kappa and
overall Fleiss' kappa) and produces class distribution counts for each
annotator and for the majority‑vote aggregate.

To run the script, make sure `pandas`, `scikit-learn` and `statsmodels` are
installed in your Python environment. Execute with e.g.:

    python annotation_analysis.py

Adjust the `file_paths` list if the Excel files are named differently or
located in another directory.
"""

import pandas as pd
from collections import Counter
from sklearn.metrics import cohen_kappa_score
from statsmodels.stats.inter_rater import fleiss_kappa
import numpy as np


def normalize_label(label: str) -> str:
    """Map a raw annotation label onto ``Negative``, ``Neutral`` or ``Positive``.

    Raw labels may be misspelt (``Negetive``) or inconsistently cased
    (``negetive``). The label is stripped of surrounding whitespace and
    lowercased; a ``neg`` prefix yields ``Negative``, a ``pos`` prefix yields
    ``Positive`` and any other text yields ``Neutral``.

    Parameters
    ----------
    label : str
        The raw label from the dataset.

    Returns
    -------
    str
        The normalised label.
    """
    cleaned = label.strip().lower()
    # The exact word "negative" also starts with "neg", so the prefix test
    # alone covers it.
    prefix_to_class = (("neg", "Negative"), ("pos", "Positive"))
    for prefix, canonical in prefix_to_class:
        if cleaned.startswith(prefix):
            return canonical
    return "Neutral"


def load_and_normalise(file_path: str) -> pd.Series:
    """Read one annotator's Excel file and return its normalised labels.

    Only the ``label`` column is read. Every value is converted to text and
    then passed through :func:`normalize_label`.

    Note: because values are stringified first, an empty cell (read by pandas
    as NaN) becomes the text ``"nan"`` and is therefore normalised to
    ``Neutral`` rather than being reported as missing.

    Parameters
    ----------
    file_path : str
        Path to the Excel file.

    Returns
    -------
    pandas.Series
        Series of normalised labels.
    """
    frame = pd.read_excel(file_path)
    raw_labels = frame["label"].astype(str)
    return raw_labels.apply(normalize_label)


def compute_pairwise_kappas(norm_labels: list) -> dict:
    """Return Cohen's kappa for each unordered pair of annotators.

    Parameters
    ----------
    norm_labels : list of pandas.Series
        One Series of normalised labels per annotator. All Series must be the
        same length and describe the same items in the same order.

    Returns
    -------
    dict
        Maps ``(a, b)`` with ``a < b`` (1-based annotator numbers) to the
        kappa statistic for that pair. Empty when fewer than two annotators
        are supplied.
    """
    annotator_count = len(norm_labels)
    return {
        (first + 1, second + 1): cohen_kappa_score(
            norm_labels[first], norm_labels[second]
        )
        for first in range(annotator_count)
        for second in range(first + 1, annotator_count)
    }


def compute_fleiss_kappa(norm_labels: list, categories: list) -> float:
    """Compute Fleiss' kappa across all annotators.

    Parameters
    ----------
    norm_labels : list of pandas.Series
        List of normalised labels for each annotator (length must equal the
        number of annotators). Items are matched by position, so the Series
        index is irrelevant; plain lists work as well.
    categories : list of str
        The possible categories. This must cover every distinct normalised
        label used in ``norm_labels``.

    Returns
    -------
    float
        Fleiss' kappa statistic.

    Raises
    ------
    ValueError
        If the annotators did not label the same number of items, or if a
        label occurs that is not listed in ``categories``.
    """
    # Materialise the labels positionally: ``series[i]`` is a label-based
    # lookup in pandas and would fail or misalign on a non-default index.
    columns = [list(annot) for annot in norm_labels]
    n_samples = len(columns[0])
    if any(len(col) != n_samples for col in columns):
        raise ValueError("All annotators must label the same number of items.")

    column_of = {cat: j for j, cat in enumerate(categories)}

    # Build matrix: rows are items, columns are categories, values are counts of
    # raters assigning that category to the item.
    rating_matrix = np.zeros((n_samples, len(categories)), dtype=int)
    for i, votes in enumerate(zip(*columns)):
        for vote in votes:
            if vote not in column_of:
                # A vote outside ``categories`` would otherwise be dropped from
                # the table, leaving this item with fewer ratings than others.
                raise ValueError(
                    f"Label {vote!r} (item {i}) is not in categories {categories!r}."
                )
            rating_matrix[i, column_of[vote]] += 1
    return float(fleiss_kappa(rating_matrix))


def compute_majority_distribution(norm_labels: list) -> Counter:
    """Compute the majority vote for each item and return distribution counts.

    The majority label of an item is the label chosen by strictly more
    annotators than any other label. When the top count is shared by two or
    more labels (e.g. each annotator chooses a different label, or four
    annotators split 2-2), no majority category is assigned and the item is
    counted under ``No Majority``. With a single annotator there is nothing to
    tie with, so that annotator's label is the majority.

    Parameters
    ----------
    norm_labels : list of pandas.Series
        List of normalised labels for each annotator. Items are matched by
        position, so the Series index is irrelevant; plain lists work as well.

    Returns
    -------
    collections.Counter
        Counts of majority categories across items.

    Raises
    ------
    ValueError
        If the annotators did not label the same number of items.
    """
    # Materialise the labels positionally: ``series[i]`` is a label-based
    # lookup in pandas and would fail or misalign on a non-default index.
    columns = [list(annot) for annot in norm_labels]
    n_samples = len(columns[0])
    if any(len(col) != n_samples for col in columns):
        raise ValueError("All annotators must label the same number of items.")

    majority = Counter()
    for votes in zip(*columns):
        # The two most frequent labels are enough to tell a clear winner from
        # a tie for first place.
        ranked = Counter(votes).most_common(2)
        top_label, top_count = ranked[0]
        tied = len(ranked) > 1 and ranked[1][1] == top_count
        majority["No Majority" if tied else top_label] += 1
    return majority


def main(file_paths=None):
    """Load the annotator files and print the agreement and distribution report.

    Prints, in order: pairwise Cohen's kappa, Fleiss' kappa, the class
    distribution of each annotator and the majority-vote distribution.

    Parameters
    ----------
    file_paths : list of str, optional
        Paths to the per-annotator Excel files, in annotator order. When
        omitted, the three default Google Drive paths below are used.
    """
    if file_paths is None:
        # Paths to the annotator files
        file_paths = [
            "/content/drive/MyDrive/dataset/Annotator 1.xlsx",
            "/content/drive/MyDrive/dataset/Annotator 2.xlsx",
            "/content/drive/MyDrive/dataset/Annotator 3.xlsx",
        ]

    # Load and normalise labels
    norm_labels = [load_and_normalise(fp) for fp in file_paths]

    # Compute pairwise Cohen's kappa
    pairwise = compute_pairwise_kappas(norm_labels)
    print("Pairwise Cohen's kappa statistics:")
    for (i, j), kappa in pairwise.items():
        print(f"  Annotator {i} vs Annotator {j}: {kappa:.4f}")

    # Compute Fleiss' kappa across all annotators.
    # Take the union of labels over every annotator: a class used only by a
    # later annotator must still get a column in the rating table and a row in
    # the distribution report.
    categories = sorted(set().union(*norm_labels))
    fkappa = compute_fleiss_kappa(norm_labels, categories)
    print(f"\nFleiss' kappa across all annotators: {fkappa:.4f}\n")

    # Show class distribution for each annotator with counts and percentages
    total_items = len(norm_labels[0])
    for idx, labels in enumerate(norm_labels, start=1):
        counts = Counter(labels)
        print(f"Annotator {idx} distribution:")
        for cat in categories:
            cnt = counts.get(cat, 0)
            pct = (cnt / total_items) * 100
            print(f"  {cat}: {cnt} ({pct:.2f}%)")
        print()

    # Majority vote distribution
    majority_counts = compute_majority_distribution(norm_labels)
    print("Majority vote distribution (across annotators):")
    for cat in categories + ["No Majority"]:
        # Categories that never win a majority are omitted from this section.
        if cat in majority_counts:
            cnt = majority_counts[cat]
            pct = (cnt / total_items) * 100
            print(f"  {cat}: {cnt} ({pct:.2f}%)")

# Run the analysis only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()

Pairwise Cohen's kappa statistics:
  Annotator 1 vs Annotator 2: 0.7975
  Annotator 1 vs Annotator 3: 0.9071
  Annotator 2 vs Annotator 3: 0.8903

Fleiss' kappa across all annotators: 0.8650

Annotator 1 distribution:
  Negative: 1165 (23.30%)
  Neutral: 996 (19.92%)
  Positive: 2839 (56.78%)

Annotator 2 distribution:
  Negative: 1165 (23.30%)
  Neutral: 995 (19.90%)
  Positive: 2840 (56.80%)

Annotator 3 distribution:
  Negative: 1165 (23.30%)
  Neutral: 995 (19.90%)
  Positive: 2840 (56.80%)

Majority vote distribution (across annotators):
  Negative: 1165 (23.30%)
  Neutral: 995 (19.90%)
  Positive: 2840 (56.80%)


In [None]:
# Mount Google Drive at /content/drive (Google Colab only). The annotator file
# paths used by main() live under this mount point, so this cell has to run
# before the analysis.
from google.colab import drive
drive.mount('/content/drive')