# Inter-Rater Agreement
This notebook calculates the inter-annotator agreement (IAA), measured as Krippendorff's alpha, for our categorizations.

In [None]:
import pandas as pd
import numpy as np
from krippendorff import alpha

In [18]:
# Read the list describing which files belong to the final annotation round.
# The text file is split into lines; each line becomes one list entry.
with open('final_annotation_round.txt', 'r') as round_file:
    final_annotation_round = round_file.read().splitlines()

# Keep a set next to the list so membership checks are cheap.
final_annotation_round_set = {*final_annotation_round}

In [None]:
def extract_rank_from_annotation(annotation):
    """
    Parse the leading rank number of an annotation label.

    Labels look like '1_hauptthema' or '2_nebenthema'; the rank is whatever
    precedes the first underscore (or the whole label if it has none).

    Parameters:
        annotation (str): The annotation label; None/NaN mark a missing value.

    Returns:
        int or np.nan: The parsed rank, or np.nan when the value is missing
        or its prefix is not an integer.
    """
    # Missing annotations carry no rank.
    if annotation is None or pd.isna(annotation):
        return np.nan

    try:
        # Text before the first underscore; partition returns the whole
        # string as the head when no underscore is present.
        prefix = str(annotation).partition('_')[0]
        return int(prefix)
    except (ValueError, IndexError):
        # Non-numeric prefix (e.g. 'hauptthema', '', '1.0').
        return np.nan

def prepare_data_from_dataframe(df, annotator_columns):
    """
    Build the reliability matrix for Krippendorff's alpha from a DataFrame.

    Every annotator column is converted from annotation labels to ranks via
    extract_rank_from_annotation, then the table is transposed.

    Parameters:
        df (pd.DataFrame): The DataFrame containing annotations.
        annotator_columns (list of str): List of column names for annotators.

    Returns:
        np.ndarray: A 2D array where rows are annotators and columns are items.
    """
    # Work on a copy restricted to the annotator columns so the caller's
    # DataFrame is left untouched.
    ranks = df[annotator_columns].copy()

    # Replace each label with its rank, one annotator at a time.
    for annotator in annotator_columns:
        ranks[annotator] = ranks[annotator].apply(extract_rank_from_annotation)

    # Flip to annotators-as-rows, the layout passed on to alpha().
    return ranks.transpose().to_numpy()

def calculate_krippendorff_alpha_from_dataframe(df, annotator_columns):
    """
    Compute ordinal and nominal Krippendorff's alpha for annotation columns.

    Prints a few diagnostics (matrix shape, distinct values, missing counts
    per annotator) before computing the coefficients.

    Parameters:
        df (pd.DataFrame): The DataFrame containing annotations.
        annotator_columns (list of str): List of column names for annotators.

    Returns:
        dict: "ordinal" and "nominal" alpha values on success; if anything
        raises along the way, an "error" entry holding the exception message.
    """
    results = {}

    try:
        # Annotators-as-rows matrix of ranks.
        matrix = prepare_data_from_dataframe(df, annotator_columns)

        # Debug output describing the data that goes into alpha().
        print(f"Data shape: {matrix.shape}")
        missing = np.isnan(matrix)
        print(f"Unique values: {np.unique(matrix[~missing])}")
        print(f"Missing values per annotator: {[np.sum(row) for row in missing]}")

        # One alpha per level of measurement, stored under the level's name.
        for level in ("ordinal", "nominal"):
            results[level] = alpha(reliability_data=matrix, level_of_measurement=level)
    except Exception as exc:
        # Report failures to the caller through the result dict.
        results["error"] = str(exc)

    return results


# Load the articles and the names of the four annotator columns.
df = pd.read_csv('../data/articles.csv')
annotator_columns = [
    'expert_annotator_1',
    'expert_annotator_2',
    'student_annotator_1',
    'student_annotator_2',
]

# Keep only rows whose id is listed in the final annotation round.
df = df.loc[df['id'].isin(final_annotation_round_set)]

results = calculate_krippendorff_alpha_from_dataframe(df, annotator_columns)

# Report both coefficients, or the error message if the calculation failed.
if "error" not in results:
    print(f"Krippendorff's Alpha (ordinal): {results['ordinal']:.3f}")
    print(f"Krippendorff's Alpha (nominal): {results['nominal']:.3f}")
else:
    print(f"Error calculating Krippendorff's alpha: {results['error']}")

Data shape: (4, 77)
Unique values: [1. 2. 3.]
Missing values per annotator: [np.int64(0), np.int64(0), np.int64(1), np.int64(0)]
Krippendorff's Alpha (ordinal): 0.870
Krippendorff's Alpha (nominal): 0.690
