In [4]:
import pandas as pd
from datasets import load_dataset
from krippendorff import alpha
import numpy as np
import itertools

# Methods

In [5]:
def compute_krippendorff_alpha(dataframe, columns, missing_data='?',
                               verbose=False
                               ):
    """
    Compute Krippendorff's alpha for inter-rater reliability.

    Every requested column that exists in `dataframe` contributes one row of
    the reliability matrix passed to `krippendorff.alpha`; every DataFrame row
    contributes one column of that matrix.

    Parameters:
    - dataframe: pd.DataFrame, the DataFrame containing the data.
    - columns: list, the list of column names to calculate alpha for.
      Names that are not columns of `dataframe` are silently dropped
      (a warning is printed when `verbose` is True).
    - missing_data: placeholder value that marks a missing rating; every
      occurrence is replaced by NaN before alpha is computed.
    - verbose: bool, when True print a warning about dropped columns and the
      shape of the reliability matrix.

    Returns:
    - alpha_value: float, Krippendorff's alpha value.

    Raises:
    - ValueError: if none of the requested columns exist in `dataframe`.
    """

    # Exclude columns that are not in the dataframe and print a warning.
    # The count compares requested vs. present column names (not the number
    # of rows), so the warning only fires when something was really dropped.
    requested_columns = list(columns)
    present_columns = [
        col for col in requested_columns if col in dataframe.columns]
    n_missing = len(requested_columns) - len(present_columns)
    if n_missing and verbose:
        print(
            f"Warning: {n_missing} runs are not in the dataframe")

    # Fail early with an explicit message instead of handing an empty array
    # to the krippendorff library.
    if not present_columns:
        raise ValueError(
            "None of the requested columns are present in the dataframe: "
            f"{requested_columns}")

    # Extract the relevant columns, turn the missing-data marker into NaN and
    # coerce every column to a numeric dtype.
    ratings = (
        dataframe[present_columns]
        .replace(missing_data, np.nan)
        .apply(pd.to_numeric)
    )

    # One row per selected column, one column per DataFrame row. Forcing a
    # float array keeps NaN as the only missing-value representation.
    data_list = ratings.to_numpy(dtype=float, na_value=np.nan).T

    if verbose:
        print(data_list.shape)

    # Calculate Krippendorff's alpha
    alpha_value = alpha(reliability_data=data_list)

    return alpha_value


def compute_krippendorff_alpha_for_k_runs(df, runs, k=None, verbose=False):
    """
    Find the combination of `k` runs with the highest Krippendorff's alpha.

    Parameters:
    - df: pd.DataFrame, the DataFrame containing one column per run.
    - runs: list, candidate column names; names that are not columns of `df`
      are ignored.
    - k: int or None, size of the combinations to evaluate. Defaults to the
      number of runs found in `df`, i.e. a single combination of all of them.
    - verbose: bool, when True print the alpha of every evaluated combination.

    Returns:
    - (best_alpha, best_combination): the highest alpha and the tuple of
      column names that produced it. When no combination yields a comparable
      score (no combinations exist, or every score is NaN) the result is
      `(0, None)`.

    The best result is always printed, independent of `verbose`.
    """
    # exclude runs that are not in the dataframe
    runs = [run for run in runs if run in df.columns]

    if k is None:
        k = len(runs)

    best_combination = None
    # Krippendorff's alpha can be negative (systematic disagreement), so the
    # search must start below every possible score rather than at 0.
    best_alpha = float("-inf")

    # Iterate through all possible combinations
    for combination in itertools.combinations(runs, k):

        alpha_value = compute_krippendorff_alpha(df, list(combination))

        if verbose:
            # Print alpha for the current combination
            print(f"Combination: {combination}, Alpha: {alpha_value}")

        # Update best combination and alpha if a higher alpha is found.
        # NaN never compares greater, so NaN scores are skipped.
        if alpha_value > best_alpha:
            best_alpha = alpha_value
            best_combination = combination

    if best_combination is None:
        # Nothing comparable was evaluated: keep the historical sentinel
        # instead of leaking -inf to callers.
        best_alpha = 0

    # Print the best combination and alpha
    print(
        f"Best Alpha: {best_alpha}, Best Combination: {best_combination}")

    return best_alpha, best_combination


In [6]:
# Load the full annotated dataset and attach its label columns to each split
merge_column = 'text'
keep_columns = ['text', 'final_label', 'label_zephyr', "label_openchat", "label_llama"]

df_anno_lex = pd.read_parquet('./data/output/anno-lexical.parquet')

# Each split file is read and joined with the label columns on `merge_column`
# (order: train, dev, test).
split_paths = (
    './data/training/anno-lexical-train.parquet',
    './data/training/anno-lexical-dev.parquet',
    './data/training/anno-lexical-test.parquet',
)
label_columns = df_anno_lex[keep_columns]
df_anno_lex_train, df_anno_lex_dev, df_anno_lex_test = (
    pd.read_parquet(path).merge(label_columns, on=merge_column)
    for path in split_paths
)

# sanity checks: split sizes add up to the full dataset, and the merged
# `final_label` agrees with each split's own `label`
split_frames = (df_anno_lex_train, df_anno_lex_dev, df_anno_lex_test)
split_sizes = [len(frame) for frame in split_frames]
print(len(df_anno_lex) == sum(split_sizes))
print("(train, dev, test)=", *split_sizes)
print("full=", len(df_anno_lex))
for frame in split_frames:
    print((frame["final_label"] == frame["label"]).all())


True
(train, dev, test)= 33831 7249 7250
full= 48330
True
True
True


In [7]:
# Krippendorff's alpha over the three model label columns, computed for the
# whole dataset first and then for the train, test and dev splits
annotator_columns = ["label_zephyr", "label_openchat", "label_llama"]
krippendorff_complete, krippendorff_train, krippendorff_test, krippendorff_dev = (
    compute_krippendorff_alpha(frame, columns=list(annotator_columns))
    for frame in (df_anno_lex, df_anno_lex_train, df_anno_lex_test, df_anno_lex_dev)
)


In [8]:
# Report the alpha values in a fixed order: whole dataset, train, test, dev
alpha_report = (
    ("Krippendorff's alpha for the whole dataset:", krippendorff_complete),
    ("Krippendorff's alpha for the training set:", krippendorff_train),
    ("Krippendorff's alpha for the test set:", krippendorff_test),
    ("Krippendorff's alpha for the dev set:", krippendorff_dev),
)
for caption, value in alpha_report:
    print(caption, value)

Krippendorff's alpha for the whole dataset: 0.593383092752203
Krippendorff's alpha for the training set: 0.5924767632306003
Krippendorff's alpha for the test set: 0.595402670055891
Krippendorff's alpha for the dev set: 0.5954683991396965


# Compute Krippendorff's alpha for all annotated samples

In [9]:
# Load the complete annotated sentence pool and its conservatively filtered variant
df_all = pd.read_parquet('./data/output/final_sentence_pool_annotated.parquet')
df_filtered = pd.read_parquet('./data/output/final_sentence_pool_annotated_conservative.parquet')
print("All annotated data: ", len(df_all))
print("Convervative Filtering annotated data: ", len(df_filtered))

# Agreement between the three model label columns on each pool
pool_label_columns = ["label_zephyr", "label_openchat", "label_llama"]
krippendorff_all_data, krippendorff_filtered_data = (
    compute_krippendorff_alpha(pool, columns=list(pool_label_columns))
    for pool in (df_all, df_filtered)
)

print("Krippendorff's alpha for all annotated data:", krippendorff_all_data)
print("Krippendorff's alpha for filtered annotated data:", krippendorff_filtered_data)

All annotated data:  65909
Convervative Filtering annotated data:  64712
Krippendorff's alpha for all annotated data: 0.6002888450840125
Krippendorff's alpha for filtered annotated data: 0.6009601175087209
