Fleiss' kappa:

In [None]:
import sys
import pandas as pd
from statsmodels.stats.inter_rater import fleiss_kappa
from fleiss_kappa import compute_fleiss_kappa, save_fleiss_kappa_results_to_csv

If the sum is not equal to 3, the data must be cleaned before computing Fleiss' Kappa:

In [None]:


# Dimensions handed to compute_fleiss_kappa, one kappa value per entry.
dimensions = [
    'Strength_runs',
    'Specificity_runs',
    'Persuasiveness_runs',
    'Objectivity_runs',
]

# Static table of runs to evaluate. Each row is expanded into a dict with the
# keys listed in _CONFIG_FIELDS (same key order as the row's column order).
_CONFIG_FIELDS = ("model_name", "setting", "data_path", "output_csv")
_CONFIG_ROWS = (
    (
        "Gemma2",
        "",
        "/data/GemmaAnn.csv",
        "results/fleiss_kappa_Gemma2_default.csv",
    ),
    (
        "Gemma2",
        "Temperature=0.3",
        "/data/GemmaAnn_Temperature_03.csv",
        "results/fleiss_kappa_Gemma2_temp3.csv",
    ),
    (
        "Gemma2",
        "Temperature=0.7",
        "/data/GemmaAnn_cleaned_Temperature_07.csv",
        "results/fleiss_kappa_Gemma2_temp7.csv",
    ),
)
model_configs = [dict(zip(_CONFIG_FIELDS, row)) for row in _CONFIG_ROWS]

# Run the Fleiss' kappa pipeline once per entry in `model_configs`:
# load the CSV, compute kappa for every dimension, print the values,
# then write them to the configured output file.
from pathlib import Path

for cfg in model_configs:
    model_name = cfg["model_name"]
    setting = cfg["setting"]
    output_csv = cfg["output_csv"]

    print(f"\n===== Processing Model: {model_name} | Setting: {setting} =====")

    # Load LLM results
    df = pd.read_csv(cfg["data_path"])

    # Compute Fleiss' Kappa; the result is iterated as a dimension -> kappa mapping
    kappa_results = compute_fleiss_kappa(df, dimensions)

    # Print results
    print("Fleiss' Kappa Results:")
    for dim, kappa in kappa_results.items():
        print(f"{dim}: {kappa:.3f}")

    # The output paths point into a relative "results/" directory that nothing
    # in this notebook creates. Whether the save helper creates it is not
    # visible from here, so make sure it exists before writing; this is a
    # no-op when the directory is already present.
    Path(output_csv).parent.mkdir(parents=True, exist_ok=True)

    # Save results to CSV
    # NOTE(review): values are printed with 3 decimals above but saved with
    # decimal_places=2 -- confirm the difference is intentional.
    save_fleiss_kappa_results_to_csv(
        kappa_results,
        filepath=output_csv,
        model_name=model_name,
        setting=setting,
        decimal_places=2
    )

    print(f"Saved results to {output_csv} for {model_name} (Setting: {setting})")
