In [3]:
from statsmodels.stats.inter_rater import fleiss_kappa
import numpy as np
from tqdm import tqdm
import pandas as pd
import os
import ast

In [4]:
def extract_number(filename):
    """Return the integer suffix embedded in a file name.

    The piece of ``filename`` after its last underscore (the whole name when
    there is no underscore) is cut at its first dot and converted with
    ``int``; e.g. ``'book_12.csv'`` gives ``12``.

    Raises:
        ValueError: if that piece is not a valid integer literal.
    """
    _, _, tail = filename.rpartition('_')
    digits, _, _ = tail.partition('.')
    return int(digits)

# Root data directory; the per-annotator folders are addressed relative to it.
PATH = '../data'
# Index into litbank_file_list of the file to load individually.
LITBANK_INDEX = 0
# Folder holding the case files.
check_folder_path = '../data/NewLitBank_v2'
# Names of the entries in that folder, ordered by their numeric suffix.
# NOTE(review): every directory entry is passed to extract_number, so an entry
# without a numeric suffix makes the sort raise ValueError.
litbank_file_list = os.listdir(check_folder_path)
litbank_file_list.sort(key=extract_number)

In [None]:
# Load the case file selected by LITBANK_INDEX.
case_file = litbank_file_list[LITBANK_INDEX]
df_case = pd.read_csv(check_folder_path + '/' + case_file, index_col=0)

# These columns are stored as text in the CSV; parse every cell back into the
# Python literal it spells out.
for column in ('coref', 'extracted_sentence', 'adjusted_offsets', 'text', 'inference_offsets'):
    df_case[column] = [ast.literal_eval(cell) for cell in df_case[column]]

# Keep only the rows that have an 'update_text' value and renumber them from 0.
has_update_text = df_case['update_text'].notna()
df_case = df_case[has_update_text].reset_index(drop=True)

# Each person's evaluation
# NOTE(review): this cell reads 'NewLitBank_js' while the aggregation loop
# reads 'NewLitBank_JS'; on a case-sensitive file system only one of the two
# can exist — confirm the real folder name.
df_dh = pd.read_csv(PATH + '/NewLitBank_dh/' + case_file, index_col=0)
df_js = pd.read_csv(PATH + '/NewLitBank_js/' + case_file, index_col=0).reset_index(drop=True)
df_gy = pd.read_csv(PATH + '/NewLitBank_gy/' + case_file)

In [64]:
def final_labels(count_matrix, tie_label=1):
    """Choose one consensus label per row of a vote-count matrix.

    Parameters
    ----------
    count_matrix : iterable of 1-D array-like
        One row per item; entry ``j`` of a row is the number of votes that
        item received for class ``j``.
    tie_label : int, default 1
        Label assigned when a row has no unique maximum (several classes share
        the highest count, including an all-zero row). The default keeps the
        previously hard-coded fallback.

    Returns
    -------
    list of int
        For each row, the index of the uniquely most-voted class, or
        ``tie_label`` on a tie. Winners are converted to plain ``int`` so the
        list does not mix NumPy scalars with the Python fallback value.
    """
    argmax_labels = []
    for row in count_matrix:
        row = np.asarray(row)
        max_val = np.max(row)
        max_indices = np.where(row == max_val)[0]

        if len(max_indices) == 1:
            argmax_labels.append(int(max_indices[0]))
        else:
            # No single winner: fall back to the configured tie label.
            argmax_labels.append(tie_label)
    return argmax_labels

In [None]:
# Build, for every processed file, a (cases x classes) matrix of annotator
# votes, write the consensus labels next to the case data, and collect the
# matrices for the agreement computation.
num_classes = 3
# Number of leading entries of litbank_file_list that are processed.
# NOTE(review): hard-coded; presumably the number of files all annotators
# have labelled — confirm before changing.
num_files = 80
# Columns stored as text in the CSV that are parsed back into Python literals.
literal_columns = ['coref', 'extracted_sentence', 'adjusted_offsets', 'text', 'inference_offsets']
total_count_matrix = []
for idx in tqdm(range(num_files)):
    file_name = litbank_file_list[idx]

    df_case = pd.read_csv(check_folder_path + '/' + file_name, index_col=0)
    for column in literal_columns:
        df_case[column] = [ast.literal_eval(data) for data in df_case[column]]
    df_case = df_case[df_case['update_text'].notna()].reset_index(drop=True)

    # One label file per annotator, each with a 'cases' column of class indices.
    # NOTE(review): 'NewLitBank_JS' is upper-case here but the single-file cell
    # reads 'NewLitBank_js'; on a case-sensitive file system only one of the
    # two can exist — confirm the real folder name.
    df_dh = pd.read_csv(PATH + '/NewLitBank_dh/' + file_name, index_col=0)
    df_js = pd.read_csv(PATH + '/NewLitBank_JS/' + file_name, index_col=0).reset_index(drop=True)
    df_gy = pd.read_csv(PATH + '/NewLitBank_gy/' + file_name)
    annotators = [df_dh, df_js, df_gy]

    # The first annotator's row count defines the number of cases in the file.
    num_cases = len(df_dh)
    count_matrix = np.zeros((num_cases, num_classes), dtype=int)
    for df in annotators:
        # An annotator without labels stops the tally for this file; the
        # annotators that follow are not counted either.
        if len(df['cases'])==0:
            break
        for i, label in enumerate(df['cases']):
            count_matrix[i, label] += 1

    # Every case should have received exactly one vote per annotator. Report
    # files where that is not so (shorter annotator file, or the early break
    # above) instead of letting them pass unnoticed into the agreement table.
    votes_per_case = count_matrix.sum(axis=1)
    incomplete = int((votes_per_case != len(annotators)).sum())
    if incomplete:
        tqdm.write(
            f"Warning: {file_name}: {incomplete} of {num_cases} cases do not have "
            f"exactly {len(annotators)} ratings"
        )

    df_case['cases'] = final_labels(count_matrix)
    # NOTE(review): written below 'data/' relative to the working directory,
    # whereas every input is read from PATH ('../data') — confirm this is the
    # intended location.
    df_case.to_csv('data/NewLitBank_IAA/' + file_name)
    total_count_matrix.append(count_matrix)

100%|██████████| 80/80 [00:01<00:00, 66.47it/s]


In [81]:
# Stack the per-file count matrices row-wise into one table with one row per
# case and one column per class.
# Note: this rebinds total_count_matrix from a list of arrays to a single array.
total_count_matrix = np.vstack(total_count_matrix)
# Inter-annotator agreement over the pooled table, via statsmodels' fleiss_kappa.
# NOTE(review): presumably every row must hold the same number of ratings for
# the statistic to be meaningful — confirm against the statsmodels documentation.
kappa = fleiss_kappa(total_count_matrix)
print(f"Fleiss' Kappa: {kappa:.4f}")

Fleiss' Kappa: 0.4630
