# Compute Fleiss' kappa for each annotation file, then run a one-sided binomial test on the majority-vote results

In [2]:
from statsmodels.stats import inter_rater as irr
from scipy.stats import binomtest
import pandas as pd
import numpy as np
import os

In [19]:
# Directory holding the annotation CSV files that are evaluated below.
annotation_dir = 'Data/annotations/group_ties'

# Sorted so the report order is reproducible; os.listdir returns entries in
# arbitrary order.
annotation_files = sorted(os.listdir(annotation_dir))

# Convert the classes to integer codes. Below, 2 ("comb") is counted as a win,
# 1 ("base") as a loss and 0 ("tie") as a tie.
mapping = {"comb": 2, "base": 1, "tie": 0}

# Significance threshold applied to the binomial test's p-value.
alpha = 0.05

for file_name in annotation_files:

    file_path = os.path.join(annotation_dir, file_name)
    # basename instead of split('/'): os.path.join inserts "\" on Windows, so
    # splitting on "/" left the directory name in the printed label.
    label = os.path.basename(file_path)

    file = pd.read_csv(file_path)
    file = file.replace(mapping)

    # An empty table cannot be scored or voted on; report it and move on
    # instead of failing halfway through the report.
    if file.empty:
        print(f"Skipping {label}: no annotations found\n")
        continue

    # aggregate_raters returns a tuple (data, categories); only the count
    # table (index 0) is passed on to fleiss_kappa.
    file_conv = irr.aggregate_raters(file)
    score = irr.fleiss_kappa(file_conv[0], method='fleiss')

    print(f"Fleiss kappa for {label}: {score:.2f}")

    # Row-wise majority vote; column 0 of mode() is the first modal value.
    # NOTE(review): when several codes are equally frequent in a row, pandas
    # appears to return the smallest one first (tie < base < comb) -- confirm
    # this tie-breaking is the intended behaviour.
    file['majority'] = file.mode(axis=1)[0]

    number_of_wins = int((file['majority'] == 2).sum())
    number_of_loses = int((file['majority'] == 1).sum())
    number_of_ties = int((file['majority'] == 0).sum())

    # Fractions are derived from the same counts that are printed next to
    # them, so the percentage and the count can never disagree.
    total = len(file)
    wins = number_of_wins / total
    loses = number_of_loses / total
    ties = number_of_ties / total

    print(f"wins: {(wins*100):.1f}\t {number_of_wins}")
    print(f"loses: {(loses*100):.1f}\t {number_of_loses}")
    print(f"ties: {(ties*100):.1f}\t {number_of_ties}")

    # Ties are excluded from the test: it only asks whether wins outnumber
    # loses among the decided items.
    decided = number_of_wins + number_of_loses
    if decided == 0:
        # binomtest rejects n == 0, so there is nothing to test here.
        print("Binomial test skipped: there are no wins or loses to compare\n")
        continue

    result = binomtest(number_of_wins, decided, alternative='greater')

    if result.pvalue <= alpha:
        print(f"A significant difference is found with P value: {result.pvalue:.2f}\n")

    else:
        print(f"No significant difference is found with P value: {result.pvalue:.2f}\n")


Fleiss kappa for group_ties\article_adequacy.csv: 0.61
wins: 58.3	 7
loses: 8.3	 1
ties: 33.3	 4
A significant difference is found with P value: 0.04

Fleiss kappa for group_ties\article_fluency.csv: 0.73
wins: 33.3	 4
loses: 16.7	 2
ties: 50.0	 6
No significant difference is found with P value: 0.34

Fleiss kappa for group_ties\event_adequacy.csv: 0.60
wins: 28.6	 2
loses: 0.0	 0
ties: 71.4	 5
No significant difference is found with P value: 0.25

Fleiss kappa for group_ties\event_fluency.csv: 1.00
wins: 71.4	 5
loses: 14.3	 1
ties: 14.3	 1
No significant difference is found with P value: 0.11

