# Fleiss' kappa is computed for each annotation file, and a sign test is run on the majority votes

In [2]:
from statsmodels.stats import inter_rater as irr
from statsmodels.stats.descriptivestats import sign_test
import pandas as pd
import numpy as np
import os

In [3]:
# For every annotation file: report the inter-rater agreement (Fleiss' kappa),
# the share of items won / lost / tied according to the majority vote, and a
# sign test on the non-tie majority votes.
ANNOTATION_DIR = 'Data/annotations/group_ties'

# Sorted so the report order is deterministic (os.listdir order is arbitrary).
annotation_files = sorted(os.listdir(ANNOTATION_DIR))
mapping = {"comb": 2, "base": 1, "tie": 0}  # Convert the class labels to integer codes

for file_name in annotation_files:

    file_path = os.path.join(ANNOTATION_DIR, file_name)
    file = pd.read_csv(file_path)
    file = file.replace(mapping)

    # aggregate_raters returns a tuple (data, categories); only the count
    # table (first element) is needed by fleiss_kappa.
    # NOTE(review): assumes one row per item and one column per rater -- confirm
    # against the CSV layout.
    file_conv = irr.aggregate_raters(file.to_numpy())
    score = irr.fleiss_kappa(file_conv[0], method='fleiss')

    # Print the bare file name: splitting the joined path on '/' leaves the
    # directory in the label on platforms where os.path.join uses '\'.
    print(f"Fleiss kappa for {file_name}: {score:.2f}")

    # First mode of each row, i.e. the most frequent label among the columns.
    file['majority'] = file.mode(axis=1)[0]

    # Guard the empty-file case explicitly instead of hiding every possible
    # error behind a bare except.
    n_items = len(file)
    if n_items:
        wins = len(file[file['majority'] == 2]) / n_items
        loses = len(file[file['majority'] == 1]) / n_items
    else:
        wins = 0.0
        loses = 0.0

    ties = 1.0 - wins - loses

    print(f"wins: {(wins*100):.1f}")
    print(f"loses: {(loses*100):.1f}")
    print(f"ties: {(ties*100):.1f}")

    # Ties (code 0) are dropped, so only 1 ("base") and 2 ("comb") remain.
    # mu0=1.1 lies between those two codes, so the sign test compares the
    # number of "comb" majorities (above mu0) with "base" majorities (below).
    values = file['majority'][file['majority'] > 0].to_numpy()
    result_full = sign_test(values, mu0=1.1)  # (statistic, p-value)
    result = result_full[1]  # reuse the p-value instead of re-running the test

    if result <= 0.05:
        print(f"A significant difference is found with P value: {result}\n")

    else:
        print(f"No significant difference is found with P value: {result}\n")

    print("------")
    print(result_full)
    print("\n")


Fleiss kappa for group_ties\article_adequacy.csv: 0.61
wins: 58.3
loses: 8.3
ties: 33.3
No significant difference is found with P value: 0.0703125

------
(3.0, 0.0703125)


Fleiss kappa for group_ties\article_fluency.csv: 0.73
wins: 33.3
loses: 16.7
ties: 50.0
No significant difference is found with P value: 0.6875

------
(1.0, 0.6875)


Fleiss kappa for group_ties\event_adequacy.csv: 0.60
wins: 28.6
loses: 0.0
ties: 71.4
No significant difference is found with P value: 0.5

------
(1.0, 0.5)


Fleiss kappa for group_ties\event_fluency.csv: 1.00
wins: 71.4
loses: 14.3
ties: 14.3
No significant difference is found with P value: 0.21875

------
(2.0, 0.21875)


