In [49]:
import pandas as pd
import numpy as np
import itertools
from collections import Counter
from sklearn.metrics import cohen_kappa_score
from statsmodels.stats.inter_rater import fleiss_kappa
import krippendorff

In [2]:
df = pd.read_csv("annotations_simple.csv")
df.head()

Unnamed: 0,KATHLEEN,LIZ,KAYCEE,KATHLEEN_AB,LIZ_AB,KAYCEE_AB
0,Blank,Blank,Blank,Blank,Blank,Blank
1,Blank,Blank,A,Blank,Blank,AB
2,Blank,Blank,A,Blank,Blank,AB
3,Blank,Blank,Blank,Blank,Blank,Blank
4,Blank,Blank,Blank,Blank,Blank,Blank


In [62]:
AB_separate = ['KATHLEEN', 'LIZ', 'KAYCEE']
AB_together = ['KATHLEEN_AB', 'LIZ_AB', 'KAYCEE_AB']

def get_Cohen(df, annotators):
    """Print Cohen's Kappa statistics"""
    for r1, r2 in itertools.combinations(annotators, 2):
        k = cohen_kappa_score(df[r1], df[r2])
        print(f"Cohen's Kappa for ({r1}, {r2}); k = {k}")
        

get_Cohen(df, AB_separate)
print()
get_Cohen(df, AB_together)

Cohen's Kappa for (KATHLEEN, LIZ); k = 0.5606352189394579
Cohen's Kappa for (KATHLEEN, KAYCEE); k = 0.5466147694095247
Cohen's Kappa for (LIZ, KAYCEE); k = 0.6165392526614191

Cohen's Kappa for (KATHLEEN_AB, LIZ_AB); k = 0.5880720200908507
Cohen's Kappa for (KATHLEEN_AB, KAYCEE_AB); k = 0.5549935783776746
Cohen's Kappa for (LIZ_AB, KAYCEE_AB); k = 0.6384612368956308


In [61]:
def get_Fleiss(df, sep=True):
    """Print Fleiss Kappa Stats"""
    
    if sep:
        cols = [x for x in df.columns if not x.endswith("_AB")]
    else:
        cols = [x for x in df.columns if x.endswith("_AB")]
        
    tmp = df[cols]
    counts = list(tmp.apply(lambda x: Counter(x), axis=1))    
    new = pd.DataFrame(counts).fillna(0)
    k = fleiss_kappa(new,  method='fleiss')
    print(f"Fleiss for separate={sep}: {k}")

get_Fleiss(df, sep=True)
get_Fleiss(df, sep=False)

Fleiss for separate=True: 0.5737495969692085
Fleiss for separate=False: 0.5927284044165415


In [101]:
def get_Krippendorff(df, sep=True, w={'A': 1, 'B':0, 'Blank':-1}):
    """Print Krippendorff Stats"""
    if sep:
        cols = [x for x in df.columns if not x.endswith("_AB")]
    else:
        cols = [x for x in df.columns if x.endswith("_AB")]

    tmp = df[cols].copy()
#     for col in cols:
#         tmp[col] = df[col].astype('category').cat.codes
    
#     reliability_data = tmp.T.to_numpy()
#     k = krippendorff.alpha(reliability_data)
#     print(f"Krippendorff for separate={sep}: {k}")
    
    # or the transpose way
    counts = list(tmp.apply(lambda x: Counter(x), axis=1))    
    new = pd.DataFrame(counts).fillna(0).to_numpy()
    k = krippendorff.alpha(value_counts=new)
    print(k)
    
    
get_Krippendorff(df, sep=True)
print()
get_Krippendorff(df, sep=False)

0.4753388012009404

0.592749619793713


In [68]:
print('')
reliability_data_str = (
"*    *    *    *    *    3    4    1    2    1    1    3    3    *    3",  # coder A
"1    *    2    1    3    3    4    3    *    *    *    *    *    *    *",  # coder B
"*    *    2    1    3    4    4    *    2    1    1    3    3    *    4",  # coder C
)
print('\n'.join(reliability_data_str))
print('')

reliability_data = [[np.nan if v == '*' else int(v) for v in coder.split()] for coder in reliability_data_str]
reliability_data


*    *    *    *    *    3    4    1    2    1    1    3    3    *    3
1    *    2    1    3    3    4    3    *    *    *    *    *    *    *
*    *    2    1    3    4    4    *    2    1    1    3    3    *    4



[[nan, nan, nan, nan, nan, 3, 4, 1, 2, 1, 1, 3, 3, nan, 3],
 [1, nan, 2, 1, 3, 3, 4, 3, nan, nan, nan, nan, nan, nan, nan],
 [nan, nan, 2, 1, 3, 4, 4, nan, 2, 1, 1, 3, 3, nan, 4]]

NameError: name 'nltk' is not defined