# Labeling (round 2) statistics

In [1]:
import krippendorff 
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.metrics import cohen_kappa_score
from scipy.stats import pearsonr, spearmanr
import seaborn as sns

In [2]:
data_dir = Path("../data/round2_sent_pairs")
[p for p in data_dir.glob("*.csv")]

[PosixPath('../data/round2_sent_pairs/Paired Annotation - Sheet1.csv'),
 PosixPath('../data/round2_sent_pairs/annotation-upwork.csv'),
 PosixPath('../data/round2_sent_pairs/original.csv'),
 PosixPath('../data/round2_sent_pairs/annotation-zining.csv'),
 PosixPath('../data/round2_sent_pairs/annotation-bai.csv')]

In [3]:
df_bai = pd.read_csv(Path(data_dir, "annotation-bai.csv")).rename(columns={"delta": "delta_bai", "code": "code_bai"})
df_zining = pd.read_csv(Path(data_dir, "annotation-zining.csv"))\
    .replace({"logical": "atypical"})\
    .rename(columns={"delta": "delta_zining", "code": "code_zining"})
df_upwork = pd.read_csv(Path(data_dir, "annotation-upwork.csv"))\
    .drop(columns=["Unnamed: 4"])\
    .rename(columns={"delta": "delta_upwork", "code": "code_upwork"})\
    .replace({"Nonsense": "nonsense"})

In [4]:
df_bai.code_bai.drop_duplicates()

0       nonsense
1            NaN
2       atypical
6     expression
11      physical
14     syntactic
Name: code_bai, dtype: object

In [5]:
df_zining.code_zining.drop_duplicates()

0       nonsense
1            NaN
3       physical
4       atypical
5     expression
14     syntactic
Name: code_zining, dtype: object

In [6]:
df_upwork.code_upwork.drop_duplicates()

0       physical
1       nonsense
2     expression
3       atypical
5            NaN
16     syntactic
Name: code_upwork, dtype: object

In [7]:
df = pd.merge(df_bai, df_zining, on=["text1", "text2"], how="inner")
df = pd.merge(df, df_upwork, on=["text1", "text2"], how="inner")
df.head()

Unnamed: 0,text1,text2,delta_bai,code_bai,delta_zining,code_zining,delta_upwork,code_upwork
0,His knife clinked to the floor .,His knife clinked to the inside .,-2,nonsense,-2,nonsense,-2,physical
1,Claire sprints into writing .,Claire sprints into work .,0,,0,,2,nonsense
2,A stream of visitors turned up at the camp .,A stream of listeners turned up at the camp .,-2,atypical,0,,-2,expression
3,Grill the coconut until golden ; cool .,Grill the cake until golden ; cool .,2,atypical,-2,physical,2,atypical
4,The contract was concluded at that beginning .,The contract was concluded at that time .,2,nonsense,2,atypical,-1,expression


In [8]:
df.corr()

Unnamed: 0,delta_bai,delta_zining,delta_upwork
delta_bai,1.0,0.664,0.594549
delta_zining,0.664,1.0,0.552962
delta_upwork,0.594549,0.552962,1.0


In [9]:
def compute_agreements_delta():
    names = ["bai", "zining", "upwork"]
    for i in range(len(names)-1):
        for j in range(i+1, len(names)):
            col_i = df["delta_{}".format(names[i])]
            col_j = df["delta_{}".format(names[j])]
            kappa = cohen_kappa_score(col_i, col_j)
            r, p = spearmanr(col_i, col_j)
            print ("{}, {}:\t kappa={:.4f}\t r={:.4f}, p={:.4f}".format(names[i], names[j], kappa, r, p))
            
compute_agreements_delta()

bai, zining:	 kappa=0.4007	 r=0.6677, p=0.0000
bai, upwork:	 kappa=0.2672	 r=0.5929, p=0.0000
zining, upwork:	 kappa=0.3074	 r=0.5612, p=0.0000


In [10]:
from nltk.metrics.agreement import AnnotationTask

def inter_annotator_agreements():
    """
    Fleiss's Kappa
    Krippendorff's Alpha
    """
    print ("NLTK implementation:")
    data = []
    for i, row in df.iterrows():
        data.append(["bai", f"{i}", row["delta_bai"]])
        data.append(["zining", f"{i}", row["delta_zining"]])
        data.append(["upwork", f"{i}", row["delta_upwork"]])
    at = AnnotationTask(data=data)
    print("Cohen's kappa:")
    annotators = ["bai", "zining", "upwork"]
    for ai in range(len(annotators)-1):
        for aj in range(ai+1, len(annotators)):
            print("    {}, {}   \t {:.4f}".format(annotators[ai], annotators[aj], 
                                      at.kappa_pairwise(annotators[ai], annotators[aj])))
        
    print("Fleiss Kappa: {:.4f}".format(at.multi_kappa()))
    print("Krippendorff alpha: {:.4f}".format(at.alpha()))
    
    print ("")
    print ("pip install krippendorff implementation:")
    alpha = krippendorff.alpha([df["delta_bai"], df["delta_zining"], df["delta_upwork"]])
    print ("Krippendorff alpha={:.4f}".format(alpha))
    
inter_annotator_agreements()

NLTK implementation:
Cohen's kappa:
    bai, zining   	 0.4007
    bai, upwork   	 0.2672
    zining, upwork   	 0.3074
Fleiss Kappa: 0.3245
Krippendorff alpha: 0.3140

pip install krippendorff implementation:
Krippendorff alpha=0.5817


In [11]:
def code2int(L, label):
    newL = [0] * len(L)
    for i, item in enumerate(L):
        if item == label:
            newL[i] = 1
    return newL
    
def compute_agreements_code(label):
    names = ["bai", "zining", "upwork"]
    print ("Agreements for " + label)
    for i in range(len(names)-1):
        for j in range(i+1, len(names)):
            col_i = code2int(df["code_{}".format(names[i])], label)
            col_j = code2int(df["code_{}".format(names[j])], label)
            kappa = cohen_kappa_score(col_i, col_j)
            r, p = spearmanr(col_i, col_j)
            print ("{}, {}:\t kappa={:.4f}\t r={:.4f}, p={:.4f}".format(names[i], names[j], kappa, r, p))
            
compute_agreements_code("atypical")
compute_agreements_code("physical")
compute_agreements_code("expression")
compute_agreements_code("syntactic")
compute_agreements_code("nonsense")

Agreements for atypical
bai, zining:	 kappa=-0.0070	 r=-0.0071, p=0.9464
bai, upwork:	 kappa=0.0948	 r=0.0975, p=0.3553
zining, upwork:	 kappa=0.1540	 r=0.1546, p=0.1411
Agreements for physical
bai, zining:	 kappa=0.4576	 r=0.4864, p=0.0000
bai, upwork:	 kappa=0.2133	 r=0.2135, p=0.0410
zining, upwork:	 kappa=0.1379	 r=0.1491, p=0.1561
Agreements for expression
bai, zining:	 kappa=-0.0392	 r=-0.0605, p=0.5669
bai, upwork:	 kappa=0.0640	 r=0.0773, p=0.4640
zining, upwork:	 kappa=-0.0428	 r=-0.1141, p=0.2787
Agreements for syntactic
bai, zining:	 kappa=0.3320	 r=0.3777, p=0.0002
bai, upwork:	 kappa=0.1869	 r=0.1994, p=0.0567
zining, upwork:	 kappa=0.0738	 r=0.0747, p=0.4790
Agreements for nonsense
bai, zining:	 kappa=0.2782	 r=0.3279, p=0.0014
bai, upwork:	 kappa=-0.0587	 r=-0.0874, p=0.4074
zining, upwork:	 kappa=-0.0455	 r=-0.0485, p=0.6462


In [12]:
df.to_csv("round2_merged.csv", index=False)