In [1]:
import math
import krippendorff
from scipy.stats import spearmanr
import re
import pandas as pd
import numpy as np
from pathlib import Path
from itertools import combinations
from collections import defaultdict

# Load data

In [2]:
def load_uses(filename='TRoTR/data/uses.tsv', sep='\t'):
    """Read the uses table into a DataFrame of strings.

    The first line of the file is the header. Every line (header included)
    has its trailing whitespace stripped before being split on ``sep``.
    Fields beyond the header length are ignored; rows with fewer fields than
    the header get NaN in the missing columns.

    Args:
        filename: path of the delimited text file to read.
        sep: field separator.

    Returns:
        pandas.DataFrame with one row per data line and one column per
        header field.
    """
    with open(filename, mode='r', encoding='utf-8') as handle:
        header = handle.readline().rstrip().split(sep)
        rows = [dict(zip(header, raw.rstrip().split(sep))) for raw in handle]

    return pd.DataFrame(rows)

def load_instances(filename, dirname='TRoTR/rounds', sep='\t'):
    """Read an instances file and split each use pair into two id columns.

    The first line is the header. Every data row must provide a ``dataIDs``
    field of the form ``"<id1>,<id2>"``; it is kept as-is and additionally
    split into the ``dataID1`` and ``dataID2`` columns.

    Args:
        filename: name of the file inside ``dirname``.
        dirname: directory containing the instances files.
        sep: field separator, applied to the header and to every data row.

    Returns:
        pandas.DataFrame with the header columns plus ``dataID1`` and
        ``dataID2``; all values are strings.

    Raises:
        KeyError: if a row has no ``dataIDs`` field.
        ValueError: if ``dataIDs`` does not contain exactly two
            comma-separated ids.
    """
    records = list()
    with open(f'{dirname}/{filename}', mode='r', encoding='utf-8') as f:
        columns = f.readline().rstrip().split(sep) + ['dataID1', 'dataID2']
        for line in f:
            # Remove only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = line.rstrip('\n')
            if not line:
                # Blank lines (e.g. at the end of the file) carry no record.
                continue
            record = dict(zip(columns, line.split(sep)))
            record['dataID1'], record['dataID2'] = record['dataIDs'].split(',')
            records.append(record)

    return pd.DataFrame(records)

def load_judgments(filename, dirname='TRoTR/judgments', sep='\t'):
    """Read a judgments file and return it with a numeric ``label`` column.

    The first line is the header. Each line has its trailing whitespace
    stripped and is split on ``sep``. A ``label`` equal to ``'-'`` is stored
    as NaN; all labels are then cast to float.

    Args:
        filename: name of the file inside ``dirname``.
        dirname: directory containing the judgments files.
        sep: field separator.

    Returns:
        pandas.DataFrame with one row per judgment; ``label`` is float, the
        remaining columns are strings.
    """
    def parse(raw_line):
        # Map header names onto the fields of one data line.
        row = dict(zip(header, raw_line.rstrip().split(sep)))
        if row['label'] == '-':
            row['label'] = math.nan
        return row

    with open(f'{dirname}/{filename}', mode='r', encoding='utf-8') as handle:
        header = handle.readline().rstrip().split(sep)
        rows = [parse(raw_line) for raw_line in handle]

    judgments = pd.DataFrame(rows)
    judgments['label'] = judgments['label'].astype(float)

    return judgments

def merge_data(df_uses, df_instances, df_judgments):
    """Join judgments with their instance and with both uses of the pair.

    Judgments are merged with instances on the columns they share, then the
    uses table is attached twice: once through ``dataID1`` and once through
    ``dataID2``. The use-specific columns are suffixed ``1`` / ``2``; the
    ``lemma`` column kept in the result is the one from the second use.
    All merges use the pandas default (inner join).

    Args:
        df_uses: uses table with ``dataID``, ``lemma``, ``context``,
            ``indices_target_token`` and ``indices_target_sentence``.
        df_instances: instances table including ``dataID1`` and ``dataID2``.
        df_judgments: judgments table.

    Returns:
        pandas.DataFrame restricted to a fixed list of 16 columns.
    """
    use_columns = ['context', 'indices_target_token', 'indices_target_sentence']

    # Attach the first use; its lemma is dropped because the second merge
    # brings the lemma column in again.
    with_first_use = (
        df_judgments
        .merge(df_instances)
        .merge(df_uses, left_on='dataID1', right_on='dataID')
        .drop(columns=['dataID', 'lemma'])
        .rename(columns={name: f'{name}1' for name in use_columns})
    )

    # Attach the second use.
    with_both_uses = (
        with_first_use
        .merge(df_uses, left_on='dataID2', right_on='dataID')
        .drop(columns=['dataID'])
        .rename(columns={name: f'{name}2' for name in use_columns})
    )

    return with_both_uses[[
        'instanceID', 'dataID1', 'dataID2', 'label', 'annotator', 'lemma',
        'context1', 'context2',
        'indices_target_token1', 'indices_target_sentence1',
        'indices_target_sentence2', 'indices_target_token2',
        'comment', 'label_set', 'non_label', 'dataIDs',
    ]]

In [3]:
# The same file name is used for the instances file and the judgments file;
# each loader resolves it inside its own default directory.
round_ = 'TRoTR.tsv'

df_uses = load_uses()
df_instances = load_instances(filename=round_)
df_judgments = load_judgments(filename=round_)

# One row per judgment, enriched with the instance and both uses.
df = merge_data(df_uses=df_uses, df_instances=df_instances, df_judgments=df_judgments)

# Statistics

In [4]:
def inter_annotator_agreement(df):
    """Average pairwise Spearman correlation between annotators.

    For every unordered pair of annotators, the labels are paired through an
    inner join on ``instanceID``, so only instances judged by both annotators
    contribute and each label is compared with the label given to the same
    instance. NaN labels are omitted by ``spearmanr`` (``nan_policy='omit'``).

    Pairs sharing fewer than two instances, and pairs whose correlation is
    undefined (NaN), are left out of the average.

    Args:
        df: judgments with at least the columns ``annotator``,
            ``instanceID`` and ``label``.

    Returns:
        The mean correlation rounded to 3 decimals, or NaN when no pair of
        annotators yields a defined correlation.
    """
    annotators = df.annotator.unique()

    # Slice the frame once per annotator instead of once per pair.
    judgments = {
        annotator: df.loc[df['annotator'] == annotator, ['instanceID', 'label']]
        for annotator in annotators
    }

    pairwise_spearman = list()
    # Spearman's rho is symmetric, so each unordered pair is computed once.
    for annotator1, annotator2 in combinations(annotators, 2):
        shared = (
            judgments[annotator1]
            .merge(judgments[annotator2], on='instanceID', suffixes=('1', '2'))
            .sort_values('instanceID')
        )
        if shared.shape[0] < 2:
            # A correlation needs at least two paired observations.
            continue

        corr, _ = spearmanr(shared['label1'].values, shared['label2'].values, nan_policy='omit')
        if corr == corr:  # False only for NaN / undefined correlations
            pairwise_spearman.append(corr)

    if not pairwise_spearman:
        return np.float64('nan')

    return np.mean(pairwise_spearman).round(3)

def krippendorff_agreement(df):
    """Krippendorff's alpha (ordinal) over all judgments in ``df``.

    One row of labels is built per annotator, ordered by ``instanceID``.
    Annotators with fewer rows than there are distinct instances are padded
    with NaN labels for the instances they did not judge.

    Args:
        df: judgments with at least the columns ``annotator``,
            ``instanceID`` and ``label``.

    Returns:
        Alpha rounded to 3 decimals.
    """
    all_instances = df.instanceID.unique()
    n_instances = all_instances.shape[0]

    reliability_rows = []
    for name in df.annotator.unique():
        rated = df[df['annotator'] == name].sort_values('instanceID')

        if rated.shape[0] < n_instances:
            # Pad with NaN labels so every annotator row covers all instances.
            padding = pd.DataFrame()
            padding['instanceID'] = np.setdiff1d(all_instances, rated.instanceID.values)
            padding['label'] = math.nan
            rated = pd.concat([rated, padding]).sort_values('instanceID')

        reliability_rows.append(rated.label.values)

    alpha = krippendorff.alpha(reliability_rows, level_of_measurement='ordinal')
    return round(alpha, 3)

def inter_annotator_agreement_per_target(df):
    """Average pairwise Spearman correlation between annotators, per lemma.

    Within each lemma, the labels of every unordered pair of annotators are
    paired through an inner join on ``instanceID``, so only instances judged
    by both annotators are compared and each label is matched with the label
    given to the same instance. NaN labels are omitted by ``spearmanr``.

    Pairs sharing fewer than two instances, and pairs whose correlation is
    undefined (NaN), are left out of the average.

    Args:
        df: judgments with at least the columns ``lemma``, ``annotator``,
            ``instanceID`` and ``label``.

    Returns:
        pandas.DataFrame with the columns ``lemma`` and
        ``avg_pairwise_spearman_agreement`` (rounded to 3 decimals; NaN for a
        lemma without any defined pairwise correlation), one row per lemma in
        order of first appearance.
    """
    annotators = df.annotator.unique()
    targets = df.lemma.unique()

    averages = list()
    for target in targets:
        # Filter by lemma once, then once per annotator, not once per pair.
        df_target = df[df['lemma'] == target]
        judgments = {
            annotator: df_target.loc[df_target['annotator'] == annotator, ['instanceID', 'label']]
            for annotator in annotators
        }

        pairwise_spearman = list()
        # Spearman's rho is symmetric, so each unordered pair is computed once.
        for annotator1, annotator2 in combinations(annotators, 2):
            shared = (
                judgments[annotator1]
                .merge(judgments[annotator2], on='instanceID', suffixes=('1', '2'))
                .sort_values('instanceID')
            )
            if shared.shape[0] < 2:
                # A correlation needs at least two paired observations.
                continue

            corr, _ = spearmanr(shared['label1'].values, shared['label2'].values, nan_policy='omit')
            if corr == corr:  # False only for NaN / undefined correlations
                pairwise_spearman.append(corr)

        averages.append(np.mean(pairwise_spearman).round(3) if pairwise_spearman else np.nan)

    df_res = pd.DataFrame()
    df_res['lemma'] = targets
    df_res['avg_pairwise_spearman_agreement'] = averages

    return df_res

def krippendorff_agreement_per_target(df):
    """Krippendorff's alpha (ordinal) computed separately for each lemma.

    For a lemma, the relevant judgments are those whose ``instanceID``
    appears among the instances of that lemma. One row of labels is built per
    annotator, ordered by ``instanceID``; annotators with fewer rows than
    there are distinct instances are padded with NaN labels for the instances
    they did not judge.

    Args:
        df: judgments with at least the columns ``lemma``, ``annotator``,
            ``instanceID`` and ``label``.

    Returns:
        pandas.DataFrame with the columns ``lemma`` and
        ``krippendorff_agreement`` (rounded to 3 decimals), one row per lemma.
    """
    all_annotators = df.annotator.unique()

    rows = []
    for lemma in df.lemma.unique():
        lemma_instances = df[df['lemma'] == lemma].instanceID.unique()
        n_instances = lemma_instances.shape[0]
        in_lemma = df['instanceID'].isin(lemma_instances)

        reliability_rows = []
        for name in all_annotators:
            rated = df[(df['annotator'] == name) & in_lemma].sort_values('instanceID')

            if rated.shape[0] < n_instances:
                # Pad with NaN labels so every annotator row covers all
                # instances of this lemma.
                padding = pd.DataFrame()
                padding['instanceID'] = np.setdiff1d(lemma_instances, rated.instanceID.values)
                padding['label'] = math.nan
                rated = pd.concat([rated, padding]).sort_values('instanceID')

            reliability_rows.append(rated.label.values)

        alpha = krippendorff.alpha(reliability_rows, level_of_measurement='ordinal')
        rows.append(dict(lemma=lemma, krippendorff_agreement=round(alpha, 3)))

    return pd.DataFrame(rows)

In [5]:
# Agreement over the whole dataset, displayed as
# (average pairwise Spearman, Krippendorff's alpha).
(
    inter_annotator_agreement(df),
    krippendorff_agreement(df),
)

(0.522, 0.42)

In [6]:
# Per-lemma agreement: both tables carry a 'lemma' column, which is the only
# column they share and therefore the join key.
inter_annotator_agreement_per_target(df).merge(krippendorff_agreement_per_target(df))

Unnamed: 0,lemma,avg_pairwise_spearman_agreement,krippendorff_agreement
0,"They have sown the wind, and they shall reap t...",0.423,0.261
1,"But I suffer not a woman to teach, nor to usur...",0.313,0.302
2,"Judge not, that ye be not judged",0.555,0.472
3,"Love is patient, love is kind",0.382,0.282
4,"Husbands, love your wives, as Christ loved the...",0.507,0.487
5,"The heart is deceitful above all things, and d...",0.311,0.164
6,You shall have no other gods before me,0.331,0.259
7,"Therefore, if anyone is in Christ, he is a new...",0.414,0.307
8,Now faith is confidence in what we hope for an...,0.41,0.391
9,The Lord does not look at the things people lo...,0.508,0.432


# DURel format

In [7]:
def _to_durel_lemma(lemma):
    """Turn a lemma into an identifier: spaces become '_' and commas are removed."""
    return lemma.replace(' ', '_').replace(',', '')


durel_uses = df_uses.copy()
durel_judgments = df.copy()

# DURel preprocessing: judgments
durel_judgments['lemma'] = durel_judgments['lemma'].apply(_to_durel_lemma)
durel_judgments['timestamp'] = ''
durel_judgments = durel_judgments.rename(columns={'dataID1': 'identifier1',
                                                  'dataID2': 'identifier2',
                                                  'label': 'judgment'})
columns = ['identifier1', 'identifier2', 'annotator', 'judgment', 'comment', 'lemma', 'timestamp']
# Rows without a judgment (NaN label) are not exported.
durel_judgments = durel_judgments[columns].dropna(subset=['judgment'])

# DURel preprocessing: uses
durel_uses['lemma'] = durel_uses['lemma'].apply(_to_durel_lemma)
# Columns required by the output layout but not available here are left empty.
for empty_column in ['description', 'pos', 'date', 'grouping']:
    durel_uses[empty_column] = ""
durel_uses = durel_uses.rename(columns={'dataID': 'identifier',
                                        'indices_target_sentence': 'indexes_target_sentence',
                                        'indices_target_token': 'indexes_target_token'})
columns = ['lemma', 'pos', 'date', 'grouping', 'identifier', 'description', 'context', 'indexes_target_token', 'indexes_target_sentence']
durel_uses = durel_uses[columns]

# Output layout: every target is written twice, once in its own folder
# (<id_quote>/uses.tsv, <id_quote>/judgments.tsv) and once in the shared
# uses/ and judgments/ folders (<id_quote>.tsv).
durel_dir = Path('TRoTR/DURel_data')
# The shared folders do not depend on the target, so create them once.
(durel_dir / 'uses').mkdir(parents=True, exist_ok=True)
(durel_dir / 'judgments').mkdir(parents=True, exist_ok=True)

targets = durel_uses.lemma.unique()
for target in targets:
    df_target_uses = durel_uses[durel_uses['lemma'] == target]
    df_target_judgments = durel_judgments[durel_judgments['lemma'] == target]

    # find id quote: the parenthesised part of the first use identifier,
    # with ':' replaced by a space (raw string avoids the invalid '\(' escape)
    first_identifier = df_target_uses.iloc[0].identifier
    match = re.search(r'\(.*\)', first_identifier)
    if match is None:
        raise ValueError(f'No "(...)" part found in identifier {first_identifier!r} (lemma {target!r})')
    id_quote = match.group(0).replace(':', ' ')

    # make the target folder and store both dataframes
    (durel_dir / id_quote).mkdir(parents=True, exist_ok=True)
    df_target_uses.to_csv(durel_dir / id_quote / 'uses.tsv', index=False, sep='\t')
    df_target_uses.to_csv(durel_dir / 'uses' / f'{id_quote}.tsv', index=False, sep='\t')
    df_target_judgments.to_csv(durel_dir / id_quote / 'judgments.tsv', index=False, sep='\t')
    df_target_judgments.to_csv(durel_dir / 'judgments' / f'{id_quote}.tsv', index=False, sep='\t')