In [1]:
import pandas as pd
import sklearn.metrics

# Entity annotations per annotator: read the CSV, keep only the five documents
# listed below, and order rows by `doc_id` then `start`. The kappa score further
# down compares the two sequences element by element, so this ordering is
# presumably what lines the annotators' rows up -- TODO confirm both tables
# always end up with the same number of rows.
entities_f = (
    pd.read_csv("GT_kappa/GT_femke/entities.csv")
    .loc[lambda frame: frame['doc_id'].isin([259,262,267,271,269])]
    .sort_values(by=['doc_id', "start"])
)

entities_d = (
    pd.read_csv("GT_kappa/GT_dani/entities.csv")
    .loc[lambda frame: frame['doc_id'].isin([259,262,267,271,269])]
    .sort_values(by=['doc_id', "start"])
)

# Every entity whose label is not event/title/ID is treated as a selected date;
# only the annotated text is kept for the comparison.
dates_f = entities_f.loc[~entities_f['label'].isin(['event', 'title', 'ID']), 'text'].values
dates_d = entities_d.loc[~entities_d['label'].isin(['event', 'title', 'ID']), 'text'].values

# Inter-annotator agreement on the selected date texts.
cohen_dates = sklearn.metrics.cohen_kappa_score(dates_f, dates_d)



In [2]:
# Date-event combinations per annotator: read the CSV, keep the five selected
# documents, and order rows by `doc_id` then `start`.
date_event_f = (
    pd.read_csv("GT_kappa/GT_femke/date_event_combinations.csv")
    .loc[lambda frame: frame['doc_id'].isin([259,262,267,271,269])]
    .sort_values(by=['doc_id', "start"])
)

date_event_d = (
    pd.read_csv("GT_kappa/GT_dani/date_event_combinations.csv")
    .loc[lambda frame: frame['doc_id'].isin([259,262,267,271,269])]
    .sort_values(by=['doc_id', "start"])
)

# Event column of each annotator, as plain lists.
events_f = [*date_event_f['event'].values]
events_d = [*date_event_d['event'].values]

# Class (`label` column) of each annotator, as plain lists.
class_f = [*date_event_f['label'].values]
class_d = [*date_event_d['label'].values]

# Agreement on the events and on the assigned classes, compared row by row.
cohen_events = sklearn.metrics.cohen_kappa_score(events_d, events_f)
cohen_classes = sklearn.metrics.cohen_kappa_score(class_f, class_d)

In [3]:
# Relation annotations per annotator: read the CSV, keep the five selected
# documents, and order rows by `doc_id`.
relations_f = (
    pd.read_csv("GT_kappa/GT_femke/relations.csv")
    .loc[lambda frame: frame['doc_id'].isin([259,262,267,271,269])]
    .sort_values(by=['doc_id'])
)

relations_d = (
    pd.read_csv("GT_kappa/GT_dani/relations.csv")
    .loc[lambda frame: frame['doc_id'].isin([259,262,267,271,269])]
    .sort_values(by=['doc_id'])
)

def get_relation_ids(relations):
    """Parse stringified id lists such as ``"[3, 7]"`` into lists of ints.

    Parameters
    ----------
    relations : iterable of str
        Each item is a bracketed, comma-separated list of integer ids.
        An empty list (``"[]"``) is accepted and yields ``[]``.

    Returns
    -------
    list of list of int
        One list of ids per input item, in input order.

    Raises
    ------
    ValueError
        If a non-blank token cannot be converted to ``int``.
    """
    ids = []
    for relation in relations:
        # Drop surrounding whitespace first so the brackets are really at the edges.
        tokens = relation.strip().strip("[]").split(",")
        # "[]" splits into a single empty token; skipping blank tokens lets an
        # empty list parse as [] instead of failing on int("").
        ids.append([int(token) for token in tokens if token.strip()])
    return ids

# Parse the stringified `event_ids` column of each annotator's date-event table
# into lists of integer ids (dani first, then femke).
event_ids_d, event_ids_f = (
    get_relation_ids(frame['event_ids'].values)
    for frame in (date_event_d, date_event_f)
)

def get_relations(event_ids, df):
    """Build one concatenated relation-type string per group of event ids.

    For every id in a group, the ``type`` of the first row of ``df`` whose
    ``to_id`` equals that id is looked up; the types of a group are joined
    in order without a separator.

    Parameters
    ----------
    event_ids : iterable of iterable of int
        Groups of ids to resolve; an empty group yields ``''``.
    df : pandas.DataFrame
        Must provide the columns ``to_id`` and ``type``.

    Returns
    -------
    list of str
        One concatenated type string per group, in input order.

    Raises
    ------
    IndexError
        If an id has no row in ``df`` with a matching ``to_id``.
    """
    # Build the id -> type lookup once instead of masking the frame for every id.
    # setdefault keeps the first occurrence, matching "first matching row".
    type_by_target = {}
    for target_id, relation_type in zip(df['to_id'].tolist(), df['type'].tolist()):
        type_by_target.setdefault(target_id, relation_type)

    relations = []
    for group in event_ids:
        try:
            relations.append(''.join(type_by_target[event_id] for event_id in group))
        except KeyError as err:
            # Same exception type as the positional lookup this replaces, but the
            # message now names the id that has no relation.
            raise IndexError(
                f"no relation with to_id == {err.args[0]!r} found"
            ) from err
    return relations

# Resolve each annotator's event-id groups against that annotator's own
# relations table (dani first, then femke).
type_d, type_f = (
    get_relations(id_groups, relation_table)
    for id_groups, relation_table in (
        (event_ids_d, relations_d),
        (event_ids_f, relations_f),
    )
)

# Agreement on the concatenated relation types, compared row by row.
cohen_relations = sklearn.metrics.cohen_kappa_score(type_d, type_f)


In [4]:
# Report the four agreement scores, one per line.
print(
    *(
        f"{description} {score!s}"
        for description, score in (
            ("Cohen's kappa for agreeing on labeling dates:", cohen_dates),
            ("Cohen's kappa for agreeing on labeling events correctly:", cohen_events),
            ("Cohen's kappa for agreeing on labeling classes correctly:", cohen_classes),
            ("Cohen's kappa for agreeing on labeling relations correctly:", cohen_relations),
        )
    ),
    sep="\n",
)

Cohen's kappa for agreeing on labeling dates: 1.0
Cohen's kappa for agreeing on labeling events correctly: 0.678516228748068
Cohen's kappa for agreeing on labeling classes correctly: 0.908289241622575
Cohen's kappa for agreeing on labeling relations correctly: 0.6231884057971014
