Calculation of CV*, Pearson's r and Krippendorff's alpha for Reprogen 2022 paper titled, "Two Reproductions of a Human-Assessed Comparative Evaluation of a Semantic Error Detection System".

In [41]:
import numpy as np
import csv
import matplotlib.pyplot as plt
import scipy.stats as stats
import pandas as pd

%matplotlib inline

In [42]:
# Calculation of Pearson's r.

err_orig = [34, 45, 18] #e2e - correctness - orig
err_repro1 = [36, 48, 16] #e2e - correctness - OD + ZK
err_repro2 = [41, 44, 15] #e2e - correctness - AB + TCF

r1 = stats.pearsonr(err_orig,err_repro1)
r2 = stats.pearsonr(err_orig,err_repro2)
r3 = stats.pearsonr(err_repro1,err_repro2)

print(r1[0], r2[0], r3[0])

0.9993216505720215 0.9483221129282879 0.9593644138891955


In [43]:
err_orig = [5, 30, 10, 8, 16, 17] #e2e - error classes - orig
err_repro1 = [6, 33, 13, 5, 11, 20] #e2e - error classes - OD + ZK
err_repro2 = [6, 28, 8, 22, 24, 8] #e2e - error classes - AB + TCF

r1 = stats.pearsonr(err_orig,err_repro1)
r2 = stats.pearsonr(err_orig,err_repro2)
r3 = stats.pearsonr(err_repro1,err_repro2)

print(r1[0], r2[0], r3[0])

0.9470305356573966 0.6203548025812703 0.37313382947414697


In [44]:
err_orig = [51, 42, 7] #webnlg - correctness - orig
err_repro1 = [38, 40, 15] #webnlg - correctness - OD + ZK
err_repro2 = [59, 35, 6] #webnlg - correctness - AB + TCF

r1 = stats.pearsonr(err_orig,err_repro1)
r2 = stats.pearsonr(err_orig,err_repro2)
r3 = stats.pearsonr(err_repro1,err_repro2)

print(r1[0], r2[0], r3[0])

0.9646027959825653 0.9625971482855068 0.857075946458389


In [45]:
err_orig = [22, 7, 14, 8, 15] #webnlg - error classes - orig
err_repro1 = [16, 3, 27, 25, 19] #webnlg - error classes - OD + ZK
err_repro2 = [5,10,10,28,12] #webnlg - error classes - AB + TCF

r1 = stats.pearsonr(err_orig,err_repro1)
r2 = stats.pearsonr(err_orig,err_repro2)
r3 = stats.pearsonr(err_repro1,err_repro2)

print(r1[0], r2[0], r3[0])

0.20879830038875216 -0.6301826067407483 0.4144312328399571


In [46]:
!pip3 install krippendorff

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [47]:
# code copied from krippendorff_alpha.py at https://github.com/grrrr/krippendorff-alpha/blob/master/krippendorff_alpha.py

# Calculation of Krippendorff's alpha.

from __future__ import print_function
try:
    import numpy as np
except ImportError:
    np = None


def nominal_metric(a, b):
    return a != b


def interval_metric(a, b):
    return (a-b)**2


def ratio_metric(a, b):
    return ((a-b)/(a+b))**2


def krippendorff_alpha(data, metric=interval_metric, force_vecmath=False, convert_items=float, missing_items=None):
    '''
    Calculate Krippendorff's alpha (inter-rater reliability):
    
    data is in the format
    [
        {unit1:value, unit2:value, ...},  # coder 1
        {unit1:value, unit3:value, ...},   # coder 2
        ...                            # more coders
    ]
    or 
    it is a sequence of (masked) sequences (list, numpy.array, numpy.ma.array, e.g.) with rows corresponding to coders and columns to items
    
    metric: function calculating the pairwise distance
    force_vecmath: force vector math for custom metrics (numpy required)
    convert_items: function for the type conversion of items (default: float)
    missing_items: indicator for missing items (default: None)
    '''
    
    # number of coders
    m = len(data)
    
    # set of constants identifying missing values
    if missing_items is None:
        maskitems = []
    else:
        maskitems = list(missing_items)
    if np is not None:
        maskitems.append(np.ma.masked_singleton)
    
    # convert input data to a dict of items
    units = {}
    for d in data:
        try:
            # try if d behaves as a dict
            diter = d.items()
        except AttributeError:
            # sequence assumed for d
            diter = enumerate(d)
            
        for it, g in diter:
            if g not in maskitems:
                try:
                    its = units[it]
                except KeyError:
                    its = []
                    units[it] = its
                its.append(convert_items(g))


    units = dict((it, d) for it, d in units.items() if len(d) > 1)  # units with pairable values
    n = sum(len(pv) for pv in units.values())  # number of pairable values
    
    if n == 0:
        raise ValueError("No items to compare.")
    
    np_metric = (np is not None) and ((metric in (interval_metric, nominal_metric, ratio_metric)) or force_vecmath)
    
    Do = 0.
    for grades in units.values():
        if np_metric:
            gr = np.asarray(grades)
            Du = sum(np.sum(metric(gr, gri)) for gri in gr)
        else:
            Du = sum(metric(gi, gj) for gi in grades for gj in grades)
        Do += Du/float(len(grades)-1)
    Do /= float(n)

    if Do == 0:
        return 1.

    De = 0.
    for g1 in units.values():
        if np_metric:
            d1 = np.asarray(g1)
            for g2 in units.values():
                De += sum(np.sum(metric(d1, gj)) for gj in g2)
        else:
            for g2 in units.values():
                De += sum(metric(gi, gj) for gi in g1 for gj in g2)
    De /= float(n*(n-1))

    return 1.-Do/De if (Do and De) else 1.



In [48]:
# Calculation of krippendorf's alpha - E2E all

if __name__ == '__main__': 

    # E2E data for krippendorff's alpha calculation

    data = (
        "0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    2    2    0    2    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    2    0    2    0    2    2    2    0    0    0    0    2    0    2    2    0    0    0    0    0    2    0    2    0    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    2    2    2    0    0    0    0    0    0    0    0    2    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    0    2    0    0    0    2    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    3    3    0    0    0    0    0    0    0    0    0    0    0    3    0    3    0    0    0    0    0    3    3    0    0    0    4    0    0    0    0    0    0    0    0    0    4    4    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    5    0    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    5    0    0    0    0    5    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    0    5    0    0    0    0    0    5    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    0    6    0    0    0    6    0    0    6    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    6    0    6    6    0    0    0    0    0    0    0    0    6    0    0    0    0    6    0    0    0    0    6    0    0    6    0    6    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0", #orig
        "0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    1    0    0    0    0    0    0    2    0    0    2    0    2    2    0    2    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    2    0    0    2    0    2    0    2    2    2    0    0    0    0    2    2    2    2    0    0    0    0    0    2    0    2    0    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    2    2    2    0    0    0    0    2    0    0    0    0    0    0    2    2    0    2    0    2    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    3    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    3    0    3    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    5    0    0    5    0    0    6    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    6    0    0    0    0    6    0    0    6    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    6    0    6    6    0    0    0    0    0    0    0    0    6    0    0    0    0    6    0    0    0    0    6    0    0    6    0    0    0    0    0    0    6    0    0    0    0    0    6    6    0    6    6", #OD + ZK
        "0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    1    1    0    0    0    0    0    0    0    0    0    0    2    2    2    0    2    0    0    0    0    2    2    0    2    0    0    0    0    0    0    2    0    0    0    0    0    0    2    2    2    2    0    0    0    0    0    0    2    2    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    2    0    2    0    0    0    0    0    0    0    0    2    0    0    0    0    2    2    0    2    2    0    0    0    2    0    0    0    0    2    2    0    0    2    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    4    0    0    4    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    4    0    0    0    0    4    4    0    0    0    0    0    0    4    4    0    0    0    4    0    0    4    0    0    0    4    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    4    0    4    4    0    0    0    0    0    0    0    4    4    0    0    4    0    0    0    0    0    0    4    0    0    0    0    0    5    0    5    5    0    5    0    0    0    0    5    0    5    5    0    0    5    0    0    0    0    5    0    5    0    0    0    0    5    5    5    0    0    0    5    0    0    5    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    5    5    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    6    0    0    0    0    0    6    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0", #AB + TCF
        )

    missing = '*' # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items
    #print(array)
    
    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))

nominal metric: 0.467
interval metric: 0.378


In [49]:
# Calculation of krippendorf's alpha - E2E orig vs OD + ZK

if __name__ == '__main__': 
    
    # E2E data for krippendorff's alpha calculation

    data = (
        "0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    2    2    0    2    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    2    0    2    0    2    2    2    0    0    0    0    2    0    2    2    0    0    0    0    0    2    0    2    0    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    2    2    2    0    0    0    0    0    0    0    0    2    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    0    2    0    0    0    2    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    3    3    0    0    0    0    0    0    0    0    0    0    0    3    0    3    0    0    0    0    0    3    3    0    0    0    4    0    0    0    0    0    0    0    0    0    4    4    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    5    0    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    5    0    0    0    0    5    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    0    5    0    0    0    0    0    5    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    0    6    0    0    0    6    0    0    6    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    6    0    6    6    0    0    0    0    0    0    0    0    6    0    0    0    0    6    0    0    0    0    6    0    0    6    0    6    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0", #orig
        "0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    1    0    0    0    0    0    0    2    0    0    2    0    2    2    0    2    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    2    0    0    2    0    2    0    2    2    2    0    0    0    0    2    2    2    2    0    0    0    0    0    2    0    2    0    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    2    2    2    0    0    0    0    2    0    0    0    0    0    0    2    2    0    2    0    2    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    3    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    3    0    3    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    5    0    0    5    0    0    6    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    6    0    0    0    0    6    0    0    6    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    6    0    6    6    0    0    0    0    0    0    0    0    6    0    0    0    0    6    0    0    0    0    6    0    0    6    0    0    0    0    0    0    6    0    0    0    0    0    6    6    0    6    6", #OD + ZK
        #"0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    1    1    0    0    0    0    0    0    0    0    0    0    2    2    2    0    2    0    0    0    0    2    2    0    2    0    0    0    0    0    0    2    0    0    0    0    0    0    2    2    2    2    0    0    0    0    0    0    2    2    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    2    0    2    0    0    0    0    0    0    0    0    2    0    0    0    0    2    2    0    2    2    0    0    0    2    0    0    0    0    2    2    0    0    2    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    4    0    0    4    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    4    0    0    0    0    4    4    0    0    0    0    0    0    4    4    0    0    0    4    0    0    4    0    0    0    4    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    4    0    4    4    0    0    0    0    0    0    0    4    4    0    0    4    0    0    0    0    0    0    4    0    0    0    0    0    5    0    5    5    0    5    0    0    0    0    5    0    5    5    0    0    5    0    0    0    0    5    0    5    0    0    0    0    5    5    5    0    0    0    5    0    0    5    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    5    5    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    6    0    0    0    0    0    6    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0", #AB + TCF
        )

    missing = '*' # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items
    #print(array)
    
    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))

nominal metric: 0.735
interval metric: 0.707


In [50]:
# Calculation of krippendorf's alpha - E2E orig vs AB + TCF

if __name__ == '__main__': 

    # E2E data for krippendorff's alpha calculation

    data = (
        "0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    2    2    0    2    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    2    0    2    0    2    2    2    0    0    0    0    2    0    2    2    0    0    0    0    0    2    0    2    0    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    2    2    2    0    0    0    0    0    0    0    0    2    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    0    2    0    0    0    2    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    3    3    0    0    0    0    0    0    0    0    0    0    0    3    0    3    0    0    0    0    0    3    3    0    0    0    4    0    0    0    0    0    0    0    0    0    4    4    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    5    0    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    5    0    0    0    0    5    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    0    5    0    0    0    0    0    5    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    0    6    0    0    0    6    0    0    6    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    6    0    6    6    0    0    0    0    0    0    0    0    6    0    0    0    0    6    0    0    0    0    6    0    0    6    0    6    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0", #orig
        #"0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    1    0    0    0    0    0    0    2    0    0    2    0    2    2    0    2    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    2    0    0    2    0    2    0    2    2    2    0    0    0    0    2    2    2    2    0    0    0    0    0    2    0    2    0    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    2    2    2    0    0    0    0    2    0    0    0    0    0    0    2    2    0    2    0    2    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    3    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    3    0    3    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    5    0    0    5    0    0    6    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    6    0    0    0    0    6    0    0    6    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    6    0    6    6    0    0    0    0    0    0    0    0    6    0    0    0    0    6    0    0    0    0    6    0    0    6    0    0    0    0    0    0    6    0    0    0    0    0    6    6    0    6    6", #OD + ZK
        "0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    1    1    0    0    0    0    0    0    0    0    0    0    2    2    2    0    2    0    0    0    0    2    2    0    2    0    0    0    0    0    0    2    0    0    0    0    0    0    2    2    2    2    0    0    0    0    0    0    2    2    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    2    0    2    0    0    0    0    0    0    0    0    2    0    0    0    0    2    2    0    2    2    0    0    0    2    0    0    0    0    2    2    0    0    2    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    4    0    0    4    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    4    0    0    0    0    4    4    0    0    0    0    0    0    4    4    0    0    0    4    0    0    4    0    0    0    4    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    4    0    4    4    0    0    0    0    0    0    0    4    4    0    0    4    0    0    0    0    0    0    4    0    0    0    0    0    5    0    5    5    0    5    0    0    0    0    5    0    5    5    0    0    5    0    0    0    0    5    0    5    0    0    0    0    5    5    5    0    0    0    5    0    0    5    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    5    5    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    6    0    0    0    0    0    6    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0", #AB + TCF
        )

    missing = '*' # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items
    #print(array)
    
    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))

nominal metric: 0.347
interval metric: 0.233


In [51]:
# Calculation of krippendorf's alpha - E2E OD + ZK vs AB + TCF

if __name__ == '__main__': 
    

    # E2E data for krippendorff's alpha calculation

    data = (
        #"0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    2    2    0    2    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    2    0    2    0    2    2    2    0    0    0    0    2    0    2    2    0    0    0    0    0    2    0    2    0    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    2    2    2    0    0    0    0    0    0    0    0    2    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    0    2    0    0    0    2    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    3    3    0    0    0    0    0    0    0    0    0    0    0    3    0    3    0    0    0    0    0    3    3    0    0    0    4    0    0    0    0    0    0    0    0    0    4    4    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    5    0    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    5    0    0    0    0    5    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    0    5    0    0    0    0    0    5    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    0    6    0    0    0    6    0    0    6    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    6    0    6    6    0    0    0    0    0    0    0    0    6    0    0    0    0    6    0    0    0    0    6    0    0    6    0    6    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0", #orig
        "0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    1    0    0    0    0    0    0    2    0    0    2    0    2    2    0    2    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    2    0    0    2    0    2    0    2    2    2    0    0    0    0    2    2    2    2    0    0    0    0    0    2    0    2    0    0    0    2    2    0    2    0    0    0    0    0    0    0    2    0    2    2    2    0    0    0    0    2    0    0    0    0    0    0    2    2    0    2    0    2    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    3    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    3    0    3    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    5    0    0    5    0    0    6    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    6    0    0    0    0    6    0    0    6    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    6    0    6    6    0    0    0    0    0    0    0    0    6    0    0    0    0    6    0    0    0    0    6    0    0    6    0    0    0    0    0    0    6    0    0    0    0    0    6    6    0    6    6", #OD + ZK
        "0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    1    1    0    0    0    0    0    0    0    0    0    0    2    2    2    0    2    0    0    0    0    2    2    0    2    0    0    0    0    0    0    2    0    0    0    0    0    0    2    2    2    2    0    0    0    0    0    0    2    2    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    2    0    2    0    0    0    0    0    0    0    0    2    0    0    0    0    2    2    0    2    2    0    0    0    2    0    0    0    0    2    2    0    0    2    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    4    0    0    4    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    4    0    0    0    0    4    4    0    0    0    0    0    0    4    4    0    0    0    4    0    0    4    0    0    0    4    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    4    0    4    4    0    0    0    0    0    0    0    4    4    0    0    4    0    0    0    0    0    0    4    0    0    0    0    0    5    0    5    5    0    5    0    0    0    0    5    0    5    5    0    0    5    0    0    0    0    5    0    5    0    0    0    0    5    5    5    0    0    0    5    0    0    5    5    0    0    0    0    5    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    0    0    0    0    0    5    5    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    6    0    6    0    0    0    0    0    6    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0", #AB + TCF
        )

    missing = '*' # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items
    #print(array)
    
    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))

nominal metric: 0.330
interval metric: 0.200


In [52]:
# Calculation of krippendorf's alpha - WebNLG all

if __name__ == '__main__': 
    


    # WebNLG data for krippendorff's alpha calculation

    data = (
        "1    0    0    0    0    0    1    0    0    0    0    0    1    0    0    1    1    0    1    0    0    1    1    0    1    1    0    0    0    0    0    0    0    1    0    0    0    1    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    1    0    0    0    0    0    0    0    0    0    0    0    1    1    0    0    0    1    0    0    0    0    1    0    1    0    1    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    3    0    0    0    3    0    0    3    0    3    0    0    0    0    0    0    3    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    3    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    4    0    0    0    0    5    5    0    0    5    5    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    5    0    0    0", #orig_WebNLG
        "0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    1    0    1    0    0    0    1    0    0    1    0    0    0    0    0    0    1    0    0    0    1    1    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    1    1    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    3    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    3    0    0    3    0    3    0    0    0    0    0    0    3    0    0    0    0    0    3    3    0    0    3    0    3    3    0    0    0    0    0    0    0    3    0    0    0    3    3    0    0    0    0    0    0    3    0    3    3    0    3    0    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    0    3    0    3    0    3    0    3    0    0    4    0    4    0    0    4    0    0    4    0    4    0    0    4    4    0    0    0    0    0    4    0    0    4    0    0    0    0    4    0    4    0    0    0    0    0    0    0    4    0    0    0    4    4    0    0    0    0    4    4    4    4    0    0    0    0    4    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    4    0    0    4    0    0    4    0    0    0    0    0    0    0    4    0    0    0    0    0    5    5    0    0    5    0    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    5    0    5    5    5    0    0    0    0    5    0    0    5    0    0    0    0    5    0    0    5    0    0    0    0    0    0    0    0    0    5    0    0    0    5    0    0    0", #OD+ZK_WebNLG
        "0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    2    0    0    0    0    0    0    0    0    0    0    0    2    2    0    2    2    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    3    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    3    0    0    3    3    0    3    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    0    0    0    0    4    0    4    4    4    0    4    0    0    0    4    0    0    4    0    0    0    0    4    0    0    4    4    0    0    0    0    0    0    0    4    0    0    4    0    0    4    0    4    0    0    4    0    0    0    4    4    0    4    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    4    0    0    0    4    0    0    4    0    4    4    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    5    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0", #AB+TF_WebNLG
    )

    missing = '*' # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items
    #print(array)
    
    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))

nominal metric: 0.165
interval metric: 0.201


In [53]:
# Calculation of krippendorf's alpha - WebNLG orig vs OD + ZK

if __name__ == '__main__': 
    


    # WebNLG data for krippendorff's alpha calculation

    data = (
        "1    0    0    0    0    0    1    0    0    0    0    0    1    0    0    1    1    0    1    0    0    1    1    0    1    1    0    0    0    0    0    0    0    1    0    0    0    1    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    1    0    0    0    0    0    0    0    0    0    0    0    1    1    0    0    0    1    0    0    0    0    1    0    1    0    1    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    3    0    0    0    3    0    0    3    0    3    0    0    0    0    0    0    3    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    3    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    4    0    0    0    0    5    5    0    0    5    5    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    5    0    0    0", #orig_WebNLG
        "0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    1    0    1    0    0    0    1    0    0    1    0    0    0    0    0    0    1    0    0    0    1    1    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    1    1    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    3    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    3    0    0    3    0    3    0    0    0    0    0    0    3    0    0    0    0    0    3    3    0    0    3    0    3    3    0    0    0    0    0    0    0    3    0    0    0    3    3    0    0    0    0    0    0    3    0    3    3    0    3    0    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    0    3    0    3    0    3    0    3    0    0    4    0    4    0    0    4    0    0    4    0    4    0    0    4    4    0    0    0    0    0    4    0    0    4    0    0    0    0    4    0    4    0    0    0    0    0    0    0    4    0    0    0    4    4    0    0    0    0    4    4    4    4    0    0    0    0    4    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    4    0    0    4    0    0    4    0    0    0    0    0    0    0    4    0    0    0    0    0    5    5    0    0    5    0    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    5    0    5    5    5    0    0    0    0    5    0    0    5    0    0    0    0    5    0    0    5    0    0    0    0    0    0    0    0    0    5    0    0    0    5    0    0    0", #OD+ZK_WebNLG
        #"0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    2    0    0    0    0    0    0    0    0    0    0    0    2    2    0    2    2    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    3    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    3    0    0    3    3    0    3    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    0    0    0    0    4    0    4    4    4    0    4    0    0    0    4    0    0    4    0    0    0    0    4    0    0    4    4    0    0    0    0    0    0    0    4    0    0    4    0    0    4    0    4    0    0    4    0    0    0    4    4    0    4    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    4    0    0    0    4    0    0    4    0    4    4    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    5    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0", #AB+TF_WebNLG
    )

    missing = '*' # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items
    #print(array)
    
    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))

nominal metric: 0.207
interval metric: 0.209


In [54]:
# Calculation of krippendorf's alpha - WebNLG orig vs AB + TCF

if __name__ == '__main__': 
    


    # WebNLG data for krippendorff's alpha calculation

    data = (
        "1    0    0    0    0    0    1    0    0    0    0    0    1    0    0    1    1    0    1    0    0    1    1    0    1    1    0    0    0    0    0    0    0    1    0    0    0    1    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    1    0    0    0    0    0    0    0    0    0    0    0    1    1    0    0    0    1    0    0    0    0    1    0    1    0    1    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    3    0    0    0    3    0    0    3    0    3    0    0    0    0    0    0    3    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    3    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    4    0    0    0    0    5    5    0    0    5    5    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    5    0    0    0", #orig_WebNLG
        #"0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    1    0    1    0    0    0    1    0    0    1    0    0    0    0    0    0    1    0    0    0    1    1    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    1    1    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    3    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    3    0    0    3    0    3    0    0    0    0    0    0    3    0    0    0    0    0    3    3    0    0    3    0    3    3    0    0    0    0    0    0    0    3    0    0    0    3    3    0    0    0    0    0    0    3    0    3    3    0    3    0    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    0    3    0    3    0    3    0    3    0    0    4    0    4    0    0    4    0    0    4    0    4    0    0    4    4    0    0    0    0    0    4    0    0    4    0    0    0    0    4    0    4    0    0    0    0    0    0    0    4    0    0    0    4    4    0    0    0    0    4    4    4    4    0    0    0    0    4    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    4    0    0    4    0    0    4    0    0    0    0    0    0    0    4    0    0    0    0    0    5    5    0    0    5    0    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    5    0    5    5    5    0    0    0    0    5    0    0    5    0    0    0    0    5    0    0    5    0    0    0    0    0    0    0    0    0    5    0    0    0    5    0    0    0", #OD+ZK_WebNLG
        "0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    2    0    0    0    0    0    0    0    0    0    0    0    2    2    0    2    2    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    3    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    3    0    0    3    3    0    3    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    0    0    0    0    4    0    4    4    4    0    4    0    0    0    4    0    0    4    0    0    0    0    4    0    0    4    4    0    0    0    0    0    0    0    4    0    0    4    0    0    4    0    4    0    0    4    0    0    0    4    4    0    4    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    4    0    0    0    4    0    0    4    0    4    4    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    5    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0", #AB+TF_WebNLG
    )

    missing = '*' # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items
    #print(array)
    
    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))

nominal metric: 0.114
interval metric: 0.206


In [55]:
# Calculation of krippendorf's alpha - WebNLG OD + ZK vs AB + TCF

if __name__ == '__main__': 
    


    # WebNLG data for krippendorff's alpha calculation

    data = (
        #"1    0    0    0    0    0    1    0    0    0    0    0    1    0    0    1    1    0    1    0    0    1    1    0    1    1    0    0    0    0    0    0    0    1    0    0    0    1    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    1    0    0    0    0    0    0    0    0    0    0    0    1    1    0    0    0    1    0    0    0    0    1    0    1    0    1    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    3    0    0    0    3    0    0    3    0    3    0    0    0    0    0    0    3    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    3    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    4    0    0    0    0    5    5    0    0    5    5    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    5    5    0    0    0    5    0    0    0", #orig_WebNLG
        "0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    1    0    1    0    0    0    1    0    0    1    0    0    0    0    0    0    1    0    0    0    1    1    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    1    1    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    3    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    3    0    0    3    0    3    0    0    0    0    0    0    3    0    0    0    0    0    3    3    0    0    3    0    3    3    0    0    0    0    0    0    0    3    0    0    0    3    3    0    0    0    0    0    0    3    0    3    3    0    3    0    0    0    0    0    0    0    3    0    0    0    3    0    0    3    0    0    0    3    0    3    0    3    0    3    0    0    4    0    4    0    0    4    0    0    4    0    4    0    0    4    4    0    0    0    0    0    4    0    0    4    0    0    0    0    4    0    4    0    0    0    0    0    0    0    4    0    0    0    4    4    0    0    0    0    4    4    4    4    0    0    0    0    4    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    0    0    0    4    0    0    4    0    0    4    0    0    0    0    0    0    0    4    0    0    0    0    0    5    5    0    0    5    0    5    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    0    0    5    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    5    0    5    5    5    0    0    0    0    5    0    0    5    0    0    0    0    5    0    0    5    0    0    0    0    0    0    0    0    0    5    0    0    0    5    0    0    0", #OD+ZK_WebNLG
        "0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    2    0    0    0    0    0    0    0    0    0    0    0    2    2    0    2    2    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0    0    2    0    0    0    0    0    0    0    0    3    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    3    0    0    3    3    0    3    0    0    0    0    0    0    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    3    0    0    0    3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    4    0    0    0    0    4    0    4    4    4    0    4    0    0    0    4    0    0    4    0    0    0    0    4    0    0    4    4    0    0    0    0    0    0    0    4    0    0    4    0    0    4    0    4    0    0    4    0    0    0    4    4    0    4    0    0    0    0    0    0    0    0    0    0    0    4    0    0    0    0    4    0    4    0    0    0    4    0    0    4    0    4    4    0    0    4    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    5    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    5    0    5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0    0    0    5    0    0    0    0    0    0    0    0", #AB+TF_WebNLG
    )

    missing = '*' # indicator for missing values
    array = [d.split() for d in data]  # convert to 2D list of string items
    #print(array)
    
    print("nominal metric: %.3f" % krippendorff_alpha(array, nominal_metric, missing_items=missing))
    print("interval metric: %.3f" % krippendorff_alpha(array, interval_metric, missing_items=missing))

nominal metric: 0.166
interval metric: 0.187


In [56]:
# code copied from small_sample_cv.ipynb at https://github.com/asbelz/coeff-var

# Calculation of CV* for E2E.

# -*- coding: utf-8 -*-
"""This code computes the coefficient of variation (CV) and some other stats for small samples (indicated by the * added to CV) 
for a given set of measurements which are assumed to be for the same or similar object, using the same measurand. 
Stats are adjusted for small sample size. Paper ref: Belz, Popovic & Mille (2022) Quantified Reproducibility Assessment of NLP Results,
ACL'22.

In this self-contained version, the set of measurements on which CV is computed is assigned to the variable set_of_set_of_measurements
(see examples in code below).

The reproducibility stats reported in the output are: 
* the unbiased coefficient of variation
* the sample mean
* the unbiased sample standard deviation with 95% confidence intervals, estimated on the basis of the standard error of the unbiassed sample variance
* the sample size
* the percentage of measured valued within two standard deviations
* the percentage of measured valued within one standard deviation

Example narrative output:

The unbiased coefficient of variation is 1.5616560359100269 \
for a mean of 85.58285714285714 , \
unbiased sample standard deviation of 1.2904233075765223 with 95\% CI (0.4514829817654973, 2.1293636333875474) ,\
and a sample size of 7 . \
100.0 % of measured values fall within two standard deviations. \
71.429 % of measured values fall within one standard deviation. 

NOTE:
* CV assumes all measurements are positive; if they're not, shift measurement scale to start at 0
* for fair comparison across studies, measurements on a scale that doesn't start at 0 need to be shifted to a scale that does start at 0 

KNOWN ISSUES:

none
"""

import math
import numpy as np
from scipy.stats import t

# measurements from Belz et al. (2022)
# --- Table 4: BLEU scores for 7 NTS_def repros ---
# set_of_set_of_measurements = [[84.51, 84.50, 87.46, 85.60, 84.20, 86.61, 86.20]] # 7, NTS_def, BLEU
# --- Table 4: SARI scores for 5 NTS_def repros ---
#set_of_set_of_measurements = [[30.65, 30.65, 29.13, 30.65, 29.96]] # 5, NTS_def, SARI 
# --- Table 4: BLEU scores for 6 NTS-w2v_def repros ---
#set_of_set_of_measurements = [[87.50, 80.75, 89.36, 88.10, 89.64, 88.80]] # 6, NTS-w2v_def, BLEU
# --- Table 4: SARI scores for 4 NTS_w2v_def repros ---
#set_of_set_of_measurements = [[31.11, 30.28, 31.11, 29.12]] # 4, NTS-w2v_def, SARI
# --- Table 5: blue highlights (i.e. subsets of BLEU scores from above where outputs were reused, not regenerated) ---
#set_of_set_of_measurements = [[84.51, 84.50, 85.60, 84.20]] # 7, NTS_def, BLEU
#set_of_set_of_measurements = [[87.50, 89.36, 88.10]] # 6, NTS-w2v_def, BLEU
# --- Section 4.1, fourth paragraph: subset of scores where outputs were regenerated and Nisioi's BLEU script was used
#set_of_set_of_measurements = [[84.51, 87.46, 86.61], # 7, NTS_def, BLEU
#                              [30.65, 29.13, 29.96], # 5, NTS_def, SARI 
#                              [87.50, 80.75, 89.64], # 6, NTS-w2v_def, BLEU
#                              [31.11, 30.28, 29.12]] # 4, NTS-w2v_def, SARI
# --- Table 6: wF1 scores for 11 system variants ---
#set_of_set_of_measurements = [[0.428, 0.493, 0.426, 0.574, 0.579, 0.590, 0.574, 0.600],
#                              [0.721, 0.603, 0.605, 0.606, 0.720, 0.732, 0.606, 0.740],
#                              [0.719, 0.604, 0.607, 0.607, 0.723, 0.733, 0.607, 0.736],
#                              [0.726, 0.681, 0.680, 0.680, 0.722, 0.728, 0.680, 0.732],
#                              [0.724, 0.680, 0.680, 0.681, 0.725, 0.729, 0.681, 0.731],
#                              [0.703, 0.660, 0.650, 0.651, 0.699, 0.711, 0.651, 0.710],
#                              [0.693, 0.661, 0.652, 0.653, 0.699, 0.712, 0.653, 0.716],
#                              [0.449, 0.600, 0.433, 0.597, 0.635, 0.646, 0.597, 0.698],
#                              [0.471, 0.647, 0.447, 0.647, 0.696, 0.711, 0.647, 0.726],
#                              [0.693, 0.658, 0.683, 0.668, 0.692, 0.689, 0.659, 0.391],
#                              [0.689, 0.662, 0.681, 0.659, 0.681, 0.684, 0.657, 0.401]]

# E2E - Table 1 - orig, OD + ZK

'''set_of_set_of_measurements = [[34,	36],
                             [45,	48],
                             [18,	16],
                             [5,	6],
                             [30,	33],
                             [10,	13],
                             [8,	5],
                             [16,	11],
                             [17,	20]]'''

# E2E - Table 1 - orig, AB + TCF

'''set_of_set_of_measurements = [[34,	41],
                             [45,	44],
                             [18,	15],
                             [5,	6],
                             [30,	28],
                             [10,	8],
                             [8,	22],
                             [16,	24],
                             [17,	8]]'''

# E2E - Table 2 - all

set_of_set_of_measurements = [[34,	41,	31,	37,	50],
                             [45,	45,	53,	41,	47],
                             [18,	14,	15,	22,	3],
                             [5,	10,	5,	2,	8],
                             [30,	31,	39,	42,	9],
                             [10,	11,	10,	8,	1],
                             [8,	8,	3,	38,	0],
                             [16,	10,	14,	42,	6],
                             [17,	15,	24,	19,	4]]




for set_of_measurements in set_of_set_of_measurements:
  if len(set_of_measurements) < 2:
    print(set_of_measurements, ": set of measurements is smaller than 2")
    break

  sample_mean = np.mean(set_of_measurements)
  if sample_mean <= 0:
    print(set_of_measurements, ": mean is 0 or negative")
    break

  sample_size = len(set_of_measurements)
  degrees_of_freedom = sample_size-1
  sum_of_squared_differences = np.sum(np.square(sample_mean-set_of_measurements))

  # unbiassed sample variance s^2
  unbiassed_sample_variance = sum_of_squared_differences/degrees_of_freedom
  # corrected sample standard deviation s
  corrected_sample_standard_deviation = np.sqrt(unbiassed_sample_variance)
  # Gamma(N/2)
  gamma_N_over_2 = math.gamma(sample_size/2)
  # Gamma((N-1)/2)
  gamma_df_over_2 = math.gamma(degrees_of_freedom/2)
  # c_4(N)
  c_4_N = math.sqrt(2/degrees_of_freedom)*gamma_N_over_2/gamma_df_over_2
  # unbiassed sample std dev s/c_4
  unbiassed_sample_std_dev_s_c_4 = corrected_sample_standard_deviation/c_4_N
  # standard error of the unbiassed sample variance (assumes normally distributed population)
  standard_error_of_unbiassed_sample_variance = unbiassed_sample_variance*np.sqrt(2/degrees_of_freedom)
  # estimated std err of std dev based on std err of unbiassed sample variance
  est_SE_of_SD_based_on_SE_of_unbiassed_sample_variance = standard_error_of_unbiassed_sample_variance/(2*unbiassed_sample_std_dev_s_c_4)

  # COEFFICIENT OF VARIATION CV
  coefficient_of_variation = (unbiassed_sample_std_dev_s_c_4/sample_mean)*100
  # SMALL SAMPLE CORRECTED COEFFICIENT OF VARIATION CV*
  small_sample_coefficient_of_variation = (1+(1/(4*sample_size)))*coefficient_of_variation

  # compute percentage of measured values within 1 and 2 standard deviations from the mean
  # initialise counts
  count_within_1_sd = 0
  count_within_2_sd = 0
  # for each measured value
  for m in set_of_measurements:
    # if it's within two std devs, increment count_within_2_sd
    if np.abs(m-sample_mean) < 2*unbiassed_sample_std_dev_s_c_4:
      count_within_2_sd += 1
      #if it's also within one std devs, increment count_within_1_sd
      if np.abs(m-sample_mean) < unbiassed_sample_std_dev_s_c_4:
        count_within_1_sd += 1

  # report results as described in code description above
  print("The unbiased coefficient of variation is",small_sample_coefficient_of_variation)
  print("for a mean of",sample_mean,", ")
  print("unbiased sample standard deviation of",unbiassed_sample_std_dev_s_c_4,", with 95\% CI",t.interval(0.95, degrees_of_freedom, loc=unbiassed_sample_std_dev_s_c_4, scale=est_SE_of_SD_based_on_SE_of_unbiassed_sample_variance),",")
  print("and a sample size of",sample_size,".")
  print(count_within_2_sd/sample_size*100,"% of measured values fall within two standard deviations.")
  print(round(count_within_1_sd/sample_size*100, 3),"% of measured values fall within one standard deviation.", )



The unbiased coefficient of variation is 21.324590853646352
for a mean of 38.6 , 
unbiased sample standard deviation of 7.839325780483326 , with 95\% CI (1.040009955796486, 14.638641605170164) ,
and a sample size of 5 .
100.0 % of measured values fall within two standard deviations.
80.0 % of measured values fall within one standard deviation.
The unbiased coefficient of variation is 10.59440902392319
for a mean of 46.2 , 
unbiased sample standard deviation of 4.661539970526204 , with 95\% CI (0.6184266497458424, 8.704653291306565) ,
and a sample size of 5 .
100.0 % of measured values fall within two standard deviations.
60.0 % of measured values fall within one standard deviation.
The unbiased coefficient of variation is 55.01607426713517
for a mean of 14.4 , 
unbiased sample standard deviation of 7.545061613778536 , with 95\% CI (1.0009711823640774, 14.089152045192995) ,
and a sample size of 5 .
100.0 % of measured values fall within two standard deviations.
60.0 % of measured values

In [57]:
# code copied from small_sample_cv.ipynb at https://github.com/asbelz/coeff-var

# Calculation of CV* for WebNLG.


# WebNLG - Table 1 - orig, OD + ZK

'''set_of_set_of_measurements = [[51,	38],
                            [42,	40],
                            [7,	15],
                            [22,	16],
                            [7,	3],
                            [14,	27],
                            [8,	25],
                            [15,	19]]'''

# WebNLG - Table 1 - orig, AB + TCF

'''set_of_set_of_measurements = [[51,	59],
                            [42,	35],
                            [7,	6],
                            [22,	5],
                            [7,	10],
                            [14,	10],
                            [8,	28],
                            [15,	12]]'''

# WebNLG - Table 2 - all

set_of_set_of_measurements = [[51,	43,	34,	55,	48],
                            [42,	44,	30,	37,	48],
                            [7,	12,	13,	8,	4],
                            [22,	18,	16,	7,	2],
                            [7,	1,	3,	26,	0],
                            [14,	27,	15,	9,	6],
                            [8,	31,	17,	48,	0],
                            [15,	16,	25,	26,	1]]
                                                        


for set_of_measurements in set_of_set_of_measurements:
  if len(set_of_measurements) < 2:
    print(set_of_measurements, ": set of measurements is smaller than 2")
    break

  sample_mean = np.mean(set_of_measurements)
  if sample_mean <= 0:
    print(set_of_measurements, ": mean is 0 or negative")
    break

  sample_size = len(set_of_measurements)
  degrees_of_freedom = sample_size-1
  sum_of_squared_differences = np.sum(np.square(sample_mean-set_of_measurements))

  # unbiassed sample variance s^2
  unbiassed_sample_variance = sum_of_squared_differences/degrees_of_freedom
  # corrected sample standard deviation s
  corrected_sample_standard_deviation = np.sqrt(unbiassed_sample_variance)
  # Gamma(N/2)
  gamma_N_over_2 = math.gamma(sample_size/2)
  # Gamma((N-1)/2)
  gamma_df_over_2 = math.gamma(degrees_of_freedom/2)
  # c_4(N)
  c_4_N = math.sqrt(2/degrees_of_freedom)*gamma_N_over_2/gamma_df_over_2
  # unbiassed sample std dev s/c_4
  unbiassed_sample_std_dev_s_c_4 = corrected_sample_standard_deviation/c_4_N
  # standard error of the unbiassed sample variance (assumes normally distributed population)
  standard_error_of_unbiassed_sample_variance = unbiassed_sample_variance*np.sqrt(2/degrees_of_freedom)
  # estimated std err of std dev based on std err of unbiassed sample variance
  est_SE_of_SD_based_on_SE_of_unbiassed_sample_variance = standard_error_of_unbiassed_sample_variance/(2*unbiassed_sample_std_dev_s_c_4)

  # COEFFICIENT OF VARIATION CV
  coefficient_of_variation = (unbiassed_sample_std_dev_s_c_4/sample_mean)*100
  # SMALL SAMPLE CORRECTED COEFFICIENT OF VARIATION CV*
  small_sample_coefficient_of_variation = (1+(1/(4*sample_size)))*coefficient_of_variation

  # compute percentage of measured values within 1 and 2 standard deviations from the mean
  # initialise counts
  count_within_1_sd = 0
  count_within_2_sd = 0
  # for each measured value
  for m in set_of_measurements:
    # if it's within two std devs, increment count_within_2_sd
    if np.abs(m-sample_mean) < 2*unbiassed_sample_std_dev_s_c_4:
      count_within_2_sd += 1
      #if it's also within one std devs, increment count_within_1_sd
      if np.abs(m-sample_mean) < unbiassed_sample_std_dev_s_c_4:
        count_within_1_sd += 1

  # report results as described in code description above
  print("The unbiased coefficient of variation is",small_sample_coefficient_of_variation)
  print("for a mean of",sample_mean,", ")
  print("unbiased sample standard deviation of",unbiassed_sample_std_dev_s_c_4,", with 95\% CI",t.interval(0.95, degrees_of_freedom, loc=unbiassed_sample_std_dev_s_c_4, scale=est_SE_of_SD_based_on_SE_of_unbiassed_sample_variance),",")
  print("and a sample size of",sample_size,".")
  print(count_within_2_sd/sample_size*100,"% of measured values fall within two standard deviations.")
  print(round(count_within_1_sd/sample_size*100, 3),"% of measured values fall within one standard deviation.", )

The unbiased coefficient of variation is 19.597867016429742
for a mean of 46.2 , 
unbiased sample standard deviation of 8.623061487229085 , with 95\% CI (1.1439848332990952, 16.102138141159074) ,
and a sample size of 5 .
100.0 % of measured values fall within two standard deviations.
60.0 % of measured values fall within one standard deviation.
The unbiased coefficient of variation is 19.291480717687847
for a mean of 40.2 , 
unbiased sample standard deviation of 7.385881189057632 , with 95\% CI (0.9798533935244027, 13.791908984590862) ,
and a sample size of 5 .
100.0 % of measured values fall within two standard deviations.
60.0 % of measured values fall within one standard deviation.
The unbiased coefficient of variation is 46.983537053764096
for a mean of 8.8 , 
unbiased sample standard deviation of 3.937667867363086 , with 95\% CI (0.5223936215118035, 7.352942113214368) ,
and a sample size of 5 .
100.0 % of measured values fall within two standard deviations.
60.0 % of measured valu