In [1]:
import numpy as np
import pandas as pd
from agreement import krippendorffs_alpha

In [2]:
def read_annotations(path):
    # Load Excel file into a Pandas ExcelFile object
    xlsx_file = pd.ExcelFile(path)

    # Read all sheets except the first one into a dictionary of dataframes with column headers set to the first row
    dfs_dict = pd.read_excel(xlsx_file, sheet_name=xlsx_file.sheet_names[1:], header=0)

    # Rename the first column header to "annot_id" for all dataframes
    for df in dfs_dict.values():
        df.rename(columns={df.columns[0]: 'annot_id'}, inplace=True)
        df.rename(columns={df.columns[-1]: 'annotation'}, inplace=True)

    # Concatenate all dataframes in the dictionary into a single dataframe
    result_df = pd.concat(dfs_dict.values(), ignore_index=True)

    return result_df


In [3]:
def project_group_annotations(df):
    df_grouped = df.groupby('annot_id').agg({'project': 'first',
                                         'url': 'first',
                                         'label': 'first',
                                         'annotator': list,
                                         'annotation': [('annotation1', lambda x: x.iloc[0]),
                                                        ('annotation2', lambda x: x.iloc[1])]}).reset_index()

    df_grouped.columns = list(map(lambda x: x[0] if x[1] in ['list', 'first'] else x[1], df_grouped.columns.values))
    df_grouped.columns = ['annot_id'] + list(df_grouped.columns.values)[1:]
    return df_grouped

In [4]:
def others_group_annotations(df, level='file'):
    df_grouped = df.groupby('annot_id').agg({
                                        'project': 'first',
                                         'url': 'first',
                                         level: 'first',
                                         'label_1': 'first',
                                         'label_2': 'first',
                                         'label_3': 'first',
                                         'annotator': list,
                                         'annotation': [('annotation1', lambda x: x.iloc[0]),
                                                        ('annotation2', lambda x: x.iloc[1])]}).reset_index()

    df_grouped.columns = list(map(lambda x: x[0] if x[1] in ['list', 'first'] else x[1], df_grouped.columns.values))
    df_grouped.columns = ['annot_id'] + list(df_grouped.columns.values)[1:]
    return df_grouped

In [5]:
def create_annot_table(df):
    question_id = df['annot_id']
    annotator_id = df['annotator']
    annotation = df['annotation']

    annot_table = np.column_stack((question_id, annotator_id, annotation))
    return annot_table

In [32]:
def load_disagreement(path):
    df = pd.read_csv(path, header=0)
    df.rename(columns={df.columns[-1]: 'resolved'}, inplace=True)
    df = df[df.columns.intersection(['annot_id', 'resolved'])]
    return df


def join_disagreement(df, disagreement_df):
    df = df.merge(disagreement_df, how='left', left_on='annot_id', right_on='annot_id')
    df['final'] = df['resolved']
    df['final'].fillna(df['annotation1'], inplace=True)
    df['final'] = df['final'].astype('uint8')
    return df


In [33]:
df = read_annotations('/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/project_human_eval_best-voting.xlsx')
# df = group_annotations(df)
annot_table = create_annot_table(df)

In [34]:
df['annotation'] = df['annotation'].replace('-', '0').fillna(0).astype('int64')
agg_df = project_group_annotations(df)


In [35]:
disagreement_df = load_disagreement('/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/project_human_disagreements.xlsx')

In [36]:
resolved_df = join_disagreement(agg_df, disagreement_df)


In [37]:
resolved_df

Unnamed: 0,annot_id,project,url,label,annotator,annotation1,annotation2,resolved,final
0,0,wandora-team|wandora,https://github.com/wandora-team/wandora,Containerization,"[G, H]",0,0,,0
1,1,wandora-team|wandora,https://github.com/wandora-team/wandora,bioinformatics,"[G, H]",0,0,,0
2,2,wandora-team|wandora,https://github.com/wandora-team/wandora,graphical user interface,"[G, H]",1,1,,1
3,3,wandora-team|wandora,https://github.com/wandora-team/wandora,science,"[G, H]",1,1,,1
4,4,wandora-team|wandora,https://github.com/wandora-team/wandora,shell tool,"[G, H]",0,0,,0
...,...,...,...,...,...,...,...,...,...
811,811,apache|brooklyn-server,https://github.com/apache/brooklyn-server,big data,"[F, G]",0,0,,0
812,812,apache|brooklyn-server,https://github.com/apache/brooklyn-server,web application security,"[F, G]",0,0,,0
813,813,apache|brooklyn-server,https://github.com/apache/brooklyn-server,random forest,"[F, G]",0,0,,0
814,814,apache|brooklyn-server,https://github.com/apache/brooklyn-server,computer configuration,"[F, G]",0,1,1.0,1


In [243]:
from agreement.utils.transform import pivot_table_frequency
answer_table = pivot_table_frequency(np.array(df['annot_id']), np.array(df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
user_table = pivot_table_frequency(np.array(df['annotator']), np.array(df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
alpha = krippendorffs_alpha(answer_table)
alpha

0.5561987099806434

In [244]:
from agreement import cohens_kappa

kappa = cohens_kappa(answer_table, user_table)
kappa

0.5567438677681285

In [245]:
np.array(df['annotation'])

array([1, 0, 0, ..., 0, 0, 0], dtype=object)

In [246]:
np.array(df['annot_id']).dtype

dtype('int64')

In [247]:
from collections import Counter

Counter(df['annotation'].replace('-', '0').fillna(0).astype('int64')).most_common()

[(0, 954), (1, 678)]

In [248]:
disagreement =  agg_df[agg_df['annotation1'] != agg_df['annotation2']]

In [249]:
disagreement

Unnamed: 0,annot_id,project,url,label,annotator,annotation1,annotation2
9,9,Waikato|weka-3.8,https://github.com/Waikato/weka-3.8,database,"[G, H]",1,0
59,59,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,data binding,"[G, H]",1,0
70,70,Camelcade|Perl5-IDEA,https://github.com/Camelcade/Perl5-IDEA,debugger,"[G, H]",1,0
71,71,Camelcade|Perl5-IDEA,https://github.com/Camelcade/Perl5-IDEA,Parser combinator,"[G, H]",1,0
73,73,Camelcade|Perl5-IDEA,https://github.com/Camelcade/Perl5-IDEA,code generation,"[G, H]",1,0
...,...,...,...,...,...,...,...
779,779,epam|cloud-pipeline,https://github.com/epam/cloud-pipeline,web server,"[F, G]",0,1
791,791,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,file system,"[F, G]",0,1
798,798,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,File Transfer Protocol,"[F, G]",0,1
814,814,apache|brooklyn-server,https://github.com/apache/brooklyn-server,computer configuration,"[F, G]",0,1


In [250]:
disagreement.to_csv('/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/project_human_disagreements.xlsx', index=False)

In [251]:
df = read_annotations('/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/file_human_eval_best-voting.xlsx')

In [252]:
df

Unnamed: 0,annot_id,project,url,file,label_1,label_2,label_3,annotator,annotation
0,125,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/instantMessaging/syncse...,education,user interface,microservices,A,0
1,126,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/course/nodes/fo/FOPeekv...,data binding,education,website,A,2
2,127,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/course/assessment/Asses...,education,microservices,website,A,1
3,128,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/user/ProfileFormControl...,user interface,content management system,graphical user interface,A,1
4,129,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/portal/calendar/Calenda...,education,website,web application,A,1
...,...,...,...,...,...,...,...,...,...
1995,245,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,odps-sdk/odps-sdk-core/src/main/java/com/aliyu...,Other,,,H,0
1996,246,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,odps-sdk/odps-sdk-core/src/main/java/com/aliyu...,big data,data binding,data structure,H,0
1997,247,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,odps-sdk/odps-sdk-udf/src/main/java/com/aliyun...,big data,random forest,artificial neural network,H,0
1998,248,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,odps-sdk/odps-sdk-core/src/main/java/com/aliyu...,big data,,,H,0


In [253]:
agg_df = others_group_annotations(df)

In [254]:
agg_df

Unnamed: 0,annot_id,project,url,file,label_1,label_2,label_3,annotator,annotation1,annotation2
0,0,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui/hydrograph.ui.validators/src/mai...,"extract, transform, load",,,"[G, H]",0,0
1,1,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui/hydrograph.ui.common/src/main/ja...,"extract, transform, load",Web Components,big data,"[G, H]",0,0
2,2,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.engine/hydrograph.engine.core/src/m...,Other,,,"[G, H]",0,0
3,3,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.engine/hydrograph.engine.core/src/m...,Web Components,language model,graphical user interface,"[G, H]",0,0
4,4,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui/hydrograph.ui.expression.editor/...,graphical user interface,user interface,,"[G, H]",1,0
...,...,...,...,...,...,...,...,...,...,...
995,995,zaproxy|zaproxy,https://github.com/zaproxy/zaproxy,zap/src/main/java/org/zaproxy/zap/extension/ps...,security,text editor,user interface,"[F, G]",1,1
996,996,zaproxy|zaproxy,https://github.com/zaproxy/zaproxy,zap/src/main/java/org/zaproxy/zap/view/message...,security,web browser engine,,"[F, G]",0,0
997,997,zaproxy|zaproxy,https://github.com/zaproxy/zaproxy,zap/src/main/java/org/zaproxy/zap/extension/st...,web server,security,,"[F, G]",1,1
998,998,zaproxy|zaproxy,https://github.com/zaproxy/zaproxy,zap/src/main/java/org/zaproxy/zap/view/Context...,graphical user interface,security,text editor,"[F, G]",1,0


In [255]:
Counter(df['annotation'].replace('-', '0').fillna(0).astype('int64')).most_common()

[(0, 1021), (1, 549), (2, 277), (3, 152), (9, 1)]

In [256]:
disagreement =  agg_df[agg_df['annotation1'] != agg_df['annotation2']]

In [257]:
disagreement

Unnamed: 0,annot_id,project,url,file,label_1,label_2,label_3,annotator,annotation1,annotation2
4,4,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui/hydrograph.ui.expression.editor/...,graphical user interface,user interface,,"[G, H]",1,0
7,7,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui/hydrograph.ui.propertywindow/src...,graphical user interface,"extract, transform, load",Web Components,"[G, H]",1,2
8,8,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui/hydrograph.ui.engine/src/main/ja...,big data,,,"[G, H]",1,0
9,9,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.engine/hydrograph.engine.core/src/m...,"extract, transform, load",big data,data binding,"[G, H]",1,3
10,10,Camelcade|Perl5-IDEA,https://github.com/Camelcade/Perl5-IDEA,tt2/src/com/perl5/lang/tt2/TemplateToolkitLang...,language model,integrated development environment,debugger,"[G, H]",0,2
...,...,...,...,...,...,...,...,...,...,...
878,878,silentbalanceyh|vertx-zero,https://github.com/silentbalanceyh/vertx-zero,vertx-istio/zero-eternal/aeon-inlet/src/main/j...,web server,,,"[F, G]",1,0
911,911,swimos|swim,https://github.com/swimos/swim,swim-java/swim-runtime/swim-polyglot/swim.dyna...,serverless computing,WebSocket,real-time computing,"[F, G]",0,1
914,914,swimos|swim,https://github.com/swimos/swim,swim-java/swim-runtime/swim-core/swim.warp/src...,serverless computing,WebSocket,real-time computing,"[F, G]",0,2
916,916,swimos|swim,https://github.com/swimos/swim,swim-java/swim-runtime/swim-core/swim.uri/src/...,serverless computing,WebSocket,microservices,"[F, G]",0,1


In [258]:
disagreement.to_csv('/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/file_human_disagreements.xlsx', index=False)

In [259]:
binarized_df = df.copy(deep=True)
binarized_df.loc[df["annotation"].replace('-', '0').fillna(0).astype('int64') > 0, "annotation"] = 1

In [260]:
binarized_df

Unnamed: 0,annot_id,project,url,file,label_1,label_2,label_3,annotator,annotation
0,125,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/instantMessaging/syncse...,education,user interface,microservices,A,0
1,126,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/course/nodes/fo/FOPeekv...,data binding,education,website,A,1
2,127,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/course/assessment/Asses...,education,microservices,website,A,1
3,128,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/user/ProfileFormControl...,user interface,content management system,graphical user interface,A,1
4,129,OpenOLAT|OpenOLAT,https://github.com/OpenOLAT/OpenOLAT,src/main/java/org/olat/portal/calendar/Calenda...,education,website,web application,A,1
...,...,...,...,...,...,...,...,...,...
1995,245,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,odps-sdk/odps-sdk-core/src/main/java/com/aliyu...,Other,,,H,0
1996,246,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,odps-sdk/odps-sdk-core/src/main/java/com/aliyu...,big data,data binding,data structure,H,0
1997,247,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,odps-sdk/odps-sdk-udf/src/main/java/com/aliyun...,big data,random forest,artificial neural network,H,0
1998,248,aliyun|aliyun-odps-java-sdk,https://github.com/aliyun/aliyun-odps-java-sdk,odps-sdk/odps-sdk-core/src/main/java/com/aliyu...,big data,,,H,0


In [261]:
Counter(binarized_df['annotation'].replace('-', '0').fillna(0).astype('int64')).most_common()

[(0, 1021), (1, 979)]

In [262]:
agg_binarized_df = others_group_annotations(binarized_df)

In [263]:
disagreement_bin =  agg_binarized_df[agg_binarized_df['annotation1'] != agg_binarized_df['annotation2']]

In [264]:
disagreement_bin

Unnamed: 0,annot_id,project,url,file,label_1,label_2,label_3,annotator,annotation1,annotation2
4,4,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui/hydrograph.ui.expression.editor/...,graphical user interface,user interface,,"[G, H]",1,0
8,8,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui/hydrograph.ui.engine/src/main/ja...,big data,,,"[G, H]",1,0
10,10,Camelcade|Perl5-IDEA,https://github.com/Camelcade/Perl5-IDEA,tt2/src/com/perl5/lang/tt2/TemplateToolkitLang...,language model,integrated development environment,debugger,"[G, H]",0,1
11,11,Camelcade|Perl5-IDEA,https://github.com/Camelcade/Perl5-IDEA,mojo/core/src/com/perl5/lang/mojolicious/psi/i...,integrated development environment,text editor,,"[G, H]",0,1
13,13,Camelcade|Perl5-IDEA,https://github.com/Camelcade/Perl5-IDEA,plugin/core/src/com/perl5/lang/perl/extensions...,integrated development environment,dependency injection,language model,"[G, H]",1,0
...,...,...,...,...,...,...,...,...,...,...
878,878,silentbalanceyh|vertx-zero,https://github.com/silentbalanceyh/vertx-zero,vertx-istio/zero-eternal/aeon-inlet/src/main/j...,web server,,,"[F, G]",1,0
911,911,swimos|swim,https://github.com/swimos/swim,swim-java/swim-runtime/swim-polyglot/swim.dyna...,serverless computing,WebSocket,real-time computing,"[F, G]",0,1
914,914,swimos|swim,https://github.com/swimos/swim,swim-java/swim-runtime/swim-core/swim.warp/src...,serverless computing,WebSocket,real-time computing,"[F, G]",0,1
916,916,swimos|swim,https://github.com/swimos/swim,swim-java/swim-runtime/swim-core/swim.uri/src/...,serverless computing,WebSocket,microservices,"[F, G]",0,1


In [265]:
disagreement_bin.to_csv('/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/file_human_disagreements_binary.xlsx', index=False)

In [266]:
answer_table = pivot_table_frequency(np.array(df['annot_id']), np.array(df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
user_table = pivot_table_frequency(np.array(df['annotator']), np.array(df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
alpha = krippendorffs_alpha(answer_table)
alpha

0.5057872163309504

In [267]:
answer_table = pivot_table_frequency(np.array(binarized_df['annot_id']), np.array(binarized_df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
user_table = pivot_table_frequency(np.array(binarized_df['annotator']), np.array(binarized_df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
alpha = krippendorffs_alpha(answer_table)
alpha

0.5460267978178377

In [268]:
kappa = cohens_kappa(answer_table, user_table)
kappa

0.5462095215466232

In [269]:
df = read_annotations(
    '/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/package_human_eval_best-voting.xlsx')
df
agg_df = others_group_annotations(df, 'package')
agg_df
Counter(df['annotation'].replace('-', '0').fillna(0).astype('int64')).most_common()

[(0, 926), (1, 633), (2, 270), (3, 171)]

In [270]:
disagreement = agg_df[agg_df['annotation1'] != agg_df['annotation2']]
disagreement

Unnamed: 0,annot_id,project,url,package,label_1,label_2,label_3,annotator,annotation1,annotation2
0,0,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.engine.jaxb.sort,"extract, transform, load",data structure,big data,"[G, H]",0,1
1,1,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.engine.jaxb.ifxml,"extract, transform, load",big data,data binding,"[G, H]",3,1
3,3,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui.graph.model.helper,language model,graphical user interface,data binding,"[G, H]",0,2
6,6,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.engine.jaxb.ofmixedscheme,"extract, transform, load",big data,data binding,"[G, H]",3,1
12,12,Camelcade|Perl5-IDEA,https://github.com/Camelcade/Perl5-IDEA,com.perl5.lang.perl.lexer,integrated development environment,regular expression,interpreter,"[G, H]",1,3
...,...,...,...,...,...,...,...,...,...,...
966,966,wso2|micro-integrator,https://github.com/wso2/micro-integrator,org.wso2.carbon.inbound.endpoint.protocol.http...,microservices,instant messaging,server,"[F, G]",1,3
969,969,wso2|micro-integrator,https://github.com/wso2/micro-integrator,org.wso2.esb.integration.common.clients.inboun...,web service,,,"[F, G]",1,3
970,970,xap|xap,https://github.com/xap/xap,org.openspaces.persistency.kafka.internal,distributed computing,microservices,big data,"[F, G]",3,0
974,974,xap|xap,https://github.com/xap/xap,org.openspaces.core.map,distributed computing,big data,microservices,"[F, G]",0,1


In [271]:
disagreement.to_csv(
    '/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/package_human_disagreements.xlsx',
    index=False)
binarized_df = df.copy(deep=True)
binarized_df.loc[df["annotation"].replace('-', '0').fillna(0).astype('int64') > 0, "annotation"] = 1
binarized_df
Counter(binarized_df['annotation'].replace('-', '0').fillna(0).astype('int64')).most_common()

[(1, 1074), (0, 926)]

In [272]:

agg_binarized_df = others_group_annotations(binarized_df, 'package')
disagreement_bin = agg_binarized_df[agg_binarized_df['annotation1'] != agg_binarized_df['annotation2']]
disagreement_bin

Unnamed: 0,annot_id,project,url,package,label_1,label_2,label_3,annotator,annotation1,annotation2
0,0,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.engine.jaxb.sort,"extract, transform, load",data structure,big data,"[G, H]",0,1
3,3,BitwiseInc|Hydrograph,https://github.com/BitwiseInc/Hydrograph,hydrograph.ui.graph.model.helper,language model,graphical user interface,data binding,"[G, H]",0,1
22,22,Daimler|sechub,https://github.com/Daimler/sechub,com.mercedesbenz.sechub.integrationtest.scenario6,security,client,server,"[G, H]",0,1
24,24,Daimler|sechub,https://github.com/Daimler/sechub,com.mercedesbenz.sechub.domain.administration....,security,continuous integration,web application,"[G, H]",0,1
26,26,Daimler|sechub,https://github.com/Daimler/sechub,com.mercedesbenz.sechub.sharedkernel.usecases....,security,microservices,user interface,"[G, H]",1,0
...,...,...,...,...,...,...,...,...,...,...
961,961,wso2|micro-integrator,https://github.com/wso2/micro-integrator,org.wso2.micro.integrator.dataservices.core.sc...,microservices,database,instant messaging,"[F, G]",1,0
965,965,wso2|micro-integrator,https://github.com/wso2/micro-integrator,org.wso2.micro.integrator.security.user.core.jdbc,microservices,web application security,instant messaging,"[F, G]",1,0
970,970,xap|xap,https://github.com/xap/xap,org.openspaces.persistency.kafka.internal,distributed computing,microservices,big data,"[F, G]",1,0
974,974,xap|xap,https://github.com/xap/xap,org.openspaces.core.map,distributed computing,big data,microservices,"[F, G]",0,1


In [273]:

disagreement_bin.to_csv(
    '/home/sasce/PycharmProjects/CodeGraphClassification/data/processed/manual_eval/package_human_disagreements_binary.xlsx',
    index=False)
answer_table = pivot_table_frequency(np.array(df['annot_id']),
                                     np.array(df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
user_table = pivot_table_frequency(np.array(df['annotator']),
                                   np.array(df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
alpha = krippendorffs_alpha(answer_table)
alpha


0.46232783042276865

In [274]:
kappa = cohens_kappa(answer_table, user_table)
kappa

0.46370919010376205

In [275]:
answer_table = pivot_table_frequency(np.array(binarized_df['annot_id']),
                                     np.array(binarized_df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
user_table = pivot_table_frequency(np.array(binarized_df['annotator']),
                                   np.array(binarized_df['annotation'].replace('-', '0').fillna(0)).astype('int64'))
alpha = krippendorffs_alpha(answer_table)
alpha
kappa = cohens_kappa(answer_table, user_table)
kappa

0.49929959165735843