## Carga de datasets

In [12]:
import os

# Show which annotation files are available in the local `datasets` folder.
os.listdir(path='datasets')

['D2 - Anotación Grupo 6.tsv',
 'D1 - Anotación Grupo 6.tsv',
 'D2 - Anotación Original.tsv',
 'D3 - Anotación Original.tsv',
 'D1 - Anotación Original.tsv']

In [30]:
import pandas as pd
import numpy as np
import krippendorff

# Columns removed from every annotation file right after it is read.
drop_cols = ['nro', 'title','text','context_tweet','body']


def _read_annotations(tsv_path):
    """Read one tab-separated annotation file and drop the columns in `drop_cols`."""
    table = pd.read_csv(tsv_path, sep='\t', header=0)
    return table.drop(columns=drop_cols)


def _rows_with_repeated_id(annotations):
    """Return the rows of `annotations` whose `id` value occurs more than once."""
    repeated = annotations.duplicated(subset='id', keep=False)
    return annotations[repeated]


d1_grupo_6 = _read_annotations('datasets/D1 - Anotación Grupo 6.tsv')
d1_original = _read_annotations('datasets/D1 - Anotación Original.tsv')
d2_grupo_6 = _read_annotations('datasets/D2 - Anotación Grupo 6.tsv')
d2_original = _read_annotations('datasets/D2 - Anotación Original.tsv')
d3_original = _read_annotations('datasets/D3 - Anotación Original.tsv')

# Inter-annotator agreement is computed only on rows whose tweet id is repeated.
# NOTE(review): a repeated id presumably means the tweet was labelled by more than
# one annotator, not necessarily by every annotator; confirm against the data.
d1_iaa = _rows_with_repeated_id(d1_grupo_6)
d2_iaa = _rows_with_repeated_id(d2_grupo_6)

d1_iaa.head()

Unnamed: 0,id,Nombre anotador,HATEFUL,WOMEN,LGBTI,RACISM,CLASS,POLITICS,DISABLED,APPEARANCE,CRIMINAL
120,397746,Tobias Muñoz,0,0,0,0,0,0.0,0,0,0
121,397747,Tobias Muñoz,0,0,0,0,0,0.0,0,0,0
122,397748,Tobias Muñoz,1,0,0,0,0,1.0,0,0,0
123,397750,Tobias Muñoz,1,0,0,0,0,1.0,0,0,0
124,397751,Tobias Muñoz,1,0,0,0,1,1.0,0,0,0


## Cálculo del alpha de Krippendorff

In [31]:
def krippendorff_alpha(dataset, discrimination_types=None):
    """Compute Krippendorff's alpha (nominal level) for each annotation category.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Long-format annotations with the columns ``'Nombre anotador'``, ``'id'``
        and one column per category. Each (annotator, id) pair must appear at
        most once, otherwise ``DataFrame.pivot`` raises ``ValueError``.
    discrimination_types : iterable of str, optional
        Category columns to score. Defaults to the nine label columns used in
        this notebook.

    Returns
    -------
    pandas.DataFrame
        Indexed by category name, with a single float column ``'alpha'``
        rounded to two decimals.

    Notes
    -----
    * When every available rating of a category is the same value, alpha is
      reported as 1.0 (the convention this notebook already used; the
      coefficient itself is not defined without variation).
    * When a category has no ratings at all, alpha is reported as NaN.
    """
    if discrimination_types is None:
        discrimination_types = ['HATEFUL','WOMEN','LGBTI','RACISM','CLASS','POLITICS','DISABLED','APPEARANCE','CRIMINAL']

    alphas = {}
    for category in discrimination_types:
        # Reliability matrix: one row per annotator, one column per tweet id.
        # Pairs that were never annotated show up as NaN.
        annotations = dataset.pivot(index='Nombre anotador', columns='id', values=category)

        # Count distinct labels while ignoring the NaN gaps: a category whose only
        # real label is e.g. 0 must not be treated as having two values (0 and NaN),
        # because it would then be sent to krippendorff.alpha, which needs at least
        # two distinct values to work with.
        n_labels = pd.Series(annotations.to_numpy().ravel()).nunique()

        if n_labels == 0:
            alphas[category] = np.nan
        elif n_labels == 1:
            alphas[category] = 1.00
        else:
            alphas[category] = round(
                krippendorff.alpha(reliability_data=annotations, level_of_measurement='nominal'),
                2,
            )

    # Build the frame once, with a real float dtype, instead of growing it row by row.
    return pd.DataFrame({'alpha': pd.Series(alphas, dtype=float)})

In [32]:
# Per-category agreement on the D1 rows kept for inter-annotator comparison.
krippendorff_alpha(dataset=d1_iaa)

Unnamed: 0,alpha
HATEFUL,0.49
WOMEN,0.32
LGBTI,1.0
RACISM,1.0
CLASS,0.47
POLITICS,0.43
DISABLED,1.0
APPEARANCE,0.27
CRIMINAL,1.0


In [33]:
# Per-category agreement on the D2 rows kept for inter-annotator comparison.
krippendorff_alpha(dataset=d2_iaa)

Unnamed: 0,alpha
HATEFUL,0.9
WOMEN,0.18
LGBTI,1.0
RACISM,1.0
CLASS,0.0
POLITICS,1.0
DISABLED,1.0
APPEARANCE,0.32
CRIMINAL,1.0
