## Carga de datasets

In [1]:
import os

# List the entries of the local 'datasets' directory to see which annotation files are available.
os.listdir('datasets')

['D2 - Anotación Grupo 6.tsv',
 'D1 - Anotación Grupo 6.tsv',
 'D2 - Anotación ChatGPT.tsv',
 'D1 - Anotación ChatGPT.tsv',
 'D2 - Anotación Original.tsv',
 'D3 - Anotación Original.tsv',
 'D1 - Anotación Original.tsv']

In [2]:
import pandas as pd
import numpy as np
import krippendorff

# Columns removed from most of the tables loaded below before the analysis.
drop_cols = ['nro', 'title','text','context_tweet','body']


def _label_as_chatgpt(raw, column_names, unused_columns):
    """Give a freshly read ChatGPT table the same layout as the group tables.

    A constant 'Nombre anotador(a)' column with value 'ChatGPT' is inserted at
    position 2 (this modifies ``raw``), the columns are renamed positionally to
    ``column_names`` and ``unused_columns`` are dropped. Returns the new frame.
    """
    raw.insert(2, 'Nombre anotador(a)', 'ChatGPT')
    raw.columns = column_names
    return raw.drop(columns=unused_columns)


# --- Dataset 1 ---
# The group file is parsed only once (header taken from its second line, header=1);
# its full column list is reused as the header for the ChatGPT tables.
d1_grupo_6_full = pd.read_csv('datasets/D1 - Anotación Grupo 6.tsv', header=1, delimiter='\t')
cols = d1_grupo_6_full.columns
d1_grupo_6 = d1_grupo_6_full.drop(columns=drop_cols)
d1_original = pd.read_csv('datasets/D1 - Anotación Original.tsv', header=0, delimiter='\t').drop(columns=drop_cols)
# Read with header=None: every line of this file is treated as data.
d1_chatgpt = _label_as_chatgpt(
    pd.read_csv('datasets/D1 - Anotación ChatGPT.tsv', delimiter='\t', header=None),
    cols,
    drop_cols,
)

# --- Dataset 2 ---
# The annotator column is renamed so that it matches the name used for dataset 1.
d2_grupo_6 = (
    pd.read_csv('datasets/D2 - Anotación Grupo 6.tsv', header=0, delimiter='\t')
    .drop(columns=drop_cols)
    .rename(columns={'Nombre anotador':'Nombre anotador(a)'})
)
d2_original = pd.read_csv('datasets/D2 - Anotación Original.tsv', header=0, delimiter='\t').drop(columns=drop_cols)
# Unlike the D1 ChatGPT file, this one is read with the default header handling
# (first line consumed as header) — NOTE(review): confirm both files really differ.
d2_chatgpt = _label_as_chatgpt(
    pd.read_csv('datasets/D2 - Anotación ChatGPT.tsv', delimiter='\t'),
    cols,
    drop_cols,
)

# --- Dataset 3 ---
# Only drop_cols[1:] is dropped here, i.e. 'nro' is not dropped
# (presumably this file has no such column — TODO confirm).
d3_original = pd.read_csv('datasets/D3 - Anotación Original.tsv', header=0, delimiter='\t').drop(columns=drop_cols[1:])

# Keep the rows whose tweet id appears more than once in our group's annotations;
# inter-annotator agreement is computed on these shared tweets.
d1_iaa = d1_grupo_6[d1_grupo_6.duplicated('id', keep=False)]
d2_iaa = d2_grupo_6[d2_grupo_6.duplicated('id', keep=False)]

d1_iaa.head()

Unnamed: 0,id,Nombre anotador(a),HATEFUL,WOMEN,LGBTI,RACISM,CLASS,POLITICS,DISABLED,APPEARANCE,CRIMINAL
120,397746,Tobias Muñoz,0,0,0,0,0,0.0,0,0,0
121,397747,Tobias Muñoz,0,0,0,0,0,0.0,0,0,0
122,397748,Tobias Muñoz,1,0,0,0,0,1.0,0,0,0
123,397750,Tobias Muñoz,1,0,0,0,0,1.0,0,0,0
124,397751,Tobias Muñoz,1,0,0,0,1,1.0,0,0,0


## Chequeo de empates en nuestras anotaciones

In [25]:
def total_votes(dataset):
    """Add up the annotators' votes for every label and tweet.

    ``dataset`` is a long-format table with the columns 'Nombre anotador(a)',
    'id' and one column per discrimination label. The result has one row per
    label and one column per tweet id; each cell is the sum of that label's
    values over all annotators of the tweet.
    """
    discrimination_types = ['HATEFUL','WOMEN','LGBTI','RACISM','CLASS','POLITICS','DISABLED','APPEARANCE','CRIMINAL']

    # One annotator-by-tweet table per label.
    per_label = {}
    for label in discrimination_types:
        per_label[f'{label}'] = dataset.pivot(index='Nombre anotador(a)', columns='id', values=label)

    # Stacking the tables puts the label in the outer index level, so grouping
    # on level 0 sums over the annotators of each label.
    stacked = pd.concat(per_label, axis=0)
    return stacked.groupby(level=0).sum()

# Vote totals (labels as rows, tweet ids as columns) for the tweets shared by our annotators.
votos_d1 = total_votes(d1_iaa)
empates_d1 = votos_d1.eq(2)  # a cell with exactly two votes is counted as a tie

votos_d2 = total_votes(d2_iaa)
empates_d2 = votos_d2.eq(2)  # same tie criterion for dataset 2

In [30]:
# Show the per-label vote totals for the shared tweets of dataset 1.
votos_d1

id,397746,397747,397748,397750,397751,397752,405163,405191,405195,405201
APPEARANCE,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,3.0,1.0
CLASS,0.0,0.0,0.0,0.0,1.0,0.0,4.0,3.0,2.0,1.0
CRIMINAL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DISABLED,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HATEFUL,1.0,1.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0
LGBTI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
POLITICS,1.0,1.0,4.0,4.0,4.0,4.0,1.0,2.0,0.0,2.0
RACISM,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,4.0,0.0
WOMEN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0


In [31]:
# Show the per-label vote totals for the shared tweets of dataset 2.
votos_d2

id,401880,401913,403814,404076,404924,406029,406057,407231,407997,408107
APPEARANCE,0,0,0,0,0,0,0,0,0,2
CLASS,0,0,0,0,0,0,0,1,0,0
CRIMINAL,0,0,0,0,0,0,0,0,0,0
DISABLED,0,0,0,0,0,0,0,0,0,0
HATEFUL,4,4,0,0,0,4,4,4,1,4
LGBTI,4,4,0,0,0,4,4,0,0,0
POLITICS,0,0,0,0,0,0,0,4,0,0
RACISM,4,4,0,0,0,0,0,0,0,0
WOMEN,0,0,0,0,0,0,0,0,1,2


In [29]:
# Total number of tied (label, tweet) cells in each dataset.
n_empates_d1 = empates_d1.sum().sum()
n_empates_d2 = empates_d2.sum().sum()
print("Cantidad de empates en dataset 1:", n_empates_d1)
print("Cantidad de empates en dataset 2:", n_empates_d2)

Cantidad de empates en dataset 1: 5
Cantidad de empates en dataset 2: 2


## Cálculo del alpha de Krippendorff

### IAA para 10 tweets anotados por nosotros en ambos datasets

In [3]:
def krippendorff_alpha(dataset, return_counts=True):
    """Compute Krippendorff's alpha (nominal level) for every discrimination label.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Long-format annotations with the columns 'Nombre anotador(a)', 'id'
        and one column per discrimination label.
    return_counts : bool, default True
        When True the result also carries a 'count' column: the number of
        tweets whose values for the label add up to at least 2 (for 0/1
        labels, tweets marked positive by at least two annotators).

    Returns
    -------
    pandas.DataFrame
        One row per label, with the column 'alpha' (rounded to two decimals)
        and, if requested, 'count'.

    Notes
    -----
    When a label has at most one distinct observed value, alpha is set to
    1.00 without calling the library. Missing cells (an annotator who did not
    label a tweet) are ignored when counting distinct values, so a partially
    filled table of identical labels is also reported as 1.00.
    ``DataFrame.pivot`` raises if an annotator has several rows for one id.
    """
    discrimination_types = ['HATEFUL','WOMEN','LGBTI','RACISM','CLASS','POLITICS','DISABLED','APPEARANCE','CRIMINAL']
    # Annotators as rows, tweets as columns; NaN where an annotator has no row for a tweet.
    annot_dict = {f'{i}': dataset.pivot(index='Nombre anotador(a)', columns='id', values=i) for i in discrimination_types}
    result_columns = ['count', 'alpha'] if return_counts else ['alpha']
    scores_df = pd.DataFrame(columns=result_columns)

    for label in discrimination_types:
        annotations = annot_dict[label]
        if return_counts:
            # Count the tweets whose values for this label sum to 2 or more.
            scores_df.loc[label, 'count'] = (annotations.sum(axis=0) >= 2).sum()

        # nunique() skips missing values, so NaN cells produced by the pivot
        # are not mistaken for a second category.
        n_observed_values = pd.Series(annotations.to_numpy().ravel()).nunique()
        if n_observed_values <= 1:
            scores_df.loc[label, 'alpha'] = 1.00
        else:
            scores_df.loc[label, 'alpha'] = round(krippendorff.alpha(reliability_data=annotations, level_of_measurement='nominal'), 2)

    return scores_df

Scores para dataset 1

In [4]:
# Agreement among our group's annotators on the dataset 1 tweets that were annotated more than once.
krippendorff_alpha(d1_iaa)

Unnamed: 0,count,alpha
HATEFUL,8,0.49
WOMEN,1,0.32
LGBTI,0,1.0
RACISM,2,1.0
CLASS,3,0.47
POLITICS,6,0.43
DISABLED,0,1.0
APPEARANCE,2,0.27
CRIMINAL,0,1.0


Scores para dataset 2

In [5]:
# Agreement among our group's annotators on the dataset 2 tweets that were annotated more than once.
krippendorff_alpha(d2_iaa)

Unnamed: 0,count,alpha
HATEFUL,6,0.9
WOMEN,1,0.18
LGBTI,4,1.0
RACISM,2,1.0
CLASS,0,0.0
POLITICS,1,1.0
DISABLED,0,1.0
APPEARANCE,1,0.32
CRIMINAL,0,1.0


### IAA entre anotaciones originales y las nuestras

In [55]:
def custom_mode(x):
    """Return the most frequent value of ``x`` with a fixed tie-breaking rule.

    ``Series.mode`` returns the modes in sorted order. When several values
    share the top frequency the second one is returned, so a 0/1 tie resolves
    to 1. When ``x`` has no non-missing value (empty or all NaN) there is
    nothing to vote on and NaN is returned instead of raising IndexError.
    """
    mode_values = x.mode()
    if mode_values.empty:
        return float('nan')
    if len(mode_values) > 1:
        # Tie: take the second mode in sorted order (1 for binary labels).
        return mode_values.iat[1]
    return mode_values.iat[0]

def majority_vote(dataset):
    """Collapse repeated annotations of a tweet into a single majority-vote row.

    Rows whose 'id' occurs only once are passed through untouched. Rows whose
    'id' is repeated are grouped by 'id' and every other column is reduced
    with ``custom_mode``; the annotator of those reduced rows is then set to
    'Voto mayoritario'.

    Returns the untouched rows followed by the reduced rows. Index labels are
    not renumbered, so the result's index is not guaranteed to be unique.
    """
    is_repeated = dataset.duplicated('id', keep=False)
    repeated_rows = dataset[is_repeated]
    single_rows = dataset[~is_repeated]

    # Per-tweet mode of each column; ties are resolved by custom_mode.
    consensus = repeated_rows.groupby('id').agg(custom_mode).reset_index()
    consensus['Nombre anotador(a)'] = 'Voto mayoritario'

    return pd.concat([single_rows, consensus])

In [56]:
# Dataset 1: tag the original annotations with a single annotator name
# (this modifies d1_original in place), reduce our group's repeated tweets to
# one majority-vote row each, and stack both sources.
d1_original['Nombre anotador(a)'] = 'Original'
d1_mayoritario = majority_vote(d1_grupo_6)
d1_completo = pd.concat([d1_mayoritario, d1_original])

# Dataset 2: same steps.
d2_original['Nombre anotador(a)'] = 'Original'
d2_mayoritario = majority_vote(d2_grupo_6)
d2_completo = pd.concat([d2_mayoritario, d2_original])

Scores para dataset 1

In [57]:
# Agreement between our annotations (majority vote on repeated tweets) and the original ones, dataset 1.
krippendorff_alpha(d1_completo)

Unnamed: 0,count,alpha
HATEFUL,18,0.21
WOMEN,1,0.17
LGBTI,0,0.0
RACISM,5,0.43
CLASS,2,0.34
POLITICS,6,0.1
DISABLED,1,0.32
APPEARANCE,3,0.66
CRIMINAL,1,0.49


Scores para dataset 2

In [58]:
# Agreement between our annotations (majority vote on repeated tweets) and the original ones, dataset 2.
krippendorff_alpha(d2_completo)

Unnamed: 0,count,alpha
HATEFUL,60,0.74
WOMEN,3,0.4
LGBTI,8,0.83
RACISM,19,0.81
CLASS,5,0.83
POLITICS,8,0.45
DISABLED,2,0.43
APPEARANCE,13,0.82
CRIMINAL,8,0.63


### IAA entre anotaciones de ChatGPT y las nuestras

Scores para los 10 tweets del dataset 1

In [59]:
# Pool our majority-vote rows with ChatGPT's rows and keep only the tweet ids
# that occur more than once in the pooled table.
d1_grupo6_chatgpt = (
    pd.concat([d1_mayoritario, d1_chatgpt])
    .loc[lambda pooled: pooled.duplicated('id', keep=False)]
    .reset_index(drop=True)
)

krippendorff_alpha(d1_grupo6_chatgpt)

Unnamed: 0,count,alpha
HATEFUL,8,-0.06
WOMEN,0,-0.06
LGBTI,0,1.0
RACISM,2,1.0
CLASS,2,0.55
POLITICS,5,0.81
DISABLED,0,1.0
APPEARANCE,0,-0.12
CRIMINAL,0,1.0


Scores para los 10 tweets del dataset 2

In [60]:
# Pool our majority-vote rows with ChatGPT's rows and keep only the tweet ids
# that occur more than once in the pooled table.
d2_grupo6_chatgpt = (
    pd.concat([d2_mayoritario, d2_chatgpt])
    .loc[lambda pooled: pooled.duplicated('id', keep=False)]
    .reset_index(drop=True)
)

krippendorff_alpha(d2_grupo6_chatgpt)

Unnamed: 0,count,alpha
HATEFUL,5,1.0
WOMEN,1,1.0
LGBTI,3,1.0
RACISM,1,1.0
CLASS,0,1.0
POLITICS,1,1.0
DISABLED,0,1.0
APPEARANCE,0,0.0
CRIMINAL,0,1.0


### IAA entre anotaciones de ChatGPT y las originales

Scores para los 10 tweets del dataset 1 

In [61]:
# Pool the original annotations with ChatGPT's rows and keep only the tweet ids
# that occur more than once in the pooled table.
d1_original_chatgpt = (
    pd.concat([d1_original, d1_chatgpt])
    .loc[lambda pooled: pooled.duplicated('id', keep=False)]
    .reset_index(drop=True)
)

krippendorff_alpha(d1_original_chatgpt)

Unnamed: 0,count,alpha
HATEFUL,5,-0.27
WOMEN,0,0.0
LGBTI,0,1.0
RACISM,2,1.0
CLASS,2,0.55
POLITICS,2,0.21
DISABLED,0,1.0
APPEARANCE,0,-0.19
CRIMINAL,0,1.0


Scores para los 10 tweets del dataset 2

In [62]:
# Pool the original annotations with ChatGPT's rows and keep only the tweet ids
# that occur more than once in the pooled table.
d2_original_chatgpt = (
    pd.concat([d2_original, d2_chatgpt])
    .loc[lambda pooled: pooled.duplicated('id', keep=False)]
    .reset_index(drop=True)
)

krippendorff_alpha(d2_original_chatgpt)

Unnamed: 0,count,alpha
HATEFUL,4,0.79
WOMEN,1,1.0
LGBTI,2,0.74
RACISM,1,1.0
CLASS,0,1.0
POLITICS,1,1.0
DISABLED,0,1.0
APPEARANCE,0,1.0
CRIMINAL,0,1.0
