## Dbias Package 
We used Dbias (Raza et al., 2022) to run the first experiments with Transformers.

https://github.com/dreji18/Fairness-in-AI

#### Installing the packages and dependencies

In [None]:
!pip install Dbias
!pip install https://huggingface.co/d4data/en_pipeline/resolve/main/en_pipeline-any-py3-none-any.whl

In [None]:
from Dbias.text_debiasing import *
from Dbias.bias_classification import *
from Dbias.bias_recognition import *
from Dbias.bias_masking import *
import pandas as pd
import numpy as np

In [None]:
# Read wikibias_re.csv, add a 0/1 `binary_class` target (1 when the `type`
# value contains the character '1') and keep only the columns used below.
df = (
    pd.read_csv('data/wikibias_re.csv')
    .assign(binary_class=lambda frame: frame['type'].apply(lambda value: int('1' in value)))
    [["id", "sentence", "bias_words", "binary_class"]]
)
df.head()

Unnamed: 0,id,sentence,bias_words,binary_class
0,0_0,""" We "" died of natural causes at age eight in ...",O O O O O O O O O O O O O O O O O O O O O O,0
1,0_1,`` We '' died of natural causes at age eight i...,O O O O O O O O O O O O O O O O O O O O O O O,0
2,1_0,Evolution is the source of the vast diversity ...,O 1 O O O O O O O O O O O O O O,1
3,1_1,Evolution may be the source of the vast divers...,O O O O O O O O O O O O O O O O O,0
4,2_0,Credit information such as a persons previous ...,O O O O O O O O O O O O O O O O O O O,0


In [None]:
# Read the ';'-separated final_labels_SG2.csv, add a 0/1 `binary_class` target
# (1 when `label_bias` contains 'Biased') and keep only the columns used below.
df_sg2 = (
    pd.read_csv('data/final_labels_SG2.csv', sep=';')
    .assign(binary_class=lambda frame: frame['label_bias'].apply(lambda label: int('Biased' in label)))
    [["text", "binary_class", "biased_words"]]
)
df_sg2.head()

Unnamed: 0,text,binary_class,biased_words
0,"""Orange Is the New Black"" star Yael Stone is r...",0,[]
1,"""We have one beautiful law,"" Trump recently sa...",1,"['bizarre', 'characteristically']"
2,"...immigrants as criminals and eugenics, all o...",1,"['criminals', 'fringe', 'extreme']"
3,...we sounded the alarm in the early months of...,1,[]
4,[Black Lives Matter] is essentially a non-fals...,1,['cult']


In [None]:
# Read the tab-separated dev_en.tsv and recode `label` as 1 for 'SUBJ', 0 otherwise.
en_corpus_Check = pd.read_csv('data/dev_en.tsv', delimiter='\t')
en_corpus_Check = en_corpus_Check.assign(
    label=en_corpus_Check['label'].eq('SUBJ').astype(int)
)
en_corpus_Check.head()

Unnamed: 0,sentence_id,sentence,label
0,8745d4da-91c9-4538-acee-b0e7b1c413fd,Who will redistribute the hoarded wealth that ...,1
1,43de04ad-d0ac-4852-9b4e-cf0bca066188,What we don’t need is the indiscriminate influ...,1
2,e00b66ee-720a-47e3-a0fb-0e2445b89af6,The Social Distance Between Us shows every sig...,0
3,0b95d635-f821-45dd-9f33-b05d63629195,"History shows that McCarthy and McConnell, lik...",0
4,5ba3117b-3ef9-4815-acb4-a263d3c816bc,So while it’s not hard to reach a banal point ...,1


#### Functions

In [None]:
def custom_classification(x):
    """Classify `x` with Dbias' `classify` and return the first result.

    Returns:
        tuple: the ``'label'`` and ``'score'`` entries of the first item
        returned by ``classify(x)``.
    """
    top_result = classify(x)[0]
    return top_result['label'], top_result['score']

def custom_recognizer(x):
    """Run Dbias' `recognizer` on `x` and join the detected entities.

    Returns:
        str: the ``'entity'`` value of every item returned by
        ``recognizer(x)``, in order, separated by ``", "`` (empty string
        when nothing is returned).
    """
    return ", ".join(hit['entity'] for hit in recognizer(x))

def custom_debiasing(x):
    """Return up to three de-biased rewrites of `x` produced by Dbias' `run`.

    Returns:
        str: the ``'Sentence'`` value of the first three suggestions,
        separated by a blank line; an empty string when ``run(x)`` returns
        ``None``.
    """
    suggestions = run(x)
    # Identity check: `== None` would dispatch to the object's __eq__, which
    # can give the wrong answer for types that override equality.
    if suggestions is None:
        return ""
    return "\n\n".join(item['Sentence'] for item in suggestions[:3])

#### Implementation

In [None]:
# Run the Dbias classifier on every sentence; the returned label and score
# are stored in the new `state` and `probability` columns.
df[['state', 'probability']] = df['sentence'].apply(custom_classification).tolist()
df

Unnamed: 0,id,sentence,bias_words,binary_class,state,probability
0,0_0,""" We "" died of natural causes at age eight in ...",O O O O O O O O O O O O O O O O O O O O O O,0,Non-biased,0.943307
1,0_1,`` We '' died of natural causes at age eight i...,O O O O O O O O O O O O O O O O O O O O O O O,0,Non-biased,0.725971
2,1_0,Evolution is the source of the vast diversity ...,O 1 O O O O O O O O O O O O O O,1,Biased,0.73975
3,1_1,Evolution may be the source of the vast divers...,O O O O O O O O O O O O O O O O O,0,Biased,0.562016
4,2_0,Credit information such as a persons previous ...,O O O O O O O O O O O O O O O O O O O,0,Non-biased,0.593125
...,...,...,...,...,...,...
8185,4092_1,"Grant resigned , effective July 31 , 1854 , wi...",O O O O O O O O O O O O,0,Non-biased,0.631923
8186,4093_0,It has also joined with many other Christian d...,O O O O O O O O O O O O O O O O O O O O O O O O O,0,Biased,0.986502
8187,4093_1,It has also joined with many Christian denomin...,O O O O O O O O O O O O O O O O O O O O O O O O,0,Biased,0.991014
8188,4094_0,Reflector allows you to decompile .NET assembl...,O O O O O O O O O O O O O O O O O,0,Biased,0.551458


In [None]:
# Run the Dbias classifier on every text; the returned label and score
# are stored in the new `state` and `probability` columns.
df_sg2[['state', 'probability']] = df_sg2['text'].apply(custom_classification).tolist()
df_sg2

Unnamed: 0,text,binary_class,biased_words,state,probability
0,"""Orange Is the New Black"" star Yael Stone is r...",0,[],Non-biased,0.702146
1,"""We have one beautiful law,"" Trump recently sa...",1,"['bizarre', 'characteristically']",Biased,0.961543
2,"...immigrants as criminals and eugenics, all o...",1,"['criminals', 'fringe', 'extreme']",Non-biased,0.604165
3,...we sounded the alarm in the early months of...,1,[],Biased,0.985385
4,[Black Lives Matter] is essentially a non-fals...,1,['cult'],Biased,0.993454
...,...,...,...,...,...
3669,You’ve heard of Jim Crow and Southern Segregat...,1,['ALL'],Biased,0.979269
3670,Young female athletes’ dreams and accomplishme...,1,"['dashed', '""identify""']",Non-biased,0.667677
3671,"Young white men, reacting to social and educat...",1,"['evil', 'white']",Biased,0.994004
3672,Young women taking part in high school and col...,1,"['dashed', '""identify""']",Non-biased,0.670982


In [None]:
# Run the Dbias classifier on every sentence; the returned label and score
# are stored in the new `state` and `probability` columns.
en_corpus_Check[['state', 'probability']] = en_corpus_Check['sentence'].apply(custom_classification).tolist()
en_corpus_Check

Unnamed: 0,sentence_id,sentence,label,state,probability
0,8745d4da-91c9-4538-acee-b0e7b1c413fd,Who will redistribute the hoarded wealth that ...,1,Biased,0.934526
1,43de04ad-d0ac-4852-9b4e-cf0bca066188,What we don’t need is the indiscriminate influ...,1,Non-biased,0.629372
2,e00b66ee-720a-47e3-a0fb-0e2445b89af6,The Social Distance Between Us shows every sig...,0,Biased,0.990144
3,0b95d635-f821-45dd-9f33-b05d63629195,"History shows that McCarthy and McConnell, lik...",0,Biased,0.813161
4,5ba3117b-3ef9-4815-acb4-a263d3c816bc,So while it’s not hard to reach a banal point ...,1,Biased,0.96553
...,...,...,...,...,...
238,62cff7b0-e459-4cbe-b534-6a814a50302e,"The poverty memoir is an old, popular, well-es...",0,Biased,0.61697
239,dfd1bc2d-5419-46fd-84a1-ac4d2ee2138d,It doesn’t tinker with the underlying social s...,1,Non-biased,0.641418
240,e398e3aa-2af1-45d4-9672-d709d50b6aed,Asylum claimants must be kept in Mexico while ...,0,Non-biased,0.600384
241,7db63c04-f58e-4b77-93d4-d1e22fb7a376,Another example was the Governor’s move last w...,0,Non-biased,0.707847


In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def show_metrics(y, y_pred):
  """Print accuracy and weighted precision, recall and F1 of `y_pred` against `y`.

  Args:
    y: reference labels.
    y_pred: predicted labels, in the same order as `y`.

  The four scores are printed on one line with three decimals; nothing is returned.
  """
  # Order matches the placeholders of the format string below.
  scores = (
      accuracy_score(y, y_pred),
      precision_score(y, y_pred, pos_label=None, average='weighted', zero_division=0.0),
      recall_score(y, y_pred, pos_label=None, average='weighted'),
      f1_score(y, y_pred, average="weighted"),
  )
  print("accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" % scores)

In [None]:
# Convert the classifier's string labels to 1 ('Biased') / 0 so they can be
# compared with the integer `binary_class` column. The dtype guard keeps the
# cell re-runnable: once the column is numeric the conversion is skipped
# instead of failing on `'Biased' in <int>`.
if not pd.api.types.is_numeric_dtype(df['state']):
    df['state'] = df['state'].apply(lambda x: 1 if 'Biased' in x else 0)

# Metrics over the whole corpus
print('Todo el corpus:')
y_pred = df['state'].values
y = df['binary_class'].values
show_metrics(y,  y_pred)

# Metrics without the "#_1" sentences: keep only ids of the form <digits>_0
print('Quitando las oraciones #_1:')
zeros = df[df['id'].str.match(r'\d+_0')]  # raw string: '\d' is not a valid str escape
y_pred_z = zeros['state'].values
y_z = zeros['binary_class'].values
show_metrics(y_z,  y_pred_z)


Todo el corpus:
accuracy = 0.506, precision = 0.543, recall = 0.506, f1 = 0.512
Quitando las oraciones #_1:
accuracy = 0.541, precision = 0.656, recall = 0.541, f1 = 0.578


In [None]:
# Convert the classifier's string labels to 1 ('Biased') / 0. The dtype guard
# makes the cell safe to re-run: an already-converted numeric column is left
# alone instead of raising TypeError on `'Biased' in <int>`.
if not pd.api.types.is_numeric_dtype(df_sg2['state']):
    df_sg2['state'] = df_sg2['state'].apply(lambda x: 1 if 'Biased' in x else 0)

# Metrics over the whole corpus
print('Todo el corpus:')
y_pred_sg2 = df_sg2['state'].values
y_sg2 = df_sg2['binary_class'].values
show_metrics(y_sg2,  y_pred_sg2)

Todo el corpus:
accuracy = 0.667, precision = 0.673, recall = 0.667, f1 = 0.665


In [None]:
# Convert the classifier's string labels to 1 ('Biased') / 0. The dtype guard
# makes the cell safe to re-run: an already-converted numeric column is left
# alone instead of raising TypeError on `'Biased' in <int>`.
if not pd.api.types.is_numeric_dtype(en_corpus_Check['state']):
    en_corpus_Check['state'] = en_corpus_Check['state'].apply(lambda x: 1 if 'Biased' in x else 0)

# Metrics over the whole corpus
print('Todo el corpus:')
y_pred_Check = en_corpus_Check['state'].values
y_Check = en_corpus_Check['label'].values
show_metrics(y_Check,  y_pred_Check)

Todo el corpus:
accuracy = 0.576, precision = 0.578, recall = 0.576, f1 = 0.562
