# LLM Experiments for WikiBias as an Extrapolation Corpus for Bias Detection


In [1]:
# Importing packages

import torch
import pandas as pd
import numpy as np
import re
import os
from sklearn.model_selection import train_test_split
from openai import OpenAI

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device: ', device)
print("CUDA Memory: ", torch.cuda.max_memory_allocated(), torch.cuda.memory_allocated())

torch.cuda.empty_cache()

Using device:  cuda
CUDA Memory:  0 0


In [2]:
# Veryfing GPU activity
torch.cuda.device_count()

1

In [3]:
torch.cuda.is_available()

True

In [4]:
print(torch.version.cuda)

11.3


In [None]:
from huggingface_hub import notebook_login
notebook_login()

# The following cells import the different corpora. 

The code in each cell corresponds to the directory of the dataset within the computer. Datasets can be found online.

## Wikibias

In [None]:
def get_wikibias():
    '''
    We read the wikibias corpus, prepare the partitions and return them. 
    '''
    corpus = pd.read_csv(f'data/wikibias_re.csv')
    corpus_epis = corpus[corpus.type.str.contains('0\|0\|1')]
    corpus_neu = corpus[corpus.type.str.contains('0\|0\|0')].sample(len(corpus_epis))
    corpus = pd.concat([corpus_epis, corpus_neu], axis=0)
    #-------------
    #We change to binary classes.
    corpus['label'] = corpus['type'].apply(lambda x: 1 if '1' in x else 0)
    #We are left with the data that we need
    corpus = corpus[["sentence", "partition", "label"]]
    #Split
    train = corpus[corpus.partition == 0]
    test = corpus[corpus.partition == 1]
    val =  corpus[corpus.partition == 2]
    return train[["sentence", "label"]], test[["sentence", "label"]], val[["sentence", "label"]]

train, test, val = get_wikibias()

In [9]:
# Para los experimentos usando Gemma se tuvo un total de 80 valores en el train.
# Para los de GPT se usaron un total de 
print(len(test['sentence']), len(test['label']))

## SG2

In [None]:
#SG2
en_corpus_SG2 = pd.read_csv('data/final_labels_SG2.csv', delimiter=';')
en_corpus_SG2['label'] = en_corpus_SG2['label_bias'].apply(lambda x: 1 if 'Biased' in x else 0)
en_corpus_SG2 = en_corpus_SG2[['text','label']]
en_corpus_SG2 = en_corpus_SG2.rename(columns={'text': 'sentence'})
_, test_SG2 = train_test_split(en_corpus_SG2, train_size=.8, random_state=0)
test_SG2.head(15)

Unnamed: 0,sentence,label
253,American Outdoor Brands Corp AOBC.O said on Th...,0
541,Black Lives Matter Philadelphia organizer prop...,0
302,And if we look at a subset of guns which the l...,1
3575,While Pence and the president have tried to pa...,1
1698,Many people in politics and media are still no...,1
3048,Their work came on the heels of an IPCC report...,0
3653,"Yet bizarrely, the entirety of the Establishme...",1
841,"Despite a clear margin of victory for Biden, T...",0
3530,When greed and power are exercised by giant mu...,1
473,"Because he just can't help himself, on the Sun...",0


## Checkthat

In [None]:
#Checkthat
test_Check = pd.read_csv('data/dev_en.tsv', sep='\t')
test_Check['label'] = np.where(test_Check['label']=='SUBJ',1,0)
test_Check = test_Check[['sentence','label']]
test_Check.head(5)

Unnamed: 0,sentence,label
0,Who will redistribute the hoarded wealth that ...,1
1,What we don’t need is the indiscriminate influ...,1
2,The Social Distance Between Us shows every sig...,0
3,"History shows that McCarthy and McConnell, lik...",0
4,So while it’s not hard to reach a banal point ...,1


# Now the LLMs: Gemma and GPT.

## Google Gemma

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

set_seed(42069)

model = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model)

model = AutoModelForCausalLM.from_pretrained(model)
#model.to('cuda')

In [13]:
def gemma(text):
    token = tokenizer(text, return_tensors = 'pt')
    respuesta = model.generate(
        input_ids = token['input_ids'],
        max_new_tokens = 60,
        do_sample = True,
        temperature = 0.00001
    )
    out_token = respuesta[0][token['input_ids'].shape[-1]:]
    deco = tokenizer.decode(
        out_token,
        skip_special_tokens = True
    )
    return deco

### Zero-Shot Learning

In [15]:
# Guía de prompts: https://www.promptingguide.ai/models/gemma
def lista_gemma(dataset):
    listaGemma = []
    for i in dataset:
        listaGemma.append(gemma(f"""<start_of_turn>user You are a bias detector. 
        Analyze a sentence and answer with 0 if there is no bias, 1 if there is. 
        The sentence is {i}<end_of_turn><start_of_turn>model"""))
    return listaGemma

In [16]:
%%time
wiki_zeroshot = lista_gemma(test['sentence'])

CPU times: total: 3h 14min 41s
Wall time: 10min 38s


### One-Shot Learning

In [15]:
# Guía de prompts: https://www.promptingguide.ai/models/gemma
def lista_gemma(dataset):
    listaGemma = []
    for i in dataset:
        listaGemma.append(gemma(f"""<start_of_turn>user 
        I want you to analyze this sentence: {i}. Based on the following 
        biased sentence I want you to determine if the previous sentence is biased.
        Example sentence:
        In 1995 he narrated the excellent TV miniseries Watergate.
        Think and reason before responding. 
        And answer with 0 or 1. 0 if there is no bias. 1 if there is.
        <end_of_turn><start_of_turn>model"""))
    return listaGemma

In [19]:
%%time
wiki_oneshot = lista_gemma(test['sentence'])
wiki_oneshot

CPU times: total: 3h 50min 20s
Wall time: 12min 38s


[" 1. The sentence is biased because it presents a one-sided view of Edwards's appointment and service in the Tongan Cabinet. It does not provide a balanced perspective on his role and contributions.",
 ' 1. The sentence is biased as it has a biased tone and opinion. The sentence implies that a certain type of republic or oligarchy is prevalent in the eastern part of the Indian subcontinent, which is a subjective statement.',
 ' 1. The sentence is biased because it is based on a biased source (the sentence itself).',
 ' 0\n\nThe sentence is biased as it suggests that the panpsychist sees him or herself as a mind in a world of minds, implying that the person is a conscious entity.',
 ' 0\n\nThe sentence is biased as it has a biased tone and opinion. It is expressing a preference for certain countries and regions over others.',
 ' 0\n\nThe sentence does not exhibit bias, as it is a neutral statement about the lack of an alert being sent.',
 ' 1. The sentence is biased. \n\nThe sentence i

In [16]:
%%time
SG2_oneshot = lista_gemma(test_SG2['sentence'])
SG2_oneshot

[' 1. The sentence is biased because it has a subjective tone and expresses a biased opinion about American Outdoor Brands Corp.',
 ' 1. The sentence is biased because it expresses a strong opinion against the police and advocates for their abolition.',
 '\n0. There is no bias in the sentence. \n\nThe sentence is stating a factual claim about the relationship between guns and gun deaths.',
 ' 1. The sentence is biased because it has a negative tone and expresses a preference for a return to normalcy.',
 " 1. The sentence is biased because it has a negative tone and focuses on the negative aspects of Pelosi's position.",
 ' 1. The sentence is biased because it uses a negative tone and a strong verb ("warned") to emphasize the dire consequences of inaction on climate change.',
 ' 1. The sentence is biased as it expresses a negative opinion about BLM.',
 " 1. The sentence is biased because it has a negative tone and expresses a negative opinion about Trump's behavior.",
 ' 1. The sentence

In [18]:
%%time
checkaux = lista_gemma(test_Check['sentence'])
checkaux

[' 1. The sentence is biased because it expresses a biased opinion about the distribution of wealth.',
 ' 1. The sentence is biased because it uses a negative tone and a broad generalization about an entire group of people.',
 ' 1. The sentence is biased because it is comparing The Social Distance Between Us to Poverty Safari, which are both TV miniseries. This comparison suggests that The Social Distance Between Us is more successful than Poverty Safari, which is a negative statement.',
 ' 1. The sentence is biased because it has a negative connotation towards McCarthy and McConnell.',
 ' 1. The sentence is biased. \n\nThe sentence assumes that exploitation is virtuous, which is a value judgment.',
 ' 1. The sentence is biased because it has a negative tone and focuses on the negative actions of McCarthy and President Trump.',
 ' 1. The sentence is biased because it expresses a negative opinion about the Republican Party.',
 '\n0. There is no bias in the sentence. It is a statement of

### Few-Shot Learning

In [20]:
def lista_gemma(dataset):
    listaGemma = []
    for i in dataset:
        listaGemma.append(gemma(f"""<start_of_turn>user 
        I want you to analyze this sentence: {i}. Based on the following 
        biased sentences I want you to determine if the previous sentence is biased.
        These are the examples:
        1.- In 1995 he narrated the excellent TV miniseries Watergate.
        2.- Evolution is the source of the vast diversity of extant and extinct life on Earth.
        3.- It asks such questions as : How should men interact in society?
        Think and write your step-by-step reasoning before responding. 
        And answer only with 0, 1. 0 if there is no bias. 1 if there is.
        <end_of_turn><start_of_turn>model"""))
    return listaGemma

In [21]:
%%time
wiki_fewshot = lista_gemma(test['sentence'])
wiki_fewshot

CPU times: total: 5h 3s
Wall time: 16min 27s


["\nThe sentence is biased because it presents a biased perspective on Edwards's appointment to the Tongan Cabinet. The sentence implies that Edwards was appointed by the Tongan monarch with the intention of serving as a minister, which is a subjective opinion.",
 '\n0\n\nThe sentence is biased as it has a value judgment. It is expressing a preference for a type of republic or oligarchy in the eastern part of the Indian subcontinent.',
 '\n1. In 1995 he narrated the excellent TV miniseries Watergate.\n0\n\nThe sentence is biased because it has a biased tone and opinion. The word "excellent" suggests a positive opinion about the TV miniseries, which is clearly biased towards a positive view.',
 "\n0. The sentence is not biased. It is a neutral statement about the existence of the panpsychist's perspective.",
 "\nSure, here's the analysis of the sentence:\n\n**Sentence:** Other countries , namely Brazil and some Muslim countries amongst others , welcomed the result.\n\n**Bias:** 1\n\n**E

In [17]:
def ds_cawn(dataset, path):
    df = pd.DataFrame(dataset)
    df = df.rename(columns={0:"Respuestas"})
    df.to_csv(path)

In [None]:
ds_cawn(SG2_oneshot, r'gemmasg2_oneshot.csv') 
ds_cawn(checkaux, r'gemmacheckthat_oneshot.csv') 

In [None]:
ds_cawn(wiki_zeroshot, r'gemma_zeroshot.csv') 
ds_cawn(wiki_oneshot, r'gemma_oneshot.csv')
ds_cawn(wiki_fewshot, r'gemma_fewshot.csv')

In [None]:
labels_buenos = [int(i) for i in test['label']]
labels_df = pd.DataFrame(labels_buenos)
labels_df = labels_df.rename(columns={0:"label"})
labels_df.to_csv(r'labels.csv')

### Evaluation

In [None]:
#wiki_zeroshot = pd.read_csv(r'gemma_zeroshot.csv')
#wiki_oneshot = pd.read_csv(r'gemma_oneshot.csv')
#wiki_fewshot = pd.read_csv(r'gemma_fewshot.csv')
checkthat = pd.read_csv(r'gemmacheckthat_oneshot.csv')
sg2 = pd.read_csv(r'gemmasg2_oneshot.csv')
labels = pd.read_csv(r'labels.csv')

In [3]:
def valores(dataset):
    n_shot = []
    for i in dataset:
        if re.search(r'([0-9])', i) != None:
            n_shot.append(int(re.search(r'([0-9])', i)[0]))
        else:
            n_shot.append(1) #Revisando los casos todos son positivos.
    return n_shot

In [9]:
#wiki_labels = [int(i) for i in test['label']]
check_labels = [int(i) for i in test_Check['label']]
sg2_labels = [int(i) for i in test_SG2['label']]
#wiki_zero = valores(wiki_zeroshot["Respuestas"])
#wiki_one = valores(wiki_oneshot["Respuestas"])
checkthat_val = valores(checkthat["Respuestas"])
sg2_val = valores(sg2["Respuestas"])
#wiki_few = valores(wiki_fewshot["Respuestas"])

In [10]:
print(len(checkthat_val), len(check_labels), len(sg2_val), len(sg2_labels))

243 243 735 735


In [28]:
from sklearn import metrics

print(metrics.classification_report(wiki_labels, wiki_zero))
print(metrics.classification_report(wiki_labels, wiki_one))
print(metrics.classification_report(wiki_labels, wiki_few))

              precision    recall  f1-score   support

           0       0.44      0.65      0.52        34
           1       0.60      0.39      0.47        46

    accuracy                           0.50        80
   macro avg       0.52      0.52      0.50        80
weighted avg       0.53      0.50      0.49        80

              precision    recall  f1-score   support

           0       0.58      0.53      0.55        34
           1       0.67      0.72      0.69        46

    accuracy                           0.64        80
   macro avg       0.63      0.62      0.62        80
weighted avg       0.63      0.64      0.63        80

              precision    recall  f1-score   support

           0       0.36      0.26      0.31        34
           1       0.54      0.61      0.57        46
           2       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

    accuracy                           0.46        80
   macro avg       0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
from sklearn import metrics

print(metrics.classification_report(check_labels, checkthat_val))
print(metrics.classification_report(sg2_labels, sg2_val))

              precision    recall  f1-score   support

           0       0.48      0.32      0.38       116
           1       0.52      0.69      0.59       127

    accuracy                           0.51       243
   macro avg       0.50      0.50      0.49       243
weighted avg       0.50      0.51      0.49       243

              precision    recall  f1-score   support

           0       0.49      0.27      0.35       360
           1       0.51      0.73      0.60       375
           8       0.00      0.00      0.00         0

    accuracy                           0.51       735
   macro avg       0.34      0.34      0.32       735
weighted avg       0.50      0.51      0.48       735



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## GPT4

In [12]:
from openai import OpenAI

# key = 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

bias1 = "Credit information such as a persons previous loan performance is a powerful tool to predict his future behavior"

client = OpenAI()
response = client.chat.completions.create(
  model="gpt-4o-mini",
  temperature = 0.2,
  messages=[
    {"role": "system", "content": "You detect bias in sentences."},
    {"role": "user", "content": f"Determine if the following sentence is biased or not. Answer only with 0 and 1. 0 if it isn't biased, 1 if it is. The sentence is {bias1}"},
    {"role": "assistant", "content": "The following is an example of a biased sentence:  In 1995 he narrated the excellent TV miniseries Watergate."}
  ]
)

mensaje = response.choices[0].message.content
print(mensaje)

1


### Zero-shot

In [24]:
def gpt4_answers(bias):
    client = OpenAI()
    response = client.chat.completions.create(
        model = "gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You detect bias in sentences."},
            {"role": "user", "content": f"Determine if the following sentence is biased or not. Answer only with 0 and 1. 0 if it isn't biased, 1 if it is. The sentence is {bias}"},
        ]
    )
    return response.choices[0].message.content

def gpt_iteracion(test_set):
    aux = []
    for i in test_set:
        aux.append(gpt4_answers(i))
    return aux

In [19]:
primero = gpt_iteracion(test['sentence'])
primero

[0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0]

### One-shot

In [10]:
import time
from time import sleep

def gpt4_oneshot(bias):
    client = OpenAI()
    response = client.chat.completions.create(
        #model = "gpt-4o-mini",
        model = "gpt-4",
        messages=[
            {"role": "system", "content": "You detect bias in sentences."},
            {"role": "user", "content": f"Determine if the following sentence is biased or not. Answer only with 0 and 1. 0 if it isn't biased, 1 if it is. The sentence is {bias}"},
            {"role": "assistant", "content": "The following is an example of a biased sentence:  In 1995 he narrated the excellent TV miniseries Watergate."} #Framing bias
          ]
    )
    return response.choices[0].message.content

def gpt_it_oneshot(test_set):
    aux = []
    for i in test_set:
        aux.append(gpt4_oneshot(i))
        #time.sleep(100)
    return aux

In [11]:
%%time
one_shot_answers = gpt_it_oneshot(test["sentence"])
one_shot_gpt = [int(i) for i in one_shot_answers]

CPU times: total: 21.2 s
Wall time: 24min 51s


In [None]:
len(one_shot_gpt)

### Few-Shot

In [18]:
def gpt4_fewshot(bias):
    client = OpenAI()
    response = client.chat.completions.create(
        #model = "gpt-4o-mini",
        model = "gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You detect bias in sentences."},
            {"role": "user", "content": f"Determine if the following sentence is biased or not. Answer only with 0 and 1. 0 if it isn't biased, 1 if it is. The sentence is {bias}"},
            {"role": "assistant", "content": "The following are examples of a biased sentences: 1) In 1995 he narrated the excellent TV miniseries Watergate. 2) Evolution is the source of the vast diversity of extant and extinct life on Earth. 3) It asks such questions as : How should men interact in society?"
            },
          ]
    )
    return response.choices[0].message.content

def gpt_it_fewshot(test_set):
    aux = []
    for i in test_set:
        aux.append(gpt4_oneshot(i))
        #time.sleep(100)
    return aux

In [None]:
few_shot_answers = gpt_it_fewshot(test['sentence'])
few_shot_gpt = [int(i) for i in few_shot_answers]

### Evaluation

In [22]:
#zero_shot_gpt = [int(i) for i in primero]
one_shot_gpt = [int(i) for i in one_shot_answers]
#few_shot_gpt = [int(i) for i in few_shot_answers]
wiki_labels = [int(i) for i in test['label']]

In [24]:
from sklearn import metrics
#print(metrics.classification_report(wiki_labels, zero_shot_gpt))
print(metrics.classification_report(wiki_labels, one_shot_gpt))
#print(metrics.classification_report(wiki_labels, few_shot_gpt))

              precision    recall  f1-score   support

           0       0.50      0.03      0.06        31
           1       0.60      0.98      0.74        46

    accuracy                           0.60        77
   macro avg       0.55      0.51      0.40        77
weighted avg       0.56      0.60      0.47        77

