# Testing models

In [49]:
import sys
sys.path.append('src/')
import pandas as pd
from Models import deberta_base_nli, bart_nli, deberta_v3_nli
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from Helpers import *

pd.set_option('display.max_columns', None)  # No limit on the number of columns displayed
pd.set_option('display.max_rows', None)  # No limit on the number of rows displayed
pd.set_option('display.max_colwidth', 500)  # Show up to 500 characters per cell; longer text is truncated

## Basic tests of models

In [2]:
# Smoke test: call deberta_base_nli on a premise/hypothesis pair with no topical overlap.
deberta_base_nli('a child was very sad and crying over a dropped ice cream', 'What is my favourite city?')

({'contradiction': 96.9, 'neutral': 3.0, 'entailment': 0.0}, ['contradiction'])

In [3]:
# Smoke test: call bart_nli on the same unrelated premise/hypothesis pair.
bart_nli('a child was very sad and crying over a dropped ice cream', 'What is my favourite city?')

({'contradiction': 93.7, 'neutral': 5.8, 'entailment': 0.5}, ['contradiction'])

In [4]:
# Smoke test: call deberta_v3_nli on the same unrelated premise/hypothesis pair.
deberta_v3_nli('a child was very sad and crying over a dropped ice cream', 'What is my favourite city?')



({'contradiction': 97.7, 'neutral': 2.2, 'entailment': 0.1}, ['contradiction'])

# Testing models with random samples from the toxic bias dataset

In [53]:
# Load the evaluation samples used by every cell below.
# NOTE(review): get_random_samples is not defined in this notebook (presumably it comes from
# the Helpers star import); the second argument looks like the number of rows to draw — confirm.
df = get_random_samples("data/toxicbias_train.csv", 50)

## Calculate labels

In [47]:
import pandas as pd

def add_nli_predictions(df, model):
    """Return a copy of ``df`` with NLI prediction columns added to every row.

    Each row is passed through ``predict_labels`` together with ``model``;
    the caller's DataFrame is left untouched because the work is done on a copy.
    """
    def annotate(sample):
        # Bind the model so the row-wise apply only has to supply the row.
        return predict_labels(sample, model)

    return df.copy().apply(annotate, axis=1)

def predict_labels(row, model, base_hypothesis="Bias or prejudice in the text."):
    """Add NLI predictions for two hypotheses to a single sample.

    The sample's comment text is the premise. It is scored once against the
    sample's own rationale and once against a generic base hypothesis.

    Args:
        row: Mapping-like record (for example a DataFrame row) providing
            'comment_text', 'bias' and 'rationale'. It is modified in place.
        model: Callable invoked as ``model(premise, hypothesis)`` that returns
            a ``(scores, labels)`` pair.
        base_hypothesis: Generic hypothesis. It is also used instead of the
            rationale when the row is labelled 'neutral' or the rationale is
            missing.

    Returns:
        The same ``row`` with 'predicted_label_rationale',
        'predicted_scores_rationale', 'predicted_label_base' and
        'predicted_scores_base' set.
    """
    comment_text = row['comment_text']
    rationale = row['rationale']

    if row['bias'] == 'neutral' or pd.isna(rationale):
        # No usable rationale: both hypotheses are the base hypothesis, so one
        # model call is enough. Both column pairs then reference the same
        # scores/labels objects.
        scores, labels = model(comment_text, base_hypothesis)
        row['predicted_label_rationale'] = labels
        row['predicted_scores_rationale'] = scores
        row['predicted_label_base'] = labels
        row['predicted_scores_base'] = scores
        return row

    scores, labels = model(comment_text, rationale)
    row['predicted_label_rationale'] = labels
    row['predicted_scores_rationale'] = scores

    scores, labels = model(comment_text, base_hypothesis)
    row['predicted_label_base'] = labels
    row['predicted_scores_base'] = scores

    return row

In [104]:
# Score the same sample set with every NLI model; results are keyed by model name.
dfs = {
    model_name: add_nli_predictions(df, nli_model)
    for model_name, nli_model in (
        ('deberta-base', deberta_base_nli),
        ('bart-large', bart_nli),
        ('deberta-v3', deberta_v3_nli),
    )
}



## Breakdown of labels 

In [186]:
def label_breakdown(df, label_column_name='predicted_label_rationale'):
    """Count predicted NLI labels separately for each value of the 'bias' column.

    Only the first entry of each row's predicted-label list is counted.

    Returns a nested dict ``{bias_value: {nli_label: count}}`` covering the
    'bias' and 'neutral' groups and the three NLI labels.
    """
    nli_labels = ('contradiction', 'neutral', 'entailment')
    counts = {group: dict.fromkeys(nli_labels, 0) for group in ('bias', 'neutral')}

    for _, sample in df.iterrows():
        group = sample['bias']
        first_label = sample[label_column_name][0]
        counts[group][first_label] += 1

    return counts


In [191]:
## compare results of rationale hypothesis and base one
# For every model, tabulate the label counts under both hypothesis variants.
results = {
    model: {
        'rationale hypothesis': label_breakdown(data),
        'base hypothesis': label_breakdown(data, 'predicted_label_base'),
    }
    for model, data in dfs.items()
}

# One report per model: name, both breakdowns, then a blank separator line.
for model, data in results.items():
    print(
        model,
        f'rationale hypothesis: {data["rationale hypothesis"]}',
        f'base hypothesis: {data["base hypothesis"]}',
        '',
        sep='\n',
    )


deberta-base
rationale hypothesis: {'bias': {'contradiction': 3, 'neutral': 34, 'entailment': 3}, 'neutral': {'contradiction': 0, 'neutral': 1, 'entailment': 9}}
base hypothesis: {'bias': {'contradiction': 3, 'neutral': 4, 'entailment': 33}, 'neutral': {'contradiction': 0, 'neutral': 1, 'entailment': 9}}

bart-large
rationale hypothesis: {'bias': {'contradiction': 6, 'neutral': 29, 'entailment': 5}, 'neutral': {'contradiction': 0, 'neutral': 1, 'entailment': 9}}
base hypothesis: {'bias': {'contradiction': 1, 'neutral': 4, 'entailment': 35}, 'neutral': {'contradiction': 0, 'neutral': 1, 'entailment': 9}}

deberta-v3
rationale hypothesis: {'bias': {'contradiction': 1, 'neutral': 34, 'entailment': 5}, 'neutral': {'contradiction': 1, 'neutral': 7, 'entailment': 2}}
base hypothesis: {'bias': {'contradiction': 7, 'neutral': 32, 'entailment': 1}, 'neutral': {'contradiction': 1, 'neutral': 7, 'entailment': 2}}



## Accuracy of labels

In [176]:
def calculate_correct_bias(predictions):
    """Split label counts for biased samples into (correct, incorrect).

    For a biased sample only 'entailment' counts as correct; 'contradiction'
    and 'neutral' both count as incorrect.
    """
    hits = predictions['entailment']
    misses = sum(predictions[label] for label in ('contradiction', 'neutral'))
    return hits, misses

def calculate_correct_neutral(predictions):
    """Split label counts for neutral samples into (correct, incorrect).

    For a neutral sample 'contradiction' and 'neutral' count as correct;
    'entailment' counts as incorrect.
    """
    hits = sum(predictions[label] for label in ('contradiction', 'neutral'))
    misses = predictions['entailment']
    return hits, misses

In [194]:
# Print the overall accuracy (in percent) of each model under both hypothesis variants.
for model, data in results.items():
  for hypothesis in ('rationale hypothesis', 'base hypothesis'):
    breakdown = data[hypothesis]

    bias_hits, bias_misses = calculate_correct_bias(breakdown['bias'])
    neutral_hits, neutral_misses = calculate_correct_neutral(breakdown['neutral'])

    # Accuracy = correctly classified samples over all samples, biased and neutral combined.
    correct = bias_hits + neutral_hits
    total = correct + bias_misses + neutral_misses
    accuracy = (correct / total) * 100

    if hypothesis == 'rationale hypothesis':
      print(f'({model}) rationale hypothesis accuracy = {accuracy}%\n')
    else:
      print(f'({model}) base hypothesis accuracy =  {accuracy}%\n')
    
    

(deberta-base) rationale hypothesis accuracy = 8.0%

(deberta-base) base hypothesis accuracy =  68.0%

(bart-large) rationale hypothesis accuracy = 12.0%

(bart-large) base hypothesis accuracy =  72.0%

(deberta-v3) rationale hypothesis accuracy = 26.0%

(deberta-v3) base hypothesis accuracy =  18.0%



In [195]:
# Display only the samples whose 'bias' column is 'neutral'.
df.loc[df['bias'] == 'neutral']

Unnamed: 0,comment_text,identity_attack_x,bias,target,category,rationale
1244,Why is the DP so reluctant to call this guy what he is an allAmerican white boy TERRORIST? Does the DP think it is genetically impossible for a white guy top be a terrorist ot is there something more racially sinister at work here?,0.5,neutral,,none,
236,And you probably think it is ok to lightly beat women.,0.257576,neutral,,none,
3462,Yes if the Catholic Church were not so darned ... Catholic ... we could join the LDS and the Anglicans in changing dogma on the fly. Some minor notes ... the committee discussing birth control was not granted any authority to approve or disapprove anything. It was supposed to be giving the Holy Father some pros and cons to consider. And its report was not for public consumption but was leaked. When we talk about chance we mean if something is certain the probability is 1 and if absolutely ...,0.5,neutral,,none,
3277,You are lying. There is no clitorectomy at birth. There is no stoning for talking. Women dont have to wear the hijab at all times. There is no killing babies for being born lesbian How in the world would one determine at birth that a baby is lesbian? Are you non compos mentis? Does rationality baffle you?,0.267606,neutral,,none,
725,Where did you get your FALSE information about the top of Mauna Kea? Its garbage like that that causes MORE issues with worthwhile projects all over our island.. white racism what kid of clown are you?,0.271429,neutral,,none,
2166,F texas and all the other bible thumping bigots and sexists.,0.285714,neutral,,none,
2704,I wonder what would happen if someone started a whites only tournament...,0.285714,neutral,,none,
188,These Black activists should study their history and read up on the Underground Railroad. Maybe then theyd feel like they deserve to pay Canada a tribute not the other way around.,0.528571,neutral,,none,
1045,Not to white people of course because they did not experience any discrimination...,0.8,neutral,,none,
1132,Yes keep the blame game going on someone who is not even President yet Last time we checked the bag lady with the pointy hat and broom lost....too bad...so sad....NOT Join the Marxistfeminist gender bender crybabies and their latte sipping hipsters at the nearest cafe if you dont like democracy in action,0.5,neutral,,none,


In [223]:
# Manual probe: score a single comment against a hand-written hypothesis that names the target group.
premise = 'Your comment speaks for itself. In your mind Catholicism is all about sex.'
hypothesis = 'Prejudice or bias against Catholicism.'
bart_nli(premise, hypothesis)

({'contradiction': 0.7, 'neutral': 40.6, 'entailment': 58.7}, ['entailment'])

In [224]:
# Manual probe: same premise, with the hypothesis reworded to "Negative view of ..." to compare scores.
premise = 'Your comment speaks for itself. In your mind Catholicism is all about sex.'
hypothesis = 'Negative view of Catholicism.'
bart_nli(premise, hypothesis)

({'contradiction': 0.1, 'neutral': 24.0, 'entailment': 75.9}, ['entailment'])

In [234]:
# Manual probe: the premise is one of the neutral-labelled samples displayed above,
# scored against a hand-written gender-related hypothesis.
premise = 'You are lying. There is no clitorectomy at birth. There is no stoning for talking. Women dont have to wear the hijab at all times. There is no killing babies for being born lesbian How in the world would one determine at birth that a baby is lesbian? Are you non compos mentis? Does rationality baffle you?'
hypothesis = 'Negative opinion of a gender.'
bart_nli(premise, hypothesis)

({'contradiction': 55.2, 'neutral': 8.5, 'entailment': 36.3},
 ['contradiction'])