# Testing models

In [2]:
import sys
sys.path.append('src/')
import pandas as pd
from Models import deberta_base_nli, bart_nli, deberta_v3_nli
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from Helpers import *

pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.max_rows', None)  # Display all rows
pd.set_option('display.max_colwidth', 500)  # Display full text in columns

## Basic tests of models

In [2]:
deberta_base_nli('a child was very sad and crying over a dropped ice cream', 'What is my favourite city?')

({'contradiction': 96.9, 'neutral': 3.0, 'entailment': 0.0}, ['contradiction'])

In [3]:
bart_nli('a child was very sad and crying over a dropped ice cream', 'What is my favourite city?')

({'contradiction': 93.7, 'neutral': 5.8, 'entailment': 0.5}, ['contradiction'])

In [4]:
deberta_v3_nli('a child was very sad and crying over a dropped ice cream', 'What is my favourite city?')



({'contradiction': 97.7, 'neutral': 2.2, 'entailment': 0.1}, ['contradiction'])

# Testing models with random samples from toxic dataset

In [53]:
df = get_random_samples("data/toxicbias_train.csv", 50)

## Calculate labels

In [135]:
import pandas as pd

def add_nli_predictions(df, model):
    df = df.copy()  # Create a copy of the DataFrame

    df = df.apply(lambda row: predict_labels(row, model), axis=1)

    return df

def predict_labels(row, model):
    comment_text = row['comment_text']
    bias = row['bias']
    rationale = row['rationale']

    if bias == 'neutral' or pd.isna(rationale):
        rationale = "Bias or prejudice in the text."

    scores, labels = model(comment_text, rationale)
    row['predicted_label_rationale'] = labels
    row['predicted_scores_rationale'] = scores

    scores, labels = model(comment_text, "Bias or prejudice in the text.")
    row['predicted_label_base'] = labels
    row['predicted_scores_base'] = scores

    return row

In [104]:
dfs = {
    'deberta-base': add_nli_predictions(df, deberta_base_nli),
    'bart-large': add_nli_predictions(df, bart_nli),
    'deberta-v3': add_nli_predictions(df, deberta_v3_nli)
}



## Breakdown of labels 

In [186]:
def label_breakdown(df, label_column_name = 'predicted_label_rationale'):
    label_breakdown = {
        'bias': {'contradiction': 0, 'neutral': 0, 'entailment': 0},
        'neutral': {'contradiction': 0, 'neutral': 0, 'entailment': 0}
    }

    for _, row in df.iterrows():
        bias = row['bias']
        predicted_label = row[label_column_name][0]
        
        label_breakdown[bias][predicted_label] += 1

    return label_breakdown


In [191]:
## compare results of rationale hypothesis and base one
results = {}

for model, data in dfs.items():
  results[model] = {
      'rationale hypothesis':label_breakdown(data),
      'base hypothesis':label_breakdown(data, 'predicted_label_base'),
    }
  
for model, data in results.items():
  print(model)
  print(f'rationale hypothesis: {data["rationale hypothesis"]}')
  print(f'base hypothesis: {data["base hypothesis"]}')
  print()


deberta-base
rationale hypothesis: {'bias': {'contradiction': 3, 'neutral': 34, 'entailment': 3}, 'neutral': {'contradiction': 0, 'neutral': 1, 'entailment': 9}}
base hypothesis: {'bias': {'contradiction': 3, 'neutral': 4, 'entailment': 33}, 'neutral': {'contradiction': 0, 'neutral': 1, 'entailment': 9}}

bart-large
rationale hypothesis: {'bias': {'contradiction': 6, 'neutral': 29, 'entailment': 5}, 'neutral': {'contradiction': 0, 'neutral': 1, 'entailment': 9}}
base hypothesis: {'bias': {'contradiction': 1, 'neutral': 4, 'entailment': 35}, 'neutral': {'contradiction': 0, 'neutral': 1, 'entailment': 9}}

deberta-v3
rationale hypothesis: {'bias': {'contradiction': 1, 'neutral': 34, 'entailment': 5}, 'neutral': {'contradiction': 1, 'neutral': 7, 'entailment': 2}}
base hypothesis: {'bias': {'contradiction': 7, 'neutral': 32, 'entailment': 1}, 'neutral': {'contradiction': 1, 'neutral': 7, 'entailment': 2}}



## Accuracy of labels

In [176]:
def calculate_correct_bias(predictions):
    correct_predictions = predictions['entailment']
    incorrect_predictions = predictions['contradiction'] + predictions['neutral']
    return correct_predictions, incorrect_predictions

def calculate_correct_neutral(predictions):
    correct_predictions = predictions['contradiction'] + predictions['neutral']
    incorrect_predictions = predictions['entailment']
    return correct_predictions, incorrect_predictions

In [194]:
for model, data in results.items():
  rationale_hypothesis_results = data['rationale hypothesis']
  base_hypothesis_results = data['base hypothesis']

  # rationale hypothesis
  bias_correct_predictions, bias_incorrect_predictions = calculate_correct_bias(rationale_hypothesis_results['bias'])
  neutral_correct_predictions, neutral_incorrect_predictions = calculate_correct_neutral(rationale_hypothesis_results['neutral'])
 
  accuracy = ((bias_correct_predictions + neutral_correct_predictions) / (bias_correct_predictions + neutral_correct_predictions + bias_incorrect_predictions +neutral_incorrect_predictions)) * 100

  print(f'({model}) rationale hypothesis accuracy = {accuracy}%\n')

  # base hypothesis
  bias_correct_predictions, bias_incorrect_predictions = calculate_correct_bias(base_hypothesis_results['bias'])
  neutral_correct_predictions, neutral_incorrect_predictions = calculate_correct_neutral(base_hypothesis_results['neutral'])
 
  accuracy = ((bias_correct_predictions + neutral_correct_predictions) / (bias_correct_predictions + neutral_correct_predictions + bias_incorrect_predictions +neutral_incorrect_predictions)) * 100

  print(f'({model}) base hypothesis accuracy =  {accuracy}%\n')
    
    

(deberta-base) rationale hypothesis accuracy = 8.0%

(deberta-base) base hypothesis accuracy =  68.0%

(bart-large) rationale hypothesis accuracy = 12.0%

(bart-large) base hypothesis accuracy =  72.0%

(deberta-v3) rationale hypothesis accuracy = 26.0%

(deberta-v3) base hypothesis accuracy =  18.0%



## Gender test with categorical hypotheses

In [291]:
premise = 'Your comment speaks for itself. In your mind Catholicism is all about sex.'
hypothesis = 'Prejudice against a gender.'
bart_nli(premise, hypothesis)

({'contradiction': 11.4, 'neutral': 69.9, 'entailment': 18.6}, ['neutral'])

In [234]:
premise = 'You are lying. There is no clitorectomy at birth. There is no stoning for talking. Women dont have to wear the hijab at all times. There is no killing babies for being born lesbian How in the world would one determine at birth that a baby is lesbian? Are you non compos mentis? Does rationality baffle you?'
hypothesis = 'Negative opinion of a gender.'
bart_nli(premise, hypothesis)

({'contradiction': 55.2, 'neutral': 8.5, 'entailment': 36.3},
 ['contradiction'])

In [250]:
premise = 'I just think that men are more intelligent than other genders.'
hypothesis = 'Bias against a gender.'
bart_nli(premise, hypothesis)

({'contradiction': 10.8, 'neutral': 32.4, 'entailment': 56.8}, ['entailment'])

In [251]:
premise = 'I just think that men are more intelligent than other genders.'
hypothesis = 'Prejudice against a gender.'
bart_nli(premise, hypothesis)

({'contradiction': 16.9, 'neutral': 49.1, 'entailment': 34.0}, ['neutral'])

In [252]:
premise = 'I just think that men are more intelligent than other genders.'
hypothesis = 'Negative opinion of a gender.'
bart_nli(premise, hypothesis)

({'contradiction': 14.3, 'neutral': 10.1, 'entailment': 75.6}, ['entailment'])

In [263]:
import pandas as pd

def add_nli_predictions(df, model):
    df = df.copy()  # Create a copy of the DataFrame

    df = df.apply(lambda row: predict_labels(row, model), axis=1)

    return df

def predict_labels(row, model):
    comment_text = row['comment_text']

    hypothesis1 = 'Bias against a gender.'
    hypothesis2 = 'Prejudice against a gender.'
    hypothesis3 = 'Negative opinion of a gender.'

    scores, labels = model(comment_text, hypothesis1)
    row['hypothesis1_label'] = labels
    row['hypothesis1_score'] = scores

    scores, labels = model(comment_text, hypothesis2)
    row['hypothesis2_label'] = labels
    row['hypothesis2_score'] = scores

    scores, labels = model(comment_text, hypothesis3)
    row['hypothesis3_label'] = labels
    row['hypothesis3_score'] = scores

    return row

In [264]:

df_gender = pd.read_csv("data/toxicbias_train.csv")
df_gender = df_gender[df_gender['category'] == 'gender']

df_gender_test = add_nli_predictions(df_gender, bart_nli)

In [273]:
h1_count = df_gender_test['hypothesis1_label'].apply(lambda x: x[0]).value_counts()
h2_count = df_gender_test['hypothesis2_label'].apply(lambda x: x[0]).value_counts()
h3_count = df_gender_test['hypothesis2_label'].apply(lambda x: x[0]).value_counts()
print(h1_count)
print(h2_count)
print(h3_count)

entailment       88
neutral          19
contradiction     9
Name: hypothesis1_label, dtype: int64
entailment       72
neutral          33
contradiction    11
Name: hypothesis2_label, dtype: int64
entailment       72
neutral          33
contradiction    11
Name: hypothesis2_label, dtype: int64


In [284]:
df_gender_test[(df_gender_test['hypothesis1_label'].apply(lambda x: x[0]) == 'contradiction') & (df_gender_test['hypothesis2_label'].apply(lambda x: x[0]) == 'contradiction') & (df_gender_test['hypothesis3_label'].apply(lambda x: x[0]) == 'contradiction')]

Unnamed: 0,comment_text,identity_attack_x,bias,target,category,rationale,hypothesis1_label,hypothesis1_score,hypothesis2_label,hypothesis2_score,hypothesis3_label,hypothesis3_score
595,At last a woman who has a head on her shoulders.,0.5,bias,women,gender,prejudice against women,[contradiction],"{'contradiction': 98.3, 'neutral': 1.4, 'entailment': 0.3}",[contradiction],"{'contradiction': 88.9, 'neutral': 10.2, 'entailment': 0.9}",[contradiction],"{'contradiction': 93.6, 'neutral': 2.6, 'entailment': 3.8}"
826,this female is a female and despite the nip and tucks and hormone therapy will forever remain a female. CNS News published an article by John Hopkins Psychiatrist Paul R. McHugh.He had a great deal of experience with this phenomenon. I think anyone interested in this bizarre behavior needs to read it.,0.5,bias,women,gender,prejudice against women,[contradiction],"{'contradiction': 66.0, 'neutral': 27.9, 'entailment': 6.2}",[contradiction],"{'contradiction': 90.7, 'neutral': 8.3, 'entailment': 1.0}",[contradiction],"{'contradiction': 90.6, 'neutral': 2.3, 'entailment': 7.1}"
2543,Totally agree. Women are far superior to men.,0.6,bias,"women,men",gender,prejudice against men and towards women,[contradiction],"{'contradiction': 99.4, 'neutral': 0.5, 'entailment': 0.1}",[contradiction],"{'contradiction': 98.1, 'neutral': 1.7, 'entailment': 0.2}",[contradiction],"{'contradiction': 96.1, 'neutral': 0.9, 'entailment': 3.1}"
3621,Thats right. it takes a man to show women how to be a woman. Just look at Bruce Jenner.,0.5,bias,women,gender,prejudice against women,[contradiction],"{'contradiction': 45.7, 'neutral': 35.8, 'entailment': 18.5}",[contradiction],"{'contradiction': 71.4, 'neutral': 25.1, 'entailment': 3.5}",[contradiction],"{'contradiction': 80.6, 'neutral': 4.7, 'entailment': 14.7}"


In [287]:
premise = 'Totally agree. Men are far superior to women.'
hypothesis = 'Bias against a gender.'
bart_nli(premise, hypothesis)

({'contradiction': 4.2, 'neutral': 25.3, 'entailment': 70.5}, ['entailment'])

## Testing neutral cases

In [337]:
hypothesis1 = 'Bias against a race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'No bias or prejudice.'


premise = 'You strike me as an all or nothing black or white type of person.'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))

({'contradiction': 13.9, 'neutral': 29.8, 'entailment': 56.3}, ['entailment'])
({'contradiction': 11.4, 'neutral': 56.0, 'entailment': 32.5}, ['neutral'])
({'contradiction': 50.1, 'neutral': 4.7, 'entailment': 45.2}, ['contradiction'])
({'contradiction': 83.9, 'neutral': 7.7, 'entailment': 8.4}, ['contradiction'])


In [343]:
hypothesis7 = 'You should discriminate against different races and social groups.'

premise = 'You strike me as an all or nothing black or white type of person.'

print(bart_nli(premise, hypothesis7))


({'contradiction': 95.7, 'neutral': 4.2, 'entailment': 0.1}, ['contradiction'])


In [336]:
hypothesis1 = 'Bias against a race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'No bias or prejudice.'
premise = 'so other than governing while black what made Obama a racist and a thug?	'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))


({'contradiction': 0.7, 'neutral': 8.8, 'entailment': 90.5}, ['entailment'])
({'contradiction': 5.7, 'neutral': 46.6, 'entailment': 47.6}, ['entailment'])
({'contradiction': 0.2, 'neutral': 2.9, 'entailment': 96.9}, ['entailment'])
({'contradiction': 99.9, 'neutral': 0.1, 'entailment': 0.0}, ['contradiction'])


In [347]:
hypothesis7 = 'You should discriminate against different races and social groups.'

premise = 'so other than governing while black what made Obama a racist and a thug?	'

print(bart_nli(premise, hypothesis7))


({'contradiction': 96.6, 'neutral': 3.3, 'entailment': 0.1}, ['contradiction'])


In [340]:
premise = 'When the Fuk does this shit stop... we raised an indian in our family cause his mother was a drug addict and his tribe didnt want him.'
print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
neutral_hypothesis = 'Objectivitiy and impartiality, fair and open minded.'
print(bart_nli(premise, neutral_hypothesis))

({'contradiction': 2.3, 'neutral': 27.4, 'entailment': 70.3}, ['entailment'])
({'contradiction': 2.9, 'neutral': 27.0, 'entailment': 70.1}, ['entailment'])
({'contradiction': 0.9, 'neutral': 6.5, 'entailment': 92.6}, ['entailment'])
({'contradiction': 99.2, 'neutral': 0.6, 'entailment': 0.2}, ['contradiction'])


In [355]:
hypothesis7 = 'You should avoid discriminating against different races and social groups.'
hypothesis8 = 'The premise proposes that one gender is better than another.'

premise_n = 'When the Fuk does this shit stop... we raised an indian in our family cause his mother was a drug addict and his tribe didnt want him.'
premise_b = 'Totally agree. Men are far superior to women.'

print(bart_nli(premise_n, hypothesis7))
print(bart_nli(premise_n, hypothesis8))
print(bart_nli(premise_b, hypothesis7))
print(bart_nli(premise_b, hypothesis8))


({'contradiction': 4.0, 'neutral': 68.9, 'entailment': 27.2}, ['neutral'])
({'contradiction': 52.1, 'neutral': 20.1, 'entailment': 27.8}, ['contradiction'])
({'contradiction': 0.5, 'neutral': 98.9, 'entailment': 0.6}, ['neutral'])
({'contradiction': 0.1, 'neutral': 1.7, 'entailment': 98.2}, ['entailment'])


In [329]:
hypothesis1 = 'Bias against a race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'Bias against a sexual orientation.'
hypothesis5 = 'Prejudice against a sexual orientation.'
hypothesis6 = 'Negative opinion of a sexual orientation.'

premise = 'have nothing against Mexicans bisexuals or small people but that sculpture sucks.'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))
print(bart_nli(premise, hypothesis5))
print(bart_nli(premise, hypothesis6))


({'contradiction': 13.1, 'neutral': 16.9, 'entailment': 70.0}, ['entailment'])
({'contradiction': 18.7, 'neutral': 20.3, 'entailment': 61.0}, ['entailment'])
({'contradiction': 0.7, 'neutral': 4.6, 'entailment': 94.7}, ['entailment'])
({'contradiction': 72.5, 'neutral': 9.2, 'entailment': 18.3}, ['contradiction'])
({'contradiction': 70.2, 'neutral': 16.7, 'entailment': 13.1}, ['contradiction'])
({'contradiction': 1.0, 'neutral': 4.9, 'entailment': 94.1}, ['entailment'])


In [330]:
# Flipped sucks with rocks

hypothesis1 = 'Bias against a race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'Bias against a sexual orientation.'
hypothesis5 = 'Prejudice against a sexual orientation.'
hypothesis6 = 'Negative opinion of a sexual orientation.'

premise = 'have nothing against Mexicans bisexuals or small people but that sculpture rocks.'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))
print(bart_nli(premise, hypothesis5))
print(bart_nli(premise, hypothesis6))


({'contradiction': 65.2, 'neutral': 10.8, 'entailment': 24.0}, ['contradiction'])
({'contradiction': 74.2, 'neutral': 11.4, 'entailment': 14.4}, ['contradiction'])
({'contradiction': 13.9, 'neutral': 6.1, 'entailment': 80.0}, ['entailment'])
({'contradiction': 96.1, 'neutral': 1.6, 'entailment': 2.3}, ['contradiction'])
({'contradiction': 94.7, 'neutral': 3.2, 'entailment': 2.1}, ['contradiction'])
({'contradiction': 12.4, 'neutral': 6.2, 'entailment': 81.4}, ['entailment'])


In [308]:
hypothesis1 = 'Bias against a race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'Bias against a gender.'
hypothesis5 = 'Prejudice against a gender.'
hypothesis6 = 'Negative opinion of a gender.'

premise = 'Since females are a larger minority than blacks in the US passing the ERA is a good place to start. It would also benefit the 50 of blacks who are female. Both sexism AND racism matter.'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))
print(bart_nli(premise, hypothesis5))
print(bart_nli(premise, hypothesis6))

({'contradiction': 0.9, 'neutral': 5.9, 'entailment': 93.2}, ['entailment'])
({'contradiction': 3.5, 'neutral': 18.4, 'entailment': 78.1}, ['entailment'])
({'contradiction': 37.4, 'neutral': 14.5, 'entailment': 48.1}, ['entailment'])
({'contradiction': 0.8, 'neutral': 6.6, 'entailment': 92.7}, ['entailment'])
({'contradiction': 1.7, 'neutral': 12.9, 'entailment': 85.4}, ['entailment'])
({'contradiction': 6.5, 'neutral': 12.9, 'entailment': 80.6}, ['entailment'])


In [309]:
# REMOVED: 'Both sexism AND racism matter.' at end of message.

hypothesis1 = 'Bias against a race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'Bias against a gender.'
hypothesis5 = 'Prejudice against a gender.'
hypothesis6 = 'Negative opinion of a gender.'

premise = 'Since females are a larger minority than blacks in the US passing the ERA is a good place to start. It would also benefit the 50 of blacks who are female.'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))
print(bart_nli(premise, hypothesis5))
print(bart_nli(premise, hypothesis6))

({'contradiction': 24.4, 'neutral': 45.6, 'entailment': 30.0}, ['neutral'])
({'contradiction': 40.3, 'neutral': 43.4, 'entailment': 16.2}, ['neutral'])
({'contradiction': 48.4, 'neutral': 14.5, 'entailment': 37.0}, ['contradiction'])
({'contradiction': 16.9, 'neutral': 45.4, 'entailment': 37.7}, ['neutral'])
({'contradiction': 37.0, 'neutral': 53.7, 'entailment': 9.3}, ['neutral'])
({'contradiction': 46.2, 'neutral': 10.8, 'entailment': 43.0}, ['contradiction'])


In [334]:
hypothesis1 = 'Bias against an race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'Bias against a gender.'
hypothesis5 = 'Prejudice against a gender.'
hypothesis6 = 'Bias against an race.'

premise = 'Both sexism and racism matter.'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))
print(bart_nli(premise, hypothesis5))
print(bart_nli(premise, hypothesis6))

({'contradiction': 0.3, 'neutral': 2.0, 'entailment': 97.7}, ['entailment'])
({'contradiction': 0.4, 'neutral': 2.4, 'entailment': 97.3}, ['entailment'])
({'contradiction': 0.3, 'neutral': 3.1, 'entailment': 96.6}, ['entailment'])
({'contradiction': 0.1, 'neutral': 1.1, 'entailment': 98.7}, ['entailment'])
({'contradiction': 0.2, 'neutral': 1.5, 'entailment': 98.3}, ['entailment'])
({'contradiction': 0.3, 'neutral': 2.0, 'entailment': 97.7}, ['entailment'])


In [320]:
hypothesis1 = 'Bias against an race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'Bias against a gender.'
hypothesis5 = 'Prejudice against a gender.'
hypothesis6 = 'Bias against an race.'

premise = 'Equal opportunities for all, regardless of race or gender, are essential for a just society.'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))
print(bart_nli(premise, hypothesis5))
print(bart_nli(premise, hypothesis6))

({'contradiction': 95.0, 'neutral': 4.6, 'entailment': 0.5}, ['contradiction'])
({'contradiction': 82.4, 'neutral': 13.9, 'entailment': 3.6}, ['contradiction'])
({'contradiction': 38.7, 'neutral': 13.9, 'entailment': 47.4}, ['entailment'])
({'contradiction': 94.8, 'neutral': 4.3, 'entailment': 1.0}, ['contradiction'])
({'contradiction': 82.6, 'neutral': 14.9, 'entailment': 2.5}, ['contradiction'])
({'contradiction': 95.0, 'neutral': 4.6, 'entailment': 0.5}, ['contradiction'])


In [341]:
hypothesis1 = 'Bias against an race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'Bias against a gender.'
hypothesis5 = 'Prejudice against a gender.'
hypothesis6 = 'Bias against an race.'
hypothesis7 = 'You should discriminate against different races and social groups.'

premise = "Diversity and inclusion are crucial for fostering a vibrant and equitable society."

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))
print(bart_nli(premise, hypothesis5))
print(bart_nli(premise, hypothesis6))

({'contradiction': 99.7, 'neutral': 0.2, 'entailment': 0.0}, ['contradiction'])
({'contradiction': 99.1, 'neutral': 0.8, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 97.4, 'neutral': 1.6, 'entailment': 1.0}, ['contradiction'])
({'contradiction': 99.4, 'neutral': 0.6, 'entailment': 0.0}, ['contradiction'])
({'contradiction': 96.2, 'neutral': 3.6, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 99.7, 'neutral': 0.2, 'entailment': 0.0}, ['contradiction'])


In [322]:
hypothesis1 = 'Bias against an race.'
hypothesis2 = 'Prejudice against a race.'
hypothesis3 = 'Negative opinion of a race.'
hypothesis4 = 'Bias against a gender.'
hypothesis5 = 'Prejudice against a gender.'
hypothesis6 = 'Bias against an race.'

premise = "Recognizing and confronting unconscious biases is an important step in dismantling the barriers faced by individuals of different races and genders."

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))
print(bart_nli(premise, hypothesis4))
print(bart_nli(premise, hypothesis5))
print(bart_nli(premise, hypothesis6))

({'contradiction': 2.4, 'neutral': 21.2, 'entailment': 76.4}, ['entailment'])
({'contradiction': 3.9, 'neutral': 27.3, 'entailment': 68.8}, ['entailment'])
({'contradiction': 3.5, 'neutral': 39.6, 'entailment': 56.9}, ['entailment'])
({'contradiction': 3.3, 'neutral': 21.9, 'entailment': 74.8}, ['entailment'])
({'contradiction': 3.8, 'neutral': 47.1, 'entailment': 49.1}, ['entailment'])
({'contradiction': 2.4, 'neutral': 21.2, 'entailment': 76.4}, ['entailment'])


In [333]:
hypothesis1 = 'Bias against a religion.'
hypothesis2 = 'Prejudice against a religion.'
hypothesis3 = 'Negative opinion of a religion.'
premise = 'Its people like Demboski that give Christians a bad name.'

print(bart_nli(premise, hypothesis1))
print(bart_nli(premise, hypothesis2))
print(bart_nli(premise, hypothesis3))

({'contradiction': 0.6, 'neutral': 7.9, 'entailment': 91.5}, ['entailment'])
({'contradiction': 0.4, 'neutral': 11.8, 'entailment': 87.8}, ['entailment'])
({'contradiction': 0.1, 'neutral': 1.5, 'entailment': 98.4}, ['entailment'])


## Testing muslim prejudice examples 

In [474]:
df_bias = pd.read_csv("data/toxicbias_train.csv")

df_bias = df_bias[df_bias['bias'] == 'bias']
df_bias.sample(50)

Unnamed: 0,comment_text,identity_attack_x,bias,target,category,rationale
1539,Of course no mention of the fact that the reason more blacks are arrrested and tossed in jail is because... hello They commit more crimes Astounding is it not. Simple facts once again.,0.575,bias,blacks,race,
1379,Or perhaps we are simply not being told the truth by friends of the Liberal Government like the CBC The Star The GM and Trudeau himself. We all know swarms of illegal Muslim immigrants are still coming north from the US.,0.6,bias,muslims,religion,prejudice against muslims
3842,Why do we give another Muslim a chance to lecture to us Christians and Jews that we are all wrong about the peaceful intentions of Islam. Wake up people before it is too late.,0.6,bias,muslims,religion,prejudice against christians
2844,What about the African Americans shot in church by that white spremacist? What about the Indian American shot while washing his car by a white dude? What about the Muslims killed in Quebec by a white nutcase? Should we ban all white male Christians?,0.725,bias,whites,race,prejudice against whites
3457,How many offcolour racist antiwhite slags by black students on campuses go unpunished? All of them.,0.8,bias,blacks,race,prejudice against blacks
1720,Thats right Lindy. We need to lift the ban on people bringing kids that were not born here to Alaska. Obviously if he is a criminal he is Alaskan because we all know that most Alaskan males are criminal misogynists.,0.6,bias,alaskans,race,prejudice against alaskans
2552,At least no one died besides the shooter. I hope Trump bombs another Muslim country,0.87307,bias,muslims,religion,prejudice towards muslims
4255,People keep rambling on about Islamaphobia a phobia is an irrational fear. Canadians dont want Canada to turn into the UK or other countries that have had a mass influx of Muslim immigration. So Liberals do some research FFS. Look up Muslim Rape gangs and read about it. People are so stupid and nieve.,0.56338,bias,muslims,religion,prejudice against muslims
2823,This Jahablanoespanol is so stupid. The Donkeys had two weeks to prepare and blew it. Black coach. Black thugs. And Elway has still not found the short yardage runner since the Flacco Bailey disaster. I am not with the Negro Football League. 16 ACT scores and a 50 percent high school graduation rate Black lives do not matter. Drains,0.585714,bias,blacks,race,prejudice against blacks
3859,You sure TONTO dont mean GAY?,0.6,bias,lgbtq,lgbtq,prejudice against gays


In [131]:
df_neutral = pd.read_csv("data/toxicbias_train.csv")

df_neutral = df_neutral[df_neutral['bias'] == 'neutral']
df_neutral.sample(50)

Unnamed: 0,comment_text,identity_attack_x,bias,target,category,rationale
3860,They are protesting dirty cops murdering innocent black people? Where did you hear that? Also thanks for the cursory introduction to law. It must be hard for you to process that an educated mind might see something in a different way than you. Pull some more GED jokes on people cus that is just classy.,0.6,neutral,,none,
2300,There is no such thing as Muslim Terrorists. Ask the MSM. Justin will vouch for it.,0.5,neutral,,none,
732,Unloving yes. You would have us believe that God is as homophobic as you are. And given that you reject Dignitatis Humanae you are NOT faithful to Church teaching.,0.266667,neutral,,none,
1314,Wah wah wah go paint yourself a sign and walk around in a circle with the other morons. I just got back from a gay wedding and smoked a huge bowl of the weed I grew. You are a narrow minded lefty who hasnt got a clue.,0.274194,neutral,,none,
3398,The face of evil ... Republicans pull the thumb on your horrible voting records. The amount of Forever Wars of Choice debt and destruction to life on this planet is truly SICK. You consider yourselves Christians?? Far from it ...,0.2875,neutral,,none,
3129,Are you a white supremacist Martel?,0.266667,neutral,,none,
3898,He may be black but ihe is a duffus. So sue me.,0.5,neutral,,none,
1985,Catholics who voted for Trump are getting what they deserve. Unfortunately the rest of us are going to have to suffer with them.,0.253521,neutral,,none,
2084,And yet another bitter NCR columnist accusing millions of Americans of racism and antiChristian bigotry. How sad how lame. Grow up NCR.,0.271429,neutral,,none,
4014,What lily white bunch you people are...He didnt make those comments to women directly Locker room talk goes on everyday by Women and Men...Get Real you hypocrites...,0.263158,neutral,,none,


In [127]:
premise_n1 = 'When the Fuk does this shit stop... we raised an indian in our family cause his mother was a drug addict and his tribe didnt want him.'
premise_n2 = 'So if someone has concerns about free speech then they are filled with hate huh? Oh and BTW whats wrong with being a white person? Are you a racist? Liberal namecalling nonsense at its best right here folks	'
premise_n3 = 'So the protestors are racist how exactly? Its sad youre so triggered by the horrid injustice of hearing a black mans opinion.	'
premise_n4 = 'If we should blame all Muslims for the acts of a few as many on the right feel we should be doing then yes we should blame all Christians for the actions of this one guy.	'
premise_n5 = 'Do you really believe there are so few heterosexuals out here that if the Church doesnt mandate heterosexuality the human race will die out?	'

premise_b1 = 'At least no one died besides the shooter. I hope Trump bombs another Muslim country'	
premise_b2 = 'where is our vocal feral cat killing muslim to defend his fellow islamist? must be hiding under his blow up sheep.'
premise_b3 = 'The Muslims are already reigning hell on the world. Crush them all...	' 
premise_b4 = 'Ill be upfront so you do not miss my meaning... Muslims have no place in a feee society Banish them all	'
premise_b5 = 'Maybe it would be better if muslims stayed in muslim countrys? Not sure why muslims believe they can go to western countries then teach their kids to commit terrorist acts against those same western countries.'

hypothesis1 = 'The use of violence is encouraged or desired against a religion.'
hypothesis2 = 'A religion should be discriminated against.'
hypothesis3 = 'Members of a religion should be excluded and marginalized.'
hypothesis4 = 'Members of a religion should remain in their respective countries.'
hypothesis5 = 'Members of a religion are compared to animals.'

hypothesis6 = 'A religion or members of a religion are compared to devils or demons.'
hypothesis7 = 'The integration of a religion should be avoided in a country or society.'
hypothesis8 = 'A religion or members of a religion are stupid.'
hypothesis9 = 'A religion is inferior when compared to another religion.'
hypothesis10 = 'Religious practices of a particular faith should be restricted or banned in public spaces.'
hypothesis11 = 'Religious attire or symbols should be prohibited.'
hypothesis12 = 'A religion promotes violence or intolerance.'
hypothesis13 = 'Members of a religion are a threat.'
hypothesis14 = 'Members of a religion are not trustworthy or loyal.'

In [116]:
print(bart_nli(premise_b1, hypothesis1))
print(bart_nli(premise_b1, hypothesis2))
print(bart_nli(premise_b1, hypothesis3))
print(bart_nli(premise_b1, hypothesis4))
print(bart_nli(premise_b1, hypothesis5))
print()
print(bart_nli(premise_b1, hypothesis6))
print(bart_nli(premise_b1, hypothesis7))
print(bart_nli(premise_b1, hypothesis8))
print(bart_nli(premise_b1, hypothesis9))
print(bart_nli(premise_b1, hypothesis10))
print(bart_nli(premise_b1, hypothesis11))
print(bart_nli(premise_b1, hypothesis12))
print(bart_nli(premise_b1, hypothesis13))
print(bart_nli(premise_b1, hypothesis14))

({'contradiction': 2.3, 'neutral': 11.2, 'entailment': 86.5}, ['entailment'])
({'contradiction': 93.2, 'neutral': 6.1, 'entailment': 0.7}, ['contradiction'])
({'contradiction': 88.1, 'neutral': 11.6, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 97.2, 'neutral': 2.6, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 82.5, 'neutral': 15.5, 'entailment': 2.0}, ['contradiction'])

({'contradiction': 67.6, 'neutral': 27.4, 'entailment': 5.0}, ['contradiction'])
({'contradiction': 38.3, 'neutral': 61.3, 'entailment': 0.4}, ['neutral'])
({'contradiction': 70.4, 'neutral': 28.8, 'entailment': 0.8}, ['contradiction'])
({'contradiction': 74.7, 'neutral': 21.8, 'entailment': 3.5}, ['contradiction'])
({'contradiction': 44.3, 'neutral': 55.4, 'entailment': 0.2}, ['neutral'])
({'contradiction': 42.7, 'neutral': 57.1, 'entailment': 0.2}, ['neutral'])
({'contradiction': 9.2, 'neutral': 61.4, 'entailment': 29.4}, ['neutral'])
({'contradiction': 5.2, 'neutral': 51.8, 'entailment': 4

In [117]:
print(bart_nli(premise_b2, hypothesis1))
print(bart_nli(premise_b2, hypothesis2))
print(bart_nli(premise_b2, hypothesis3))
print(bart_nli(premise_b2, hypothesis4))
print(bart_nli(premise_b2, hypothesis5))
print()
print(bart_nli(premise_b2, hypothesis6))
print(bart_nli(premise_b2, hypothesis7))
print(bart_nli(premise_b2, hypothesis8))
print(bart_nli(premise_b2, hypothesis9))
print(bart_nli(premise_b2, hypothesis10))
print(bart_nli(premise_b2, hypothesis11))
print(bart_nli(premise_b2, hypothesis12))
print(bart_nli(premise_b2, hypothesis13))
print(bart_nli(premise_b2, hypothesis14))

({'contradiction': 5.2, 'neutral': 16.2, 'entailment': 78.6}, ['entailment'])
({'contradiction': 95.2, 'neutral': 4.6, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 94.2, 'neutral': 5.7, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 91.8, 'neutral': 7.9, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 1.3, 'neutral': 10.2, 'entailment': 88.5}, ['entailment'])

({'contradiction': 66.9, 'neutral': 19.0, 'entailment': 14.1}, ['contradiction'])
({'contradiction': 60.2, 'neutral': 39.4, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 70.5, 'neutral': 28.2, 'entailment': 1.2}, ['contradiction'])
({'contradiction': 79.6, 'neutral': 17.0, 'entailment': 3.4}, ['contradiction'])
({'contradiction': 44.5, 'neutral': 55.0, 'entailment': 0.5}, ['neutral'])
({'contradiction': 63.0, 'neutral': 36.5, 'entailment': 0.5}, ['contradiction'])
({'contradiction': 1.6, 'neutral': 30.8, 'entailment': 67.5}, ['entailment'])
({'contradiction': 1.3, 'neutral': 45.7, 'en

In [118]:
print(bart_nli(premise_b3, hypothesis1))
print(bart_nli(premise_b3, hypothesis2))
print(bart_nli(premise_b3, hypothesis3))
print(bart_nli(premise_b3, hypothesis4))
print(bart_nli(premise_b3, hypothesis5))
print()
print(bart_nli(premise_b3, hypothesis6))
print(bart_nli(premise_b3, hypothesis7))
print(bart_nli(premise_b3, hypothesis8))
print(bart_nli(premise_b3, hypothesis9))
print(bart_nli(premise_b3, hypothesis10))
print(bart_nli(premise_b3, hypothesis11))
print(bart_nli(premise_b3, hypothesis12))
print(bart_nli(premise_b3, hypothesis13))
print(bart_nli(premise_b3, hypothesis14))

({'contradiction': 10.4, 'neutral': 34.2, 'entailment': 55.4}, ['entailment'])
({'contradiction': 87.8, 'neutral': 9.7, 'entailment': 2.5}, ['contradiction'])
({'contradiction': 40.4, 'neutral': 44.7, 'entailment': 14.9}, ['neutral'])
({'contradiction': 93.1, 'neutral': 6.6, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 10.9, 'neutral': 53.0, 'entailment': 36.1}, ['neutral'])

({'contradiction': 1.0, 'neutral': 23.0, 'entailment': 76.1}, ['entailment'])
({'contradiction': 33.4, 'neutral': 65.6, 'entailment': 1.0}, ['neutral'])
({'contradiction': 62.1, 'neutral': 34.8, 'entailment': 3.1}, ['contradiction'])
({'contradiction': 45.3, 'neutral': 31.6, 'entailment': 23.1}, ['contradiction'])
({'contradiction': 3.5, 'neutral': 85.8, 'entailment': 10.6}, ['neutral'])
({'contradiction': 10.4, 'neutral': 88.7, 'entailment': 0.9}, ['neutral'])
({'contradiction': 0.2, 'neutral': 9.3, 'entailment': 90.5}, ['entailment'])
({'contradiction': 0.1, 'neutral': 5.1, 'entailment': 94.9}, ['en

In [119]:
print(bart_nli(premise_b4, hypothesis1))
print(bart_nli(premise_b4, hypothesis2))
print(bart_nli(premise_b4, hypothesis3))
print(bart_nli(premise_b4, hypothesis4))
print(bart_nli(premise_b4, hypothesis5))
print()
print(bart_nli(premise_b4, hypothesis6))
print(bart_nli(premise_b4, hypothesis7))
print(bart_nli(premise_b4, hypothesis8))
print(bart_nli(premise_b4, hypothesis9))
print(bart_nli(premise_b4, hypothesis10))
print(bart_nli(premise_b4, hypothesis11))
print(bart_nli(premise_b4, hypothesis12))
print(bart_nli(premise_b4, hypothesis13))
print(bart_nli(premise_b4, hypothesis14))

({'contradiction': 22.1, 'neutral': 72.5, 'entailment': 5.4}, ['neutral'])
({'contradiction': 10.8, 'neutral': 18.5, 'entailment': 70.7}, ['entailment'])
({'contradiction': 0.2, 'neutral': 10.3, 'entailment': 89.5}, ['entailment'])
({'contradiction': 10.1, 'neutral': 78.1, 'entailment': 11.8}, ['neutral'])
({'contradiction': 20.6, 'neutral': 66.2, 'entailment': 13.2}, ['neutral'])

({'contradiction': 3.3, 'neutral': 74.4, 'entailment': 22.3}, ['neutral'])
({'contradiction': 0.1, 'neutral': 16.3, 'entailment': 83.6}, ['entailment'])
({'contradiction': 6.0, 'neutral': 86.6, 'entailment': 7.4}, ['neutral'])
({'contradiction': 10.0, 'neutral': 40.3, 'entailment': 49.7}, ['entailment'])
({'contradiction': 0.1, 'neutral': 11.7, 'entailment': 88.2}, ['entailment'])
({'contradiction': 0.2, 'neutral': 44.0, 'entailment': 55.8}, ['entailment'])
({'contradiction': 1.6, 'neutral': 93.3, 'entailment': 5.1}, ['neutral'])
({'contradiction': 0.1, 'neutral': 21.7, 'entailment': 78.1}, ['entailment'])
(

In [120]:
print(bart_nli(premise_b5, hypothesis1))
print(bart_nli(premise_b5, hypothesis2))
print(bart_nli(premise_b5, hypothesis3))
print(bart_nli(premise_b5, hypothesis4))
print(bart_nli(premise_b5, hypothesis5))
print()
print(bart_nli(premise_b5, hypothesis6))
print(bart_nli(premise_b5, hypothesis7))
print(bart_nli(premise_b5, hypothesis8))
print(bart_nli(premise_b5, hypothesis9))
print(bart_nli(premise_b5, hypothesis10))
print(bart_nli(premise_b5, hypothesis11))
print(bart_nli(premise_b5, hypothesis12))
print(bart_nli(premise_b5, hypothesis13))
print(bart_nli(premise_b5, hypothesis14))

({'contradiction': 5.9, 'neutral': 15.5, 'entailment': 78.7}, ['entailment'])
({'contradiction': 97.7, 'neutral': 2.1, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 93.3, 'neutral': 6.0, 'entailment': 0.7}, ['contradiction'])
({'contradiction': 0.1, 'neutral': 5.9, 'entailment': 94.0}, ['entailment'])
({'contradiction': 77.5, 'neutral': 18.2, 'entailment': 4.3}, ['contradiction'])

({'contradiction': 60.9, 'neutral': 34.2, 'entailment': 4.9}, ['contradiction'])
({'contradiction': 23.6, 'neutral': 49.5, 'entailment': 26.9}, ['neutral'])
({'contradiction': 15.7, 'neutral': 78.8, 'entailment': 5.5}, ['neutral'])
({'contradiction': 44.6, 'neutral': 42.1, 'entailment': 13.2}, ['contradiction'])
({'contradiction': 29.8, 'neutral': 64.1, 'entailment': 6.0}, ['neutral'])
({'contradiction': 4.8, 'neutral': 94.5, 'entailment': 0.7}, ['neutral'])
({'contradiction': 3.1, 'neutral': 26.2, 'entailment': 70.7}, ['entailment'])
({'contradiction': 0.8, 'neutral': 28.8, 'entailment': 70.4}, 

In [121]:
print(bart_nli(premise_n1, hypothesis1))
print(bart_nli(premise_n1, hypothesis2))
print(bart_nli(premise_n1, hypothesis3))
print(bart_nli(premise_n1, hypothesis4))
print(bart_nli(premise_n1, hypothesis5))
print()

print(bart_nli(premise_n2, hypothesis1))
print(bart_nli(premise_n2, hypothesis2))
print(bart_nli(premise_n2, hypothesis3))
print(bart_nli(premise_n2, hypothesis4))
print(bart_nli(premise_n2, hypothesis5))
print()

print(bart_nli(premise_n3, hypothesis1))
print(bart_nli(premise_n3, hypothesis2))
print(bart_nli(premise_n3, hypothesis3))
print(bart_nli(premise_n3, hypothesis4))
print(bart_nli(premise_n3, hypothesis5))
print()

print(bart_nli(premise_n4, hypothesis1))
print(bart_nli(premise_n4, hypothesis2))
print(bart_nli(premise_n4, hypothesis3))
print(bart_nli(premise_n4, hypothesis4))
print(bart_nli(premise_n4, hypothesis5))
print()

print(bart_nli(premise_n5, hypothesis1))
print(bart_nli(premise_n5, hypothesis2))
print(bart_nli(premise_n5, hypothesis3))
print(bart_nli(premise_n5, hypothesis4))
print(bart_nli(premise_n5, hypothesis5))
print()

({'contradiction': 23.7, 'neutral': 44.3, 'entailment': 31.9}, ['neutral'])
({'contradiction': 77.9, 'neutral': 20.8, 'entailment': 1.4}, ['contradiction'])
({'contradiction': 93.7, 'neutral': 6.0, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 95.2, 'neutral': 3.9, 'entailment': 0.8}, ['contradiction'])
({'contradiction': 62.8, 'neutral': 29.2, 'entailment': 8.1}, ['contradiction'])

({'contradiction': 91.7, 'neutral': 7.6, 'entailment': 0.7}, ['contradiction'])
({'contradiction': 95.4, 'neutral': 4.6, 'entailment': 0.0}, ['contradiction'])
({'contradiction': 98.5, 'neutral': 1.4, 'entailment': 0.0}, ['contradiction'])
({'contradiction': 98.1, 'neutral': 1.8, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 97.4, 'neutral': 2.5, 'entailment': 0.1}, ['contradiction'])

({'contradiction': 84.8, 'neutral': 12.7, 'entailment': 2.6}, ['contradiction'])
({'contradiction': 85.9, 'neutral': 14.0, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 95.4, 'neutral': 4.

In [122]:
print(bart_nli(premise_n1, hypothesis1))
print(bart_nli(premise_n1, hypothesis2))
print(bart_nli(premise_n1, hypothesis3))
print(bart_nli(premise_n1, hypothesis4))
print(bart_nli(premise_n1, hypothesis5))
print()
print(bart_nli(premise_n1, hypothesis6))
print(bart_nli(premise_n1, hypothesis7))
print(bart_nli(premise_n1, hypothesis8))
print(bart_nli(premise_n1, hypothesis9))
print(bart_nli(premise_n1, hypothesis10))
print(bart_nli(premise_n1, hypothesis11))
print(bart_nli(premise_n1, hypothesis12))
print(bart_nli(premise_n1, hypothesis13))
print(bart_nli(premise_n1, hypothesis14))

({'contradiction': 23.7, 'neutral': 44.3, 'entailment': 31.9}, ['neutral'])
({'contradiction': 77.9, 'neutral': 20.8, 'entailment': 1.4}, ['contradiction'])
({'contradiction': 93.7, 'neutral': 6.0, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 95.2, 'neutral': 3.9, 'entailment': 0.8}, ['contradiction'])
({'contradiction': 62.8, 'neutral': 29.2, 'entailment': 8.1}, ['contradiction'])

({'contradiction': 79.2, 'neutral': 15.9, 'entailment': 4.9}, ['contradiction'])
({'contradiction': 47.2, 'neutral': 52.0, 'entailment': 0.8}, ['neutral'])
({'contradiction': 79.5, 'neutral': 18.7, 'entailment': 1.8}, ['contradiction'])
({'contradiction': 50.5, 'neutral': 40.6, 'entailment': 8.9}, ['contradiction'])
({'contradiction': 33.1, 'neutral': 65.6, 'entailment': 1.3}, ['neutral'])
({'contradiction': 38.6, 'neutral': 60.8, 'entailment': 0.6}, ['neutral'])
({'contradiction': 47.0, 'neutral': 40.7, 'entailment': 12.3}, ['contradiction'])
({'contradiction': 57.2, 'neutral': 32.5, 'entailme

In [123]:
print(bart_nli(premise_n2, hypothesis1))
print(bart_nli(premise_n2, hypothesis2))
print(bart_nli(premise_n2, hypothesis3))
print(bart_nli(premise_n2, hypothesis4))
print(bart_nli(premise_n2, hypothesis5))
print()
print(bart_nli(premise_n2, hypothesis6))
print(bart_nli(premise_n2, hypothesis7))
print(bart_nli(premise_n2, hypothesis8))
print(bart_nli(premise_n2, hypothesis9))
print(bart_nli(premise_n2, hypothesis10))
print(bart_nli(premise_n2, hypothesis11))
print(bart_nli(premise_n2, hypothesis12))
print(bart_nli(premise_n2, hypothesis13))
print(bart_nli(premise_n2, hypothesis14))

({'contradiction': 91.7, 'neutral': 7.6, 'entailment': 0.7}, ['contradiction'])
({'contradiction': 95.4, 'neutral': 4.6, 'entailment': 0.0}, ['contradiction'])
({'contradiction': 98.5, 'neutral': 1.4, 'entailment': 0.0}, ['contradiction'])
({'contradiction': 98.1, 'neutral': 1.8, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 97.4, 'neutral': 2.5, 'entailment': 0.1}, ['contradiction'])

({'contradiction': 95.0, 'neutral': 4.8, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 91.5, 'neutral': 8.5, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 93.4, 'neutral': 6.5, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 77.9, 'neutral': 20.9, 'entailment': 1.2}, ['contradiction'])
({'contradiction': 75.5, 'neutral': 24.3, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 55.5, 'neutral': 44.4, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 86.2, 'neutral': 13.5, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 97.4, 'neutral':

In [124]:
print(bart_nli(premise_n3, hypothesis1))
print(bart_nli(premise_n3, hypothesis2))
print(bart_nli(premise_n3, hypothesis3))
print(bart_nli(premise_n3, hypothesis4))
print(bart_nli(premise_n3, hypothesis5))
print()
print(bart_nli(premise_n3, hypothesis6))
print(bart_nli(premise_n3, hypothesis7))
print(bart_nli(premise_n3, hypothesis8))
print(bart_nli(premise_n3, hypothesis9))
print(bart_nli(premise_n3, hypothesis10))
print(bart_nli(premise_n3, hypothesis11))
print(bart_nli(premise_n3, hypothesis12))
print(bart_nli(premise_n3, hypothesis13))
print(bart_nli(premise_n3, hypothesis14))

({'contradiction': 84.8, 'neutral': 12.7, 'entailment': 2.6}, ['contradiction'])
({'contradiction': 85.9, 'neutral': 14.0, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 95.4, 'neutral': 4.4, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 96.9, 'neutral': 2.7, 'entailment': 0.4}, ['contradiction'])
({'contradiction': 95.9, 'neutral': 3.3, 'entailment': 0.8}, ['contradiction'])

({'contradiction': 85.3, 'neutral': 11.6, 'entailment': 3.1}, ['contradiction'])
({'contradiction': 87.1, 'neutral': 12.6, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 88.4, 'neutral': 9.5, 'entailment': 2.1}, ['contradiction'])
({'contradiction': 55.9, 'neutral': 41.2, 'entailment': 2.9}, ['contradiction'])
({'contradiction': 33.8, 'neutral': 65.5, 'entailment': 0.7}, ['neutral'])
({'contradiction': 30.1, 'neutral': 69.6, 'entailment': 0.2}, ['neutral'])
({'contradiction': 74.6, 'neutral': 20.9, 'entailment': 4.5}, ['contradiction'])
({'contradiction': 93.0, 'neutral': 6.0, 'e

In [125]:
print(bart_nli(premise_n4, hypothesis1))
print(bart_nli(premise_n4, hypothesis2))
print(bart_nli(premise_n4, hypothesis3))
print(bart_nli(premise_n4, hypothesis4))
print(bart_nli(premise_n4, hypothesis5))
print()
print(bart_nli(premise_n4, hypothesis6))
print(bart_nli(premise_n4, hypothesis7))
print(bart_nli(premise_n4, hypothesis8))
print(bart_nli(premise_n4, hypothesis9))
print(bart_nli(premise_n4, hypothesis10))
print(bart_nli(premise_n4, hypothesis11))
print(bart_nli(premise_n4, hypothesis12))
print(bart_nli(premise_n4, hypothesis13))
print(bart_nli(premise_n4, hypothesis14))

({'contradiction': 20.8, 'neutral': 16.0, 'entailment': 63.1}, ['entailment'])
({'contradiction': 89.9, 'neutral': 6.6, 'entailment': 3.4}, ['contradiction'])
({'contradiction': 97.1, 'neutral': 2.5, 'entailment': 0.4}, ['contradiction'])
({'contradiction': 99.1, 'neutral': 0.9, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 79.4, 'neutral': 13.3, 'entailment': 7.3}, ['contradiction'])

({'contradiction': 70.6, 'neutral': 17.0, 'entailment': 12.4}, ['contradiction'])
({'contradiction': 96.5, 'neutral': 3.3, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 59.5, 'neutral': 38.6, 'entailment': 1.9}, ['contradiction'])
({'contradiction': 78.0, 'neutral': 15.1, 'entailment': 7.0}, ['contradiction'])
({'contradiction': 75.6, 'neutral': 23.7, 'entailment': 0.7}, ['contradiction'])
({'contradiction': 98.3, 'neutral': 1.6, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 12.1, 'neutral': 37.7, 'entailment': 50.2}, ['entailment'])
({'contradiction': 1.2, 'neutral': 

In [126]:
print(bart_nli(premise_n5, hypothesis1))
print(bart_nli(premise_n5, hypothesis2))
print(bart_nli(premise_n5, hypothesis3))
print(bart_nli(premise_n5, hypothesis4))
print(bart_nli(premise_n5, hypothesis5))
print()
print(bart_nli(premise_n5, hypothesis6))
print(bart_nli(premise_n5, hypothesis7))
print(bart_nli(premise_n5, hypothesis8))
print(bart_nli(premise_n5, hypothesis9))
print(bart_nli(premise_n5, hypothesis10))
print(bart_nli(premise_n5, hypothesis11))
print(bart_nli(premise_n5, hypothesis12))
print(bart_nli(premise_n5, hypothesis13))
print(bart_nli(premise_n5, hypothesis14))

({'contradiction': 43.9, 'neutral': 25.8, 'entailment': 30.3}, ['contradiction'])
({'contradiction': 98.6, 'neutral': 1.2, 'entailment': 0.2}, ['contradiction'])
({'contradiction': 97.3, 'neutral': 2.3, 'entailment': 0.4}, ['contradiction'])
({'contradiction': 83.1, 'neutral': 12.1, 'entailment': 4.8}, ['contradiction'])
({'contradiction': 33.6, 'neutral': 32.2, 'entailment': 34.2}, ['entailment'])

({'contradiction': 81.9, 'neutral': 15.5, 'entailment': 2.6}, ['contradiction'])
({'contradiction': 97.5, 'neutral': 2.4, 'entailment': 0.1}, ['contradiction'])
({'contradiction': 47.4, 'neutral': 50.3, 'entailment': 2.3}, ['neutral'])
({'contradiction': 73.1, 'neutral': 15.9, 'entailment': 11.0}, ['contradiction'])
({'contradiction': 80.2, 'neutral': 19.0, 'entailment': 0.8}, ['contradiction'])
({'contradiction': 85.7, 'neutral': 14.0, 'entailment': 0.3}, ['contradiction'])
({'contradiction': 42.7, 'neutral': 43.2, 'entailment': 14.2}, ['neutral'])
({'contradiction': 25.4, 'neutral': 43.5,

## Prejudice against muslim full

In [129]:
df_muslim = pd.read_csv("data/toxicbias_train.csv")
df_muslim = df_muslim[df_muslim['rationale'] == 'prejudice against muslims']
df_muslim.shape

(648, 6)

In [132]:
df_neutral_muslim_test = df_neutral.sample(648)
df_neutral_muslim_test.shape

(648, 6)

In [133]:
hypotheses = {
  'hypothesis1': 'The use of violence is encouraged or desired against a religion.',
  'hypothesis2': 'A religion should be discriminated against.',
  'hypothesis3': 'Members of a religion should be excluded and marginalized.',
  'hypothesis4': 'Members of a religion should remain in their respective countries.',
  'hypothesis5': 'Members of a religion are compared to animals.',
  'hypothesis6': 'A religion or members of a religion are compared to devils or demons.',
  'hypothesis7': 'The integration of a religion should be avoided in a country or society.',
  'hypothesis8': 'A religion or members of a religion are stupid.',
  'hypothesis9': 'A religion is inferior when compared to another religion.',
  'hypothesis10': 'Religious practices of a particular faith should be restricted or banned in public spaces.',
  'hypothesis11': 'Religious attire or symbols should be prohibited.',
  'hypothesis12': 'A religion promotes violence or intolerance.',
  'hypothesis13': 'Members of a religion are a threat.',
  'hypothesis14': 'Members of a religion are not trustworthy or loyal.'
}


In [138]:
import pandas as pd

def add_nli_predictions(df, model):
    df = df.copy()  # Create a copy of the DataFrame

    df = df.apply(lambda row: predict_labels(row, model), axis=1)

    return df

def predict_labels(row, model):
    comment_text = row['comment_text']

    results_labels = []
    results_scores = []
    for hypothesis in hypotheses.values():
        score, label = model(comment_text, hypothesis)
        results_labels.append(label[0])
        results_scores.append(score)

    row['results_labels'] = results_labels
    row['results_scores'] = results_scores
    return row

In [139]:
test = df_neutral_muslim_test.head(1)

add_nli_predictions(test, bart_nli)

Unnamed: 0,comment_text,identity_attack_x,bias,target,category,rationale,results_labels,results_scores
3424,So far the only person who seems to be able to rid the Catholic Church of these filthy rapists is Mother Nature. Sometimes death is such a wonderful tool.,0.256757,neutral,,none,,"[[entailment], [contradiction], [contradiction], [contradiction], [contradiction], [contradiction], [contradiction], [contradiction], [contradiction], [neutral], [contradiction], [entailment], [entailment], [neutral]]","[{'contradiction': 27.0, 'neutral': 20.0, 'entailment': 52.9}, {'contradiction': 98.0, 'neutral': 1.9, 'entailment': 0.1}, {'contradiction': 90.6, 'neutral': 9.0, 'entailment': 0.4}, {'contradiction': 99.5, 'neutral': 0.5, 'entailment': 0.0}, {'contradiction': 51.4, 'neutral': 32.3, 'entailment': 16.3}, {'contradiction': 78.8, 'neutral': 17.1, 'entailment': 4.1}, {'contradiction': 83.3, 'neutral': 16.5, 'entailment': 0.2}, {'contradiction': 82.7, 'neutral': 16.9, 'entailment': 0.3}, {'contra..."
