Installing dataset

Installing required dependencies

In [None]:
pip install transformers

In [None]:
pip install datasets

In [None]:
pip install detoxify

Importing libraries

In [None]:
from datasets import load_dataset, concatenate_datasets
import pandas as pd
import tensorflow as tf
from transformers import TFRobertaForSequenceClassification,RobertaTokenizer,AutoTokenizer, TFAutoModelForSequenceClassification,pipeline,TextClassificationPipeline,AutoModelForSequenceClassification,BertTokenizer, TFBertForSequenceClassification
from prettytable import PrettyTable
from detoxify import Detoxify

Using dataset *cnn_dailymail* for running bias models

In [84]:
# Load only the training split of CNN/DailyMail (config '1.0.0').
# Passing split='train' returns that split directly, so the previous
# concatenate_datasets([...]) wrapper around a single dataset is not needed.
combined_dataset = load_dataset('cnn_dailymail', '1.0.0', split='train')

In [85]:
# Report the number of examples in the loaded dataset (label and count on separate lines).
print("Combined Length", len(combined_dataset), sep="\n")

Combined Length
287113


Put all highlights (summaries of news articles) in a DataFrame

In [None]:
# Read the 'highlights' column directly instead of looping over every example:
# iterating the dataset yields whole records, only to keep a single field of each.
# list(...) keeps this working whether column access returns a list or a lazy column object.
df_train_highlights = pd.DataFrame({'highlights': list(combined_dataset['highlights'])})


Using the first highlight to test the models

In [None]:
# Use the first highlight as the single sample text fed to every model below.
highlights_column = df_train_highlights['highlights']
predict_val = highlights_column.iloc[0]

In [None]:
# BIAS https://huggingface.co/d4data/bias-detection-model
BIAS_MODEL_ID = "d4data/bias-detection-model"

# Tokenizer and TensorFlow sequence-classification weights from the same checkpoint
tokenizer_bias = AutoTokenizer.from_pretrained(BIAS_MODEL_ID)
model_bias = TFAutoModelForSequenceClassification.from_pretrained(BIAS_MODEL_ID)

# Wrap both in a text-classification pipeline.
# No device argument is passed here, so device placement is left to the pipeline defaults.
classifier_bias = pipeline(
    'text-classification',
    tokenizer=tokenizer_bias,
    model=model_bias,
)


In [None]:
# TOXIC https://huggingface.co/unitary/toxic-bert
TOXIC_BERT_MODEL_ID = "unitary/toxic-bert"

# Tokenizer and sequence-classification weights from the same checkpoint
tokenizer_toxic1 = AutoTokenizer.from_pretrained(TOXIC_BERT_MODEL_ID)
model_toxic1 = AutoModelForSequenceClassification.from_pretrained(TOXIC_BERT_MODEL_ID)

# Text-classification pipeline around the pair above.
# No device argument is passed here, so device placement is left to the pipeline defaults.
classifier__toxic1 = pipeline(
    'text-classification',
    tokenizer=tokenizer_toxic1,
    model=model_toxic1,
)


In [None]:
# TOXIC https://huggingface.co/martin-ha/toxic-comment-model
TOXIC_COMMENT_MODEL_ID = "martin-ha/toxic-comment-model"

# Tokenizer and sequence-classification weights from the same checkpoint
tokenizer_toxic = AutoTokenizer.from_pretrained(TOXIC_COMMENT_MODEL_ID)
model_toxic = AutoModelForSequenceClassification.from_pretrained(TOXIC_COMMENT_MODEL_ID)

# This checkpoint is wrapped with TextClassificationPipeline directly rather than pipeline(...)
classifier__toxic = TextClassificationPipeline(
    tokenizer=tokenizer_toxic,
    model=model_toxic,
)


In [None]:
# BERT base (uncased): tokenizer plus a TensorFlow sequence-classification model with a 2-way head.
# NOTE: the loader output recorded for this cell reports that 'classifier.weight' and
# 'classifier.bias' are newly initialized, i.e. the head has not been trained for any task.
BERT_CHECKPOINT = "bert-base-uncased"

tokenizer_bert_base_uncased = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
model_bert_base_uncased = TFBertForSequenceClassification.from_pretrained(
    BERT_CHECKPOINT,
    num_labels=2,
)



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# RoBERTa base: tokenizer plus a TensorFlow sequence-classification model with a 9-way head.
# NOTE(review): presumably this head is freshly initialized as well (base checkpoint) — confirm
# before relying on its predictions.
ROBERTA_CHECKPOINT = "roberta-base"

tokenizer_roberta_base = RobertaTokenizer.from_pretrained(ROBERTA_CHECKPOINT)
model_roberta_base = TFRobertaForSequenceClassification.from_pretrained(
    ROBERTA_CHECKPOINT,
    num_labels=9,
)

In [None]:
# Emotion classifier built from the SamLowe/roberta-base-go_emotions checkpoint;
# top_k=None asks the pipeline for a score per label instead of only the best one.
classifier = pipeline(
    task="text-classification",
    model="SamLowe/roberta-base-go_emotions",
    top_k=None,
)

sentences = predict_val

# The first element of the result holds the per-label score dicts for the sample text;
# keep the entry with the highest score.
emotion_scores = classifier(sentences)[0]
max_score_label = max(emotion_scores, key=lambda entry: entry['score'])

In [None]:
# Toxicity check with the BERT-base and RoBERTa-base sequence classifiers.
#
# Each model is fed text encoded by its own tokenizer: the bare name `tokenizer` used
# previously is not defined anywhere in this notebook, and the two models do not share a
# vocabulary, so one encoding cannot serve both.
encode_kwargs = dict(padding=True, truncation=True, max_length=128, return_tensors="tf")
inputs_bert = tokenizer_bert_base_uncased(predict_val, **encode_kwargs)
inputs_roberta = tokenizer_roberta_base(predict_val, **encode_kwargs)

# Perform inference for both models
outputs_bert = model_bert_base_uncased(inputs_bert)
outputs_roberta = model_roberta_base(inputs_roberta)

# Turn the logits into per-class probabilities
predictions_bert = tf.nn.softmax(outputs_bert.logits, axis=-1)
predictions_roberta = tf.nn.softmax(outputs_roberta.logits, axis=-1)

# Class index 1 is read as "toxic", anything else as "not toxic".
# NOTE(review): model_bert_base_uncased / model_roberta_base are base checkpoints loaded with
# num_labels=2 / num_labels=9; confirm that index 1 really means "toxic" for them.
label_bert = "toxic" if tf.argmax(predictions_bert, axis=1).numpy()[0] == 1 else "not toxic"
label_roberta = "toxic" if tf.argmax(predictions_roberta, axis=1).numpy()[0] == 1 else "not toxic"

# Echo the labels so the cell reproduces the output recorded beneath it.
print(f"The response from BERT-based model is {label_bert}.")
print(f"The response from RoBERTa-based model is {label_roberta}.")

The response from BERT-based model is not toxic.
The response from RoBERTa-based model is not toxic.


In [None]:
# Emotion category from the BERT-base and RoBERTa-base sequence classifiers.
#
# The text is encoded here, once per model with that model's own tokenizer. Previously the
# cell read a variable `inputs` that is only assigned further down the notebook, so it could
# not run in top-to-bottom order.
encode_kwargs = dict(padding=True, truncation=True, max_length=128, return_tensors="tf")
inputs_bert = tokenizer_bert_base_uncased(predict_val, **encode_kwargs)
inputs_roberta = tokenizer_roberta_base(predict_val, **encode_kwargs)

# NOTE(review): `emotion_labels` is not defined in any cell visible in this notebook; it must
# be a sequence indexable by the predicted class id (up to 9 classes for the RoBERTa head) —
# confirm where it comes from and define it before this cell.

# Perform inference and get predicted emotion category for BERT-based model
outputs_bert = model_bert_base_uncased(inputs_bert)
predicted_label_id_bert = tf.argmax(outputs_bert.logits, axis=1).numpy()[0]
predicted_label_bert = emotion_labels[predicted_label_id_bert]

# Perform inference and get predicted emotion category for RoBERTa-based model
outputs_roberta = model_roberta_base(inputs_roberta)
predicted_label_id_roberta = tf.argmax(outputs_roberta.logits, axis=1).numpy()[0]
predicted_label_roberta = emotion_labels[predicted_label_id_roberta]

# Echo the categories so the cell reproduces the output recorded beneath it.
print(f"The predicted emotion category from BERT-based model is: {predicted_label_bert}.")
print(f"The predicted emotion category from RoBERTa-based model is: {predicted_label_roberta}.")


The predicted emotion category from BERT-based model is: neutral.
The predicted emotion category from RoBERTa-based model is: sadness.


In [None]:
# Sexism check with the BERT-base and RoBERTa-base sequence classifiers.
#
# Each model is fed text encoded by its own tokenizer: the bare name `tokenizer` used
# previously is not defined anywhere in this notebook, and the two models do not share a
# vocabulary. `inputs` keeps its original name and now holds the BERT encoding.
encode_kwargs = dict(padding=True, truncation=True, max_length=128, return_tensors="tf")
inputs = tokenizer_bert_base_uncased(predict_val, **encode_kwargs)
inputs_roberta = tokenizer_roberta_base(predict_val, **encode_kwargs)

# Perform inference and get predicted label for BERT-based model
outputs_bert = model_bert_base_uncased(inputs)
predictions_bert = tf.nn.softmax(outputs_bert.logits, axis=-1)
# Class index 1 is read as "sexist" — NOTE(review): confirm this mapping for the loaded head.
label_bert = "sexist" if tf.argmax(predictions_bert, axis=1).numpy()[0] == 1 else "not sexist"

# Perform inference and get predicted label for RoBERTa-based model
outputs_roberta = model_roberta_base(inputs_roberta)
predictions_roberta = tf.nn.softmax(outputs_roberta.logits, axis=-1)
label_roberta = "sexist" if tf.argmax(predictions_roberta, axis=1).numpy()[0] == 1 else "not sexist"

# Echo the labels so the cell reproduces the output recorded beneath it.
print(f"The response from BERT-based model is {label_bert}.")
print(f"The response from RoBERTa-based model is {label_roberta}.")


The response from BERT-based model is not sexist.
The response from RoBERTa-based model is not sexist.


Outputs of each model

In [86]:
# Show the sample text that all of the models were run on.
print("Summary String: {}".format(predict_val))


Summary String: Harry Potter star Daniel Radcliffe gets £20M fortune as he turns 18 Monday . Young actor says he has no plans to fritter his cash away . Radcliffe's earnings from first five Potter films have been held in trust fund .


In [87]:
# Collect every model's verdict on the sample text in one two-column table.
# PrettyTable comes from the notebook's import cell, so it is not re-imported here.
table = PrettyTable()
table.field_names = ["Category", "Response"]

sample_text = str(predict_val)

# Pipeline-based classifiers are invoked here; the remaining rows reuse values computed in
# earlier cells.
table.add_row(["Bias Detection Model", classifier_bias(sample_text)])
table.add_row(["Max Value Response from Roberta", max_score_label])
table.add_row(["Toxic BERT", classifier__toxic1(sample_text)])
table.add_row(["Toxic Comment Model", classifier__toxic(sample_text)])

# NOTE(review): label_bert / label_roberta are assigned by more than one earlier cell
# (toxicity and sexism), so these rows show whichever of those cells ran most recently.
# The two rows that repeated them verbatim at the end of the table have been dropped.
table.add_row(["Response from BERT-based model", label_bert])
table.add_row(["Response from RoBERTa-based model", label_roberta])
table.add_row(["Predicted Emotion Category from BERT", predicted_label_bert])
table.add_row(["Predicted Emotion Category from RoBERTa", predicted_label_roberta])

# Print the table
print(table)


+-----------------------------------------+--------------------------------------------------------+
|                 Category                |                        Response                        |
+-----------------------------------------+--------------------------------------------------------+
|           Bias Detection Model          | [{'label': 'Non-biased', 'score': 0.5043940544128418}] |
|     Max Value Response from Roberta     |   {'label': 'neutral', 'score': 0.9192013144493103}    |
|                Toxic BERT               |  [{'label': 'toxic', 'score': 0.0008910775068216026}]  |
|           Toxic Comment Model           | [{'label': 'non-toxic', 'score': 0.9989933371543884}]  |
|      Response from BERT-based model     |                       not toxic                        |
|    Response from RoBERTa-based model    |                       not toxic                        |
|   Predicted Emotion Category from BERT  |                        neutral                 

In [74]:
# Instantiate the three Detoxify checkpoints
original_model = Detoxify('original')
unbiased_model = Detoxify('unbiased')
multilingual_model = Detoxify('multilingual')

# Score the sample text with each checkpoint
original_results = original_model.predict(predict_val)
unbiased_results = unbiased_model.predict(predict_val)
multilingual_results = multilingual_model.predict(predict_val)

# Three-column table: one row per (model, category) pair
table = PrettyTable()
table.field_names = ["Model", "Category", "Probability"]

scores_by_model = {
    "Original": original_results,
    "Unbiased": unbiased_results,
    "Multilingual": multilingual_results,
}
for label, scores in scores_by_model.items():
    for name in scores:
        table.add_row([label, name, scores[name]])

# Print the table
print(table)


+--------------+-----------------+----------------+
|    Model     |     Category    |  Probability   |
+--------------+-----------------+----------------+
|   Original   |     toxicity    | 0.00089107745  |
|   Original   | severe_toxicity | 0.00010377898  |
|   Original   |     obscene     | 0.00016919927  |
|   Original   |      threat     | 0.000114530434 |
|   Original   |      insult     | 0.00020599189  |
|   Original   | identity_attack | 0.00014119442  |
|   Unbiased   |     toxicity    |  0.000525384   |
|   Unbiased   | severe_toxicity | 1.2461513e-06  |
|   Unbiased   |     obscene     | 3.7944104e-05  |
|   Unbiased   | identity_attack |  6.431531e-05  |
|   Unbiased   |      insult     | 0.00015094518  |
|   Unbiased   |      threat     | 1.7821974e-05  |
|   Unbiased   | sexual_explicit | 1.5974996e-05  |
| Multilingual |     toxicity    | 0.00037832616  |
| Multilingual | severe_toxicity |  3.070612e-05  |
| Multilingual |     obscene     | 0.00022446709  |
| Multilingu