In [10]:
from transformers import pipeline

# Multi-label toxicity scoring through the high-level pipeline API.
# A sigmoid (rather than softmax) is requested so each label gets its own
# independent score, and all label scores are returned instead of only the top one.
# NOTE(review): recent transformers releases mark `return_all_scores` as deprecated
# in favour of `top_k=None`; it is kept here because switching appears to change the
# nesting/ordering of the returned list -- confirm before migrating.
detoxify_pipeline = pipeline(
    task="text-classification",
    model="unitary/toxic-bert",
    tokenizer="bert-base-uncased",
    function_to_apply="sigmoid",
    return_all_scores=True,
)

# Bare last expression so the notebook renders the result.
detoxify_pipeline("shut up, you idiot!")
# Expected output (one inner list per input text, one dict per label):
# [[{'label': 'toxic', 'score': 0.9950607419013977},
#   {'label': 'severe_toxic', 'score': 0.07963108271360397},
#   {'label': 'obscene', 'score': 0.8713390231132507},
#   {'label': 'threat', 'score': 0.0019536688923835754},
#   {'label': 'insult', 'score': 0.9586619138717651},
#   {'label': 'identity_hate', 'score': 0.014700635336339474}]]



[[{'label': 'toxic', 'score': 0.9950607419013977},
  {'label': 'severe_toxic', 'score': 0.07963104546070099},
  {'label': 'obscene', 'score': 0.8713389039039612},
  {'label': 'threat', 'score': 0.001953667961061001},
  {'label': 'insult', 'score': 0.9586619138717651},
  {'label': 'identity_hate', 'score': 0.014700641855597496}]]

In [11]:
# Score the sample sentence with the Detoxify package's "original" checkpoint.
# `Detoxify` is not imported in this cell; it must already be in the kernel namespace.
model = Detoxify("original")
res = model.predict("shut up, you idiot!")
print(res)



{'toxicity': 0.99506074, 'severe_toxicity': 0.079631045, 'obscene': 0.8713389, 'threat': 0.001953668, 'insult': 0.9586619, 'identity_attack': 0.014700642}


In [13]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Manual inference with the raw checkpoint: tokenize -> forward pass -> sigmoid.
tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")

# Encode the sample sentence as PyTorch tensors.
inputs = tokenizer("shut up, you idiot!", return_tensors="pt")

# Forward pass only; gradient tracking is disabled because nothing is trained here.
with torch.no_grad():
    outputs = model(**inputs)

# Element-wise sigmoid turns each logit into an independent per-label score.
sigmoid = torch.nn.Sigmoid()
probabilities = sigmoid(outputs.logits)

# Convert to a NumPy array (one row per input text) and show it.
scores = probabilities.cpu().numpy()
print(scores)


[[0.99506074 0.07963105 0.8713389  0.00195367 0.9586619  0.01470064]]


In [24]:
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer, pipeline


# Tokenizer and ONNX weights must come from the same Hub repository. The original
# cell mixed two different repo ids for the tokenizer and the model, so a single
# constant is used for both.
ONNX_MODEL_ID = "protectai/unbiased-toxic-roberta-onnx"

# `file_name` selects the ONNX graph file and is only meaningful for the ORT model
# loader, so it is no longer passed to the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(ONNX_MODEL_ID)
model = ORTModelForSequenceClassification.from_pretrained(ONNX_MODEL_ID, file_name="model.onnx")
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

# With default pipeline settings only the top-scoring label is returned.
classifier_output = classifier('shut up, you idiot!')
print(classifier_output)


[{'label': 'toxicity', 'score': 0.9974876642227173}]


In [None]:
def sentiment_analysis_onnx_batched(model_id, df, field_name, batch_size, gpu_id):
    """Set up ONNX Runtime (CUDA) batched text classification for ``model_id``.

    Loads the ONNX export stored at ``onnx/model.onnx`` inside ``model_id`` with the
    CUDA execution provider pinned to ``gpu_id``, loads the matching tokenizer, and
    defines an inner ``classify_texts`` helper that scores a batch of texts.

    NOTE(review): this function looks unfinished. In the visible body ``df``,
    ``field_name`` and ``batch_size`` are never used, ``classify_texts`` is never
    called, ``start_time`` is never read, and nothing is returned (implicit ``None``).

    NOTE(review): ``time`` and ``torch`` are not imported in this cell; they must
    already exist in the kernel namespace when the function is called.

    Args:
        model_id: Repository id / path passed to both ``from_pretrained`` calls.
        df: Unused in the visible body -- presumably the data to score; confirm.
        field_name: Unused in the visible body -- presumably the text column; confirm.
        batch_size: Unused in the visible body -- presumably texts per batch; confirm.
        gpu_id: Forwarded as the ``device_id`` provider option.
    """
    # Relative location of the ONNX graph inside the model repository.
    file_name = "onnx/model.onnx"

    model = ORTModelForSequenceClassification.from_pretrained(model_id, file_name=file_name, provider="CUDAExecutionProvider", provider_options={'device_id': gpu_id})
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Function to classify multiple texts in batched mode and return per-label scores
    def classify_texts(texts):
        """Return one ``{label: score}`` dict per text in ``texts``.

        Scores are the element-wise sigmoid of the model logits, so each label is
        scored independently.
        """
        # Tokenize the batch of texts (padded to a common length, truncated at 512 tokens)
        # NOTE(review): the encoded tensors are not explicitly moved to the GPU --
        # confirm the ORT model accepts them as-is with the CUDA provider.
        inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=512)
        outputs = model(**inputs)

        probabilities = torch.sigmoid(outputs.logits)
        labels = model.config.id2label  # index -> label-name mapping from the model config; adjust if necessary
        
        # Process each item in the batch
        batch_results = []
        for prob in probabilities:
            # NOTE(review): squeeze() on a row with a single label would yield a 0-d
            # tensor that cannot be enumerated -- assumes more than one label.
            result = {labels[i]: prob_item.item() for i, prob_item in enumerate(prob.squeeze())}
            batch_results.append(result)
            
        return batch_results

    # Wall-clock start timestamp; not read anywhere in the visible body.
    start_time = time.time()

In [25]:
import time

# Time only the inference call. The original stopped the clock after print(), so
# console I/O was counted in a measurement of roughly ten milliseconds.
# perf_counter() is the monotonic, high-resolution clock intended for short intervals.
tic = time.perf_counter()
classifier_output = classifier('shut up, you idiot!')
toc = time.perf_counter()

print(classifier_output)
print(f"Time: {toc - tic:.3f} s")

[{'label': 'toxicity', 'score': 0.9974876642227173}]
Time: 0.011 s


In [26]:
# Load Detoxify's "unbiased" checkpoint outside the timed region so only
# inference is measured.
model = Detoxify("unbiased")

# perf_counter() is the monotonic, high-resolution clock intended for short
# intervals; time.time() can be too coarse for a measurement this small.
tic = time.perf_counter()
res = model.predict('shut up, you idiot!')
toc = time.perf_counter()
print(f"Time: {toc - tic:.3f} s")
print(res)



Time: 0.017 s
{'toxicity': 0.99738663, 'severe_toxicity': 0.0018401984, 'obscene': 0.038010202, 'identity_attack': 0.004118336, 'insult': 0.9938607, 'threat': 0.00080649974, 'sexual_explicit': 0.0021994496}
