In [1]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m65.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m80.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m71.6 MB/s[0m eta [36m0:00:00[0m
Col

In [1]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# High-level text-classification pipeline for the same checkpoint.
# NOTE(review): `pipe` is not used by any cell visible in this notebook.
pipe = pipeline(task="text-classification", model="unitary/toxic-bert")

# Tokenizer and classification model loaded directly, for manual inference below.
tokenizer = AutoTokenizer.from_pretrained("unitary/toxic-bert")
model = AutoModelForSequenceClassification.from_pretrained("unitary/toxic-bert")

In [2]:
import torch

input_text = "So cute dog!"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids

# Perform inference with the model; no_grad() skips autograd bookkeeping.
with torch.no_grad():
    outputs = model(input_ids)
outputs = outputs[0]  # first element of the model output (presumably the raw logits)

# Pass `dim` explicitly: calling softmax without it is deprecated and emits a warning.
# For this 2-D (batch, classes) tensor, dim=-1 is the axis the implicit choice used,
# so the resulting numbers are unchanged.
torch.nn.functional.softmax(outputs, dim=-1)

  torch.nn.functional.softmax(outputs)


tensor([[0.7704, 0.0248, 0.0663, 0.0268, 0.0672, 0.0445]])

This model is too heavy, so I will try a more lightweight model.

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint used for the lightweight toxicity classifier.
model_checkpoint = 'cointegrated/rubert-tiny-toxicity'

model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Run on the GPU when one is available; otherwise the model stays on the CPU.
if torch.cuda.is_available():
    model = model.cuda()


def text2toxicity(text, aggregate=True):
    """Score the toxicity of one text or a list of texts.

    Relies on the module-level `tokenizer` and `model`.

    Args:
        text: a single string or a list of strings.
        aggregate: when True, collapse the per-aspect probabilities into one score
            per text; when False, return the full vector of aspect probabilities.

    Returns:
        NumPy value(s) of sigmoid probabilities. For a single string the leading
        batch axis is removed; for a list there is one entry (or row) per text.
    """
    with torch.no_grad():
        encoded = tokenizer(text, return_tensors='pt', truncation=True, padding=True).to(model.device)
        logits = model(**encoded).logits
        scores = torch.sigmoid(logits).cpu().numpy()

    # A bare string was tokenized as a batch of one; drop that batch axis.
    if isinstance(text, str):
        scores = scores[0]

    if not aggregate:
        return scores

    # Combine the first and last aspect columns into a single score.
    # NOTE(review): presumably column 0 is the "non-toxic" probability and the last
    # column a "dangerous" probability - confirm against the checkpoint's label list.
    first_aspect = scores.T[0]
    last_aspect = scores.T[-1]
    return 1 - first_aspect * (1 - last_aspect)

I will use the translation and reference texts, together with their toxicity levels, to train the model.

In [4]:
import pandas as pd
import numpy as np

# Read the tab-separated corpus and discard the row labelled with the last position.
data = pd.read_csv('filtered.tsv', sep='\t')
data = data.drop(index=len(data) - 1)

# Build two (text, score) frames by column position and give them a common schema.
# NOTE(review): presumably one pair is the translation with its score and the other
# the reference with its score - confirm against the file's header.
data1 = data.iloc[:, [2, 6]].copy()
data1.columns = ['text', 'toxicity_score']

data2 = data.iloc[:, [1, 5]].copy()
data2.columns = ['text', 'toxicity_score']

In [5]:
# Stack both (text, toxicity_score) frames row-wise. Row labels are kept as-is,
# so the same labels appear once per source frame.
data = pd.concat((data1, data2))
data

Unnamed: 0,text,toxicity_score
0,"if Alkar floods her with her mental waste, it ...",0.981983
1,you're becoming disgusting.,0.999039
2,"well, we can spare your life.",0.985068
3,"monkey, you have to wake up.",0.994215
4,I have orders to kill her.,0.999348
...,...,...
577771,"I thought American men were bad enough, but no...",0.999624
577772,You didn't know that Estelle had stolen some f...,0.000121
577773,It'il suck the life out of you!,0.996124
577774,"I can't fuckin' take that, bruv.",0.984538


In [6]:
# Slice the first 10 rows before converting to a list, so the full column is
# not materialised as a Python list just to show 10 items.
print(data['text'].head(10).tolist())

['if Alkar floods her with her mental waste, it would explain the high levels of neurotransmitter.', "you're becoming disgusting.", 'well, we can spare your life.', 'monkey, you have to wake up.', 'I have orders to kill her.', "I'm not going to breed kids with a genetic disorder that makes them die.", "they're laughing at us. We'll show you.", "there wasn't much black in Maine then.", 'Briggs, what the hell is going on?', "another simply didn't know what to do, so whenever he met my brother, he nearly beat the shit out of him."]


In [7]:
# Score only the first 10 texts; slicing before `tolist()` avoids building a
# list of every text in the frame.
lst = text2toxicity(data['text'].head(10).tolist(), True)
print(lst[:2])

[0.99022484 0.24870145]


In [8]:
# Pair each dataset toxicity score with the model's aggregated score for the same
# text. Only the first 10 rows are converted instead of the whole column.
sh = list(zip(data['toxicity_score'].head(10).tolist(), lst[:10]))
sh

[(0.9819834232330322, 0.99022484),
 (0.9990390539169312, 0.24870145),
 (0.98506760597229, 0.31828928),
 (0.9942149519920348, 0.6044049),
 (0.9993481040000916, 0.9975751),
 (0.0358464829623699, 0.3226593),
 (0.0001314068067586, 0.07216662),
 (0.148709550499916, 0.16785556),
 (0.8410708904266357, 0.23996061),
 (0.9304717183113098, 0.58641845)]

There is a large difference between the dataset's toxicity scores and the model's predictions, so I think I need to fine-tune this model by adding new linear layers.

Let's preprocess the data to train our model.

In [12]:
# Number of rows in the combined frame.
data.shape[0]

1155552

I will reduce the dataset, because otherwise training would take a very long time.

In [14]:
# Keep only the first 200000 rows to bound training time.
# NOTE(review): this is a positional slice, not a random sample; with the row counts
# shown above, every kept row comes from the first concatenated frame (`data1`).
data = data.head(200000)

In [15]:
# Texts as a NumPy string array.
sentences = np.array(data['text'].tolist())

# Two-column targets per row: [toxicity_score, 1 - toxicity_score].
targets = np.column_stack((data['toxicity_score'], 1 - data['toxicity_score']))

print(sentences[:2], targets[:2])

['if Alkar floods her with her mental waste, it would explain the high levels of neurotransmitter.'
 "you're becoming disgusting."] [[9.81983423e-01 1.80165768e-02]
 [9.99039054e-01 9.60946083e-04]]


In [20]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn

# Pick the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load a fresh copy of the lightweight toxicity checkpoint and place it on `device`.
model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint).to(device)

class CustomDataset(Dataset):
    """Dataset that tokenizes one text per item, optionally paired with a label.

    Args:
        texts: indexable collection of strings.
        labels: indexable collection of per-text targets; required unless `test`
            is True.
        test: when True, items carry no 'labels' entry (inference mode).
        tokenizer: callable used to encode a text. When None (the default), the
            module-level `tokenizer` is looked up at item-access time.
        max_length: length every encoded sequence is padded/truncated to.

    Raises:
        ValueError: if `labels` is missing while `test` is False.
    """

    def __init__(self, texts, labels=None, test=False, tokenizer=None, max_length=128):
        if not test and labels is None:
            # ValueError is still an Exception, so existing `except Exception` callers work.
            raise ValueError("You should passed labels when you use this dataset for training!")
        self.texts = texts
        self.labels = labels
        self.test = test
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return a dict with 'input_ids', 'attention_mask' and, unless in test mode, 'labels'."""
        # Fall back to the module-level tokenizer only when none was injected; the
        # lookup happens here so the global may be (re)defined after construction.
        active_tokenizer = self.tokenizer if self.tokenizer is not None else tokenizer
        encoding = active_tokenizer(self.texts[idx], truncation=True, padding='max_length',
                                    max_length=self.max_length, return_tensors='pt')
        # flatten() removes the batch axis of size 1 added by return_tensors='pt',
        # so the DataLoader can stack items into (batch, max_length).
        res = {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
        }
        if not self.test:
            res['labels'] = torch.tensor(self.labels[idx])
        return res

# Training set: every sentence paired with its two-column target, served in
# shuffled mini-batches of 512.
dataset = CustomDataset(texts=sentences, labels=targets)
data_loader = DataLoader(dataset=dataset, shuffle=True, batch_size=512)

class CustomClassifier(nn.Module):
    """Wrap a sequence-classification backbone with a small two-output head.

    The head consumes the backbone's logits and produces a softmax distribution
    over two outputs.

    Args:
        model: backbone called as `model(input_ids=..., attention_mask=...)` whose
            result exposes a `.logits` tensor of shape (batch, num_labels).
        device: device the head is moved to (the backbone is not moved here).
    """

    def __init__(self, model, device):
        super().__init__()
        self.model = model
        # Size the head from the backbone instead of hard-coding it; fall back to 5
        # (the previously hard-coded width) when the backbone exposes no config.
        in_features = getattr(getattr(model, 'config', None), 'num_labels', 5)
        # NOTE(review): two Linear layers with no activation in between compose into
        # a single linear map; the layout is kept so saved checkpoints
        # (classifier.0.*, classifier.1.*) still load.
        self.classifier = nn.Sequential(
            nn.Linear(in_features, in_features),
            nn.Linear(in_features, 2),
            nn.Softmax(dim=1)
        )
        self.classifier.to(device)

    def forward(self, input_ids, attention_mask):
        """Return softmax probabilities of shape (batch, 2)."""
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        # The head ends in Softmax, so these are probabilities rather than logits.
        probabilities = self.classifier(outputs.logits)
        return probabilities


# Backbone plus the extra linear head, with the head placed on `device`.
classifier = CustomClassifier(model=model, device=device)

# Mean-squared error between the predicted two-way distribution and the targets.
criterion = nn.MSELoss()

# SGD with momentum over every parameter of the wrapper (backbone and head).
optimizer = torch.optim.SGD(params=classifier.parameters(), momentum=0.9, lr=0.01)

for epoch in range(5):
    classifier.train()
    total_loss = 0.0

    for batch in data_loader:
        # Move each tensor of the mini-batch onto the training device.
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        logits = classifier(input_ids, attention_mask).to(device)
        # Cast both sides to float32 so the loss sees matching dtypes.
        loss = criterion(logits.float(), labels.float())
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Average of the per-batch losses over the epoch.
    mean_loss = total_loss / len(data_loader)
    print(f"Epoch {epoch + 1}, Loss: {mean_loss}")

    # Write one checkpoint per epoch; the file name uses the 0-based epoch index.
    save_path = f"evaluting_toxicity_model_{epoch}.pth"
    checkpoint = {
        'model_state_dict': classifier.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,
        'loss': mean_loss,
    }
    torch.save(checkpoint, save_path)

Epoch 1, Loss: 0.13481990650029438
Epoch 2, Loss: 0.10675954302329846
Epoch 3, Loss: 0.09861405799760843
Epoch 4, Loss: 0.09281198428872296
Epoch 5, Loss: 0.08917331304925177


In [21]:
# Example text to predict
texts_to_predict = sentences[200:202]

# Create a data loader for prediction
dataset = CustomDataset(texts_to_predict, test=True)
data_loader = DataLoader(dataset, batch_size=2, shuffle=False)

with torch.no_grad():
    for batch in data_loader:
        input_ids, attention_mask = batch['input_ids'], batch['attention_mask']
        if torch.cuda.is_available():
            input_ids, attention_mask = input_ids.to('cuda'), attention_mask.to('cuda')

        logits = classifier(input_ids, attention_mask)
        predictions = logits[:, 0]
print(texts_to_predict)
print(targets[200:202, 0])
print(predictions)

["you're still in trouble, aren't you?"
 'before you go, Officer, could you just kill the spider I caught under the cup?']
[0.00041122 0.07897831]
tensor([0.0379, 0.6970], device='cuda:0')


**Conclusion:** the fine-tuned model does not work well :( So I will use the `text2toxicity` function defined above to estimate the toxicity of a text.