In [190]:
import torch
import pandas as pd
from tqdm import tqdm
from transformers import BertForMaskedLM, BertTokenizer, DataCollatorForLanguageModeling
from torch.utils.data import Dataset, DataLoader

In [191]:
class ToxicWordsDataset(Dataset):
    def __init__(self):
        model_name = "bert-base-uncased"
        self.tokenizer = BertTokenizer.from_pretrained(model_name)

        positive_words = open('../data/external/positive_words.txt').read().split('\n')
        negative_words = open('../data/external/negative_words.txt').read().split('\n')
        toxic_words = open('../data/external/toxic_words.txt').read().split('\n')
        toxic_words.extend(negative_words)

        toxic_words = [w for w in toxic_words if w.isalnum() and len(w) > 1]
        positive_words = [w for w in positive_words if w.isalnum() and len(w) > 1]

        self.texts = []
        self.labels = []

        for w in tqdm(toxic_words):
            word = self.tokenizer(w, add_special_tokens=False, max_length=1, truncation=True).input_ids
            self.texts.append(word[0])
            self.labels.append(1)

        for w in tqdm(positive_words):
            word = self.tokenizer(w, add_special_tokens=False, max_length=1, truncation=True).input_ids
            self.texts.append(word[0])
            self.labels.append(0)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]

In [192]:
dataset = ToxicWordsDataset()

  0%|          | 0/5038 [00:00<?, ?it/s]

100%|██████████| 5038/5038 [00:00<00:00, 6326.00it/s]
100%|██████████| 1904/1904 [00:00<00:00, 7436.92it/s]


In [193]:
for x, y in dataset:
    print(x, y)
    break

1018 1


In [194]:
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=True)

In [195]:
import torch.nn as nn

class ToxicWordsClassifier(nn.Module):
    def __init__(self, vocab_size, embedding_dim, dropout=0.1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(embedding_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        x = self.dropout(x)
        x = self.fc(x)
        x = self.sigmoid(x)
        return x
    
model = ToxicWordsClassifier(dataset.tokenizer.vocab_size, 512)

In [196]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.BCELoss()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [197]:
def train(epoch, model, optimizer, criterion, train_dataloader, device):
    model.to(device)
    model.train()
    progress_bar = tqdm(train_dataloader)
    for batch in progress_bar:
        optimizer.zero_grad()
        x, y = batch
        x, y = x.to(device), y.reshape(-1, 1).to(device)
        y_hat = model(x)
        loss = criterion(y_hat, y.float())
        loss.backward()
        optimizer.step()

        acc = ((y_hat > 0.7) == y).sum().item() / len(y)

        progress_bar.set_description(f'Epoch: {epoch}, Loss: {loss.item():.4f}, Acc: {acc:.4f}')

def evaluate(epoch, model, criterion, eval_loader, device):
    model.to(device)
    model.eval()
    progress_bar = tqdm(eval_loader)
    with torch.no_grad():
        for batch in progress_bar:
            x, y = batch
            x, y = x.to(device), y.reshape(-1, 1).to(device)
            y_hat = model(x)
            loss = criterion(y_hat, y.float())

            acc = ((y_hat > 0.7) == y).sum().item() / len(y)

            progress_bar.set_description(f'\tEpoch: {epoch}, Loss: {loss.item():.4f}, Acc: {acc:.4f}')

In [198]:
EPOCHS = 5

for epoch in range(EPOCHS):
    train(epoch, model, optimizer, criterion, train_dataloader, device)
    evaluate(epoch, model, criterion, val_dataloader, device)

  0%|          | 0/196 [00:00<?, ?it/s]

Epoch: 0, Loss: 0.5683, Acc: 0.4286: 100%|██████████| 196/196 [00:02<00:00, 91.85it/s] 
	Epoch: 0, Loss: 0.5094, Acc: 0.4783: 100%|██████████| 22/22 [00:00<00:00, 392.83it/s]
Epoch: 1, Loss: 0.2785, Acc: 0.5714: 100%|██████████| 196/196 [00:01<00:00, 105.31it/s]
	Epoch: 1, Loss: 0.4651, Acc: 0.6087: 100%|██████████| 22/22 [00:00<00:00, 354.80it/s]
Epoch: 2, Loss: 0.4385, Acc: 0.7143: 100%|██████████| 196/196 [00:01<00:00, 106.26it/s]
	Epoch: 2, Loss: 0.4685, Acc: 0.6087: 100%|██████████| 22/22 [00:00<00:00, 333.30it/s]
Epoch: 3, Loss: 0.5971, Acc: 0.5714: 100%|██████████| 196/196 [00:01<00:00, 105.81it/s]
	Epoch: 3, Loss: 0.3472, Acc: 0.8261: 100%|██████████| 22/22 [00:00<00:00, 297.27it/s]
Epoch: 4, Loss: 0.2484, Acc: 0.8571: 100%|██████████| 196/196 [00:01<00:00, 106.86it/s]
	Epoch: 4, Loss: 0.3299, Acc: 0.8261: 100%|██████████| 22/22 [00:00<00:00, 342.37it/s]
Epoch: 5, Loss: 0.2532, Acc: 0.8571: 100%|██████████| 196/196 [00:01<00:00, 98.57it/s] 
	Epoch: 5, Loss: 0.6097, Acc: 0.7391:

In [199]:
word = 'obama'
encoded = dataset.tokenizer(word, add_special_tokens=False, max_length=1, truncation=True).input_ids[0]
encoded = torch.tensor(encoded).to(device)
model.eval()
with torch.no_grad():
    toxic_chance = model(encoded)
print(toxic_chance.item())

0.6940274834632874


In [200]:
def get_toxicity(word):
    encoded = dataset.tokenizer(word, add_special_tokens=False, max_length=1, truncation=True).input_ids[0]
    encoded = torch.tensor(encoded).to(device)
    model.eval()
    with torch.no_grad():
        toxic_chance = model(encoded)
    return toxic_chance.item()

In [201]:
df = pd.read_csv('../data/raw/filtered.tsv', sep='\t')
sents = df[(df['similarity'] < 0.7) & (df['ref_tox'] > df['trn_tox'])]
sents = sents[['reference', 'translation']]
toxic_sentences = sents['reference'].tolist()
non_toxic_sentences = sents['translation'].tolist()

In [206]:
def replace_toxic_words(text):
    text = text.lower().split()
    for i in range(len(text)):
        toxic_chance = get_toxicity(text[i])
        if toxic_chance > 0.6:
            text[i] = "[MASK]"
    return ' '.join(text)

In [203]:
model_name = "../models/bert_maskedlm"
tokenizer = BertTokenizer.from_pretrained(model_name)
masked_model = BertForMaskedLM.from_pretrained(model_name)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer)

In [207]:
def detoxificate_text(text, masked_model, tokenizer):
    test_input = tokenizer(replace_toxic_words(text), padding='max_length', max_length=128, truncation=True, return_tensors='pt')
    input_ids = test_input.input_ids
    with torch.no_grad():
        output = masked_model(**test_input)
    mask_idxs = torch.where(test_input['input_ids'][0] == tokenizer.mask_token_id)
    mask_token_logits = output.logits[0, mask_idxs[0]]
    top_tokens = torch.topk(mask_token_logits, 100, dim=1).indices.tolist()
    for i in range(len(top_tokens)):
        for token in top_tokens[i]:
            if get_toxicity(tokenizer.decode([token])) < 0.6:
                input_ids[0][mask_idxs[0][i]] = token
                break

    non_toxic_text = tokenizer.decode(input_ids[0]).replace('[CLS]', '').replace('[SEP]', '').replace('[PAD]', '').strip()

    return non_toxic_text

In [208]:
import random

random.seed(1337)

masked_model = BertForMaskedLM.from_pretrained("../models/bert_maskedlm")
tokenizer = BertTokenizer.from_pretrained("../models/bert_maskedlm")

random_toxic_sentences = random.sample(toxic_sentences, 10)

for sentence in random_toxic_sentences:
    print(sentence)
    print(detoxificate_text(sentence, masked_model, tokenizer))
    print()

Suddenly, to the delight and outrage of the congregation, a raucous saxophone broke the solemnity, and a jazz rendering of "Fools Rush In" was blaring over the loudspeakers.
suddenly, his of delight,, of ",,, saxophone,,,,,, e of " fools rush " was very ing ed.

This place is such a dump.
i'was such luck.

Doesn't mean a damn thing!
doesn't say any ing thing!

I’m just going to have to find someone to cover for my ass first.’
i ’ m probably going gonna have her find out,, ing ing hole.

He is a walking dead man with no will of his own.
i'will ed ed me with that will of his.

You're such a jerk.
what such luck.

I'm jacking off.
i'm serious.

I may puke.
i will.

You gotta be shitting me.
that'ing ed me.

You're a great liar, Dad.
what, great idea.

