In [2]:
import pandas as pd
from tqdm import tqdm
from transformers import BertForMaskedLM, BertTokenizer, DataCollatorForLanguageModeling
from datasets import Dataset
import torch
import torch.nn as nn
from torch.utils.data import Dataset as TorchDataset, DataLoader

import sys
sys.path.append('..')
from src.data.preprocess import put_mask_with_classifier, get_toxicity
from src.models.predict import detoxificate_text_with_classifier
from src.models.train import train, train_classifier, evaluate_classifier
from src.models.classifier import ToxicWordsClassifier

import warnings
# Suppress every warning for the rest of the session (keeps cell output short, but can hide real problems).
warnings.filterwarnings('ignore')

# Seed torch's global RNG. Python's `random` module is seeded separately in the final cell.
RANDOM_SEED = 1337
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x189a5bce450>

In [4]:
# Load the pretrained uncased BERT checkpoint: its tokenizer and the model with a masked-LM head.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)
# Collator that is later handed to train(...).
# NOTE(review): it is built with the library defaults. If those defaults enable random token
# masking, the collator may regenerate `labels` from the inputs and discard the target labels
# prepared by group_texts - confirm against the transformers docs and src.models.train.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
class ToxicWordsDataset(TorchDataset):
    """Word-level dataset of ``(token_id, label)`` pairs for the toxicity classifier.

    Words are read from two newline-separated text files. A word is kept only if it
    is alphanumeric and longer than one character. Each kept word is tokenized with
    ``max_length=1`` and truncation, and the first returned id represents the word.
    Toxic words get label ``1`` and positive words label ``0``; toxic samples come
    first, followed by the positive ones.

    Args:
        tokenizer: Callable used as ``tokenizer(word, add_special_tokens=False,
            max_length=1, truncation=True)``; the result must expose ``input_ids``.
        toxic_words_path: Path of the toxic word list.
        positive_words_path: Path of the positive word list.

    Attributes are plain Python lists: ``texts`` holds the token ids and ``labels``
    the matching 0/1 labels.
    """

    def __init__(self, tokenizer,
                 toxic_words_path='../data/interim/toxic_words.txt',
                 positive_words_path='../data/interim/positive_words.txt'):
        self.tokenizer = tokenizer

        # Read both files up front so a missing file fails before any tokenization work.
        positive_words = self._read_words(positive_words_path)
        toxic_words = self._read_words(toxic_words_path)

        self.texts = []
        self.labels = []

        for words, label in ((toxic_words, 1), (positive_words, 0)):
            for w in tqdm(words):
                token_id = self._first_token_id(w)
                if token_id is None:
                    # The tokenizer produced nothing for this word; skip it instead of
                    # crashing on an empty id list.
                    continue
                self.texts.append(token_id)
                self.labels.append(label)

    @staticmethod
    def _read_words(path):
        """Return the usable words from a newline-separated file, closing the file afterwards."""
        with open(path) as f:
            words = f.read().split('\n')
        return [w for w in words if w.isalnum() and len(w) > 1]

    def _first_token_id(self, word):
        """Return the first token id the tokenizer yields for ``word``, or ``None`` if there is none."""
        ids = self.tokenizer(word, add_special_tokens=False, max_length=1, truncation=True).input_ids
        if len(ids) == 0:
            return None
        return ids[0]

    def __len__(self):
        """Number of (token_id, label) samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the ``(token_id, label)`` pair stored at ``idx``."""
        return self.texts[idx], self.labels[idx]
    
# Build the word-level dataset with the BERT tokenizer.
# NOTE: the name `dataset` is rebound to a Hugging Face Dataset in a later cell.
dataset = ToxicWordsDataset(tokenizer)

100%|██████████| 5017/5017 [00:00<00:00, 7707.56it/s]
100%|██████████| 1904/1904 [00:00<00:00, 7294.50it/s]


In [6]:
# Hold out 10% of the word dataset for validation.
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size

# No explicit generator is passed, so the split relies on torch's global RNG
# (seeded via torch.manual_seed in the first cell).
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# Validation batches are not shuffled: the evaluation order stays fixed and the
# evaluation pass no longer draws from the global RNG used by training.
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [7]:
# Word-level toxicity classifier; vocab_size is taken from the tokenizer stored on the word dataset.
toxicity_classifier = ToxicWordsClassifier(vocab_size=dataset.tokenizer.vocab_size, embedding_dim=512, dropout=0.1)

In [28]:
# Training budget and optimisation setup for the word-level toxicity classifier.
EPOCHS = 20

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(toxicity_classifier.parameters(), lr=1e-3)

# Smallest value returned by evaluate_classifier so far; starts above any expected loss.
best_loss = 1e9

for epoch in range(EPOCHS):
    train_classifier(epoch, toxicity_classifier, optimizer, criterion, train_dataloader, device)
    loss = evaluate_classifier(epoch, toxicity_classifier, criterion, val_dataloader, device)
    if not loss < best_loss:
        # No improvement on the validation loader: keep the previous checkpoint.
        continue
    # New best: remember it and overwrite the saved weights.
    best_loss = loss
    torch.save(toxicity_classifier.state_dict(), '../models/toxicity_classifier.pth')

Epoch: 0, Loss: 0.55398, Acc: 0.49670: 100%|██████████| 196/196 [00:02<00:00, 80.93it/s]
	Epoch: 0, Loss: 0.52563, Acc: 0.58757: 100%|██████████| 22/22 [00:00<00:00, 297.28it/s]
Epoch: 1, Loss: 0.43307, Acc: 0.69169: 100%|██████████| 196/196 [00:02<00:00, 92.75it/s]
	Epoch: 1, Loss: 0.50799, Acc: 0.66150: 100%|██████████| 22/22 [00:00<00:00, 289.45it/s]
Epoch: 2, Loss: 0.33611, Acc: 0.78642: 100%|██████████| 196/196 [00:02<00:00, 95.28it/s]
	Epoch: 2, Loss: 0.53309, Acc: 0.69188: 100%|██████████| 22/22 [00:00<00:00, 301.35it/s]
Epoch: 3, Loss: 0.26854, Acc: 0.84571: 100%|██████████| 196/196 [00:02<00:00, 92.02it/s]
	Epoch: 3, Loss: 0.56389, Acc: 0.71542: 100%|██████████| 22/22 [00:00<00:00, 265.04it/s]
Epoch: 4, Loss: 0.23178, Acc: 0.87044: 100%|██████████| 196/196 [00:02<00:00, 91.84it/s]
	Epoch: 4, Loss: 0.57273, Acc: 0.71542: 100%|██████████| 22/22 [00:00<00:00, 301.35it/s]
Epoch: 5, Loss: 0.20883, Acc: 0.88552: 100%|██████████| 196/196 [00:02<00:00, 93.46it/s]
	Epoch: 5, Loss: 0.56

In [31]:
# Two probe words for a quick sanity check of the trained classifier.
toxic_word = 'buttcheeks'
non_toxic_word = 'university'

# Restore the best checkpoint written by the training loop. map_location='cpu' lets a
# checkpoint saved on a GPU machine be read on a CPU-only one; load_state_dict then
# copies the values into the classifier's existing parameters.
toxicity_classifier.load_state_dict(torch.load('../models/toxicity_classifier.pth', map_location='cpu'))
# Inference only from here on: switch off dropout so the scores are deterministic.
toxicity_classifier.eval()

for word in (toxic_word, non_toxic_word):
    print(f'Word: {word}, Toxicity: {get_toxicity(word, tokenizer, toxicity_classifier)}')

Word: buttcheeks, Toxicity: 0.8885523676872253
Word: university, Toxicity: 0.5311500430107117


In [32]:
# Load the parallel corpus: 'reference' holds the toxic side, 'translation' the non-toxic side.
df = pd.read_csv('../data/interim/train.csv')
toxic_sentences = df['reference'].tolist()
non_toxic_sentences = df['translation'].tolist()

data, labels = [], []

# Mask each toxic sentence with the classifier. The masked text replaces the entry in
# toxic_sentences, and only sentences that actually received a [MASK] become training pairs.
for i, raw_sentence in enumerate(tqdm(toxic_sentences)):
    masked_sentence = put_mask_with_classifier(raw_sentence, tokenizer, toxicity_classifier)
    toxic_sentences[i] = masked_sentence
    if '[MASK]' not in masked_sentence:
        continue
    data.append(masked_sentence)
    labels.append(non_toxic_sentences[i])

dataset = Dataset.from_dict({"text": data, "labels": labels})

100%|██████████| 101535/101535 [08:05<00:00, 209.28it/s]


In [33]:
MAX_LEN = 128

def group_texts(examples):
    """Tokenize a batch of masked inputs together with their target sentences.

    Args:
        examples: Mapping with a 'text' column (masked sentences) and a 'labels'
            column (target sentences), as passed by a batched ``dataset.map``.

    Returns:
        The tokenizer output for the 'text' column, with an extra "labels" entry
        holding the ``input_ids`` of the tokenized 'labels' column. Both sides are
        padded/truncated to ``MAX_LEN`` tokens.
    """
    # Identical settings for both sides so inputs and targets share one length.
    tokenize_kwargs = dict(padding='max_length', max_length=MAX_LEN, truncation=True, return_tensors='pt')

    batch = tokenizer(list(examples['text']), **tokenize_kwargs)
    target_encoding = tokenizer(list(examples['labels']), **tokenize_kwargs)
    batch["labels"] = target_encoding.input_ids

    return batch

# Apply group_texts to the dataset in batches; this rebinds `dataset` to the mapped result.
dataset = dataset.map(group_texts, batched=True)

Map:   0%|          | 0/98230 [00:00<?, ? examples/s]

In [34]:
# Sequential 90/10 split: the first rows go to training, the remaining tail to validation.
train_size = int(0.9 * len(dataset))
val_size = len(dataset) - train_size
train_dataset = dataset.select(range(0, train_size))
val_dataset = dataset.select(range(train_size, len(dataset)))

In [35]:
# Run the project's train(...) helper on the masked-LM `model` for one epoch, using the
# tokenized train/validation splits and the collator built earlier.
# What train(...) does with the run name, and where it writes its output, is defined in
# src.models.train and is not visible in this notebook.
train('maskedlm_with_classifier', 
      model, 
      tokenizer, 
      train_dataset, 
      val_dataset, 
      data_collator,
      batch_size=16, 
      epochs=1,
      seed=RANDOM_SEED
)

  0%|          | 0/5526 [00:00<?, ?it/s]

{'loss': 3.2831, 'learning_rate': 1.819037278320666e-05, 'epoch': 0.09}
{'loss': 3.0637, 'learning_rate': 1.638074556641332e-05, 'epoch': 0.18}
{'loss': 3.0873, 'learning_rate': 1.4571118349619979e-05, 'epoch': 0.27}
{'loss': 2.9544, 'learning_rate': 1.2761491132826638e-05, 'epoch': 0.36}
{'loss': 2.9274, 'learning_rate': 1.0951863916033298e-05, 'epoch': 0.45}
{'loss': 2.9715, 'learning_rate': 9.142236699239957e-06, 'epoch': 0.54}
{'loss': 2.8911, 'learning_rate': 7.332609482446616e-06, 'epoch': 0.63}
{'loss': 2.8747, 'learning_rate': 5.5229822656532765e-06, 'epoch': 0.72}
{'loss': 2.8676, 'learning_rate': 3.7133550488599353e-06, 'epoch': 0.81}
{'loss': 2.8604, 'learning_rate': 1.9037278320665944e-06, 'epoch': 0.9}
{'loss': 2.8661, 'learning_rate': 9.410061527325373e-08, 'epoch': 1.0}


  0%|          | 0/614 [00:00<?, ?it/s]

{'eval_loss': 2.749858856201172, 'eval_runtime': 157.18, 'eval_samples_per_second': 62.495, 'eval_steps_per_second': 3.906, 'epoch': 1.0}
{'train_runtime': 3724.7467, 'train_samples_per_second': 23.735, 'train_steps_per_second': 1.484, 'train_loss': 2.9680513250107268, 'epoch': 1.0}


In [37]:
import random

# Seed Python's RNG so the same ten sentences are sampled on every run.
random.seed(RANDOM_SEED)

# Reload model and tokenizer from disk; note that this rebinds the notebook-level `tokenizer`.
# NOTE(review): the training call above used the run name 'maskedlm_with_classifier', while this
# loads "../models/bert_maskedlm" - confirm this directory really holds the model trained here.
best_model = BertForMaskedLM.from_pretrained("../models/bert_maskedlm")
tokenizer = BertTokenizer.from_pretrained("../models/bert_maskedlm")

# Qualitative check on ten randomly drawn toxic source sentences.
random_toxic_sentences = random.sample(df['reference'].tolist(), 10)

for sentence in random_toxic_sentences:
    # Show the original sentence, its masked form, and the model's detoxified output.
    print(f'Original: {sentence}')
    print(f'Masked: {put_mask_with_classifier(sentence, tokenizer, toxicity_classifier)}')
    print(f'Detoxified: {detoxificate_text_with_classifier(sentence, tokenizer, best_model, toxicity_classifier)}')
    print()

Original: Suddenly, to the delight and outrage of the congregation, a raucous saxophone broke the solemnity, and a jazz rendering of "Fools Rush In" was blaring over the loudspeakers.
Masked: suddenly, to [MASK] delight and outrage of [MASK] [MASK] a raucous saxophone [MASK] [MASK] [MASK] and a jazz rendering of [MASK] rush [MASK] was [MASK] [MASK] [MASK] loudspeakers.
Detoxified: suddenly, to a delight and outrage of all, a raucous saxophone struck a, and a jazz rendering of a rush hour was played on on loudspeakers.

Original: This place is such a dump.
Masked: this [MASK] [MASK] such a [MASK]
Detoxified: this guy not such a pain

Original: Doesn't mean a damn thing!
Masked: doesn't mean a [MASK] thing!
Detoxified: doesn't mean a damned thing!

Original: I’m just going to have to find someone to cover for my ass first.’
Masked: i’m just going to have to find someone to [MASK] [MASK] [MASK] [MASK] first.’
Detoxified: i ’ m just going to have to find someone to kick me to to first. ’

