In [1]:
from tqdm.auto import tqdm
from torch import nn
import torch

# my imports
import sys
sys.path.insert(1, '../')
from src.data.load_data import load_data
from src.models.roberta_toxicity_classifier import RTCModel
from src.models.t5_toxicity_evaluator import T5TEModel

In [2]:
# Checkpoint file handed to the project's T5TEModel wrapper (path is relative
# to this notebook's directory).
t5_checkpoint_path = '../models/last_toxic_regressor/model.pt'

# Instantiate both models that are compared below. RTCModel takes no
# arguments here; the warning it prints shows it loads the
# SkolkovoInstitute/roberta_toxicity_classifier checkpoint.
model_t5 = T5TEModel(t5_checkpoint_path)
model_rtc = RTCModel()

Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
# Raw TSV consumed by the project's `load_data` helper. The cells below rely
# on the result having `text` and `toxicity` columns.
# NOTE(review): the exact effect of `flatten=True` is defined in
# src/data/load_data.py and is not visible from this notebook.
raw_data_path = '../data/raw/filtered.tsv'
df = load_data(path=raw_data_path, flatten=True)

In [4]:
class ToxicDataset(torch.utils.data.Dataset):
    """Map-style dataset yielding ``(text, toxicity)`` pairs from a dataframe.

    The ``text`` and ``toxicity`` columns are copied into plain Python lists
    once, at construction time, so item access does not go through pandas.
    """

    def __init__(self, dataframe):
        """Keep a reference to ``dataframe`` and extract the two used columns.

        Args:
            dataframe: object indexable by column name whose ``'text'`` and
                ``'toxicity'`` columns provide ``.tolist()``.
        """
        self.raw_data = dataframe

        # Pull both columns out in one pass; `.tolist()` yields Python values.
        self.texts, self.targets = (
            dataframe[column].tolist() for column in ('text', 'toxicity')
        )

    def __getitem__(self, idx):
        """Return the ``(text, toxicity)`` pair stored at position ``idx``."""
        sample_text = self.texts[idx]
        sample_target = self.targets[idx]
        return sample_text, sample_target

    def __len__(self):
        """Return the number of samples, i.e. the number of texts."""
        n_samples = len(self.texts)
        return n_samples

In [5]:
# Wrap the loaded dataframe so samples can be read as (text, toxicity) pairs.
dataset = ToxicDataset(dataframe=df)

In [6]:
# Run both models on every sample, one text at a time, and print each text on
# which their toxic / non-toxic verdicts differ. The dataset's own `target`
# value is unpacked but not used in this comparison.
for text, target in tqdm(dataset, total=len(dataset)):
    t5_output, rtc_output = model_t5(text), model_rtc(text)

    # T5 verdict: score above 0.5.
    t5_toxic = t5_output > 0.5
    # RTC verdict: the score at index 1 exceeds the score at index 0.
    rtc_toxic = rtc_output[1] > rtc_output[0]

    # Nothing to report when both models reach the same verdict.
    if not (t5_toxic != rtc_toxic):
        continue

    message = f'`{text}`: T5 output {t5_output} vs. RTC: {rtc_output}'
    print(message)

  0%|          | 0/1155554 [00:00<?, ?it/s]

`I'm not gonna have a child... ...with the same genetic disorder as me who's gonna die. L...`: T5 output 0.9804766178131104 vs. RTC: [1.0820412635803223, -0.6436259746551514]
`Briggs, what the hell is going on?`: T5 output 0.8802444338798523 vs. RTC: [-0.056935735046863556, -0.12763941287994385]
`So now their spirits are cursed, walking back roads, waterways, and if they find an unfaithful man, they kill him, and that man is never seen again.`: T5 output 0.8547601103782654 vs. RTC: [1.084721326828003, -1.0755829811096191]
`The guy is totally irresponsible.`: T5 output 0.4396005868911743 vs. RTC: [-0.6044259667396545, 0.5645673274993896]
`I can't believe Marcie's gone crazy.`: T5 output 0.7892584204673767 vs. RTC: [0.35944440960884094, -0.2329864203929901]
`If it were, his new press secretary would not be describing himself as “dopey.”`: T5 output 0.22222629189491272 vs. RTC: [-1.2555899620056152, 1.2892508506774902]
`Travis, someone's getting high over here-- he's going nuts.`: T5 outp

KeyboardInterrupt: 

* threshold: 0.5; discrepancy: 13.1%

In [7]:
# Compare the RoBERTa classifier's toxic / non-toxic decision with the
# dataset's toxicity labels, batch by batch, and track the running share of
# samples on which the two disagree.
#
# NOTE(review): this cell originally referenced a bare `model` that no cell in
# this notebook defines, so it raised NameError on a fresh kernel. It now uses
# `model_rtc`, the only RoBERTa model loaded above. This assumes RTCModel
# exposes `collate_batch`, `.to(device)` and accepts collated batches --
# confirm against src/models/roberta_toxicity_classifier.py.
dataloader = torch.utils.data.DataLoader(
    dataset, batch_size=16, shuffle=False, collate_fn=model_rtc.collate_batch
)

total_discrepancy = 0
total = 0
toxicity_threshold = 0.5  # labels strictly above this value count as toxic

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_rtc.to(device)

# Evaluation only: disable gradient tracking so no autograd graph is built.
with torch.no_grad(), tqdm(dataloader, total=len(dataloader), desc='Evaluating') as pb:
    for samples in pb:
        logits = model_rtc(samples)

        # Column 1 exceeding column 0 is read as a "toxic" prediction.
        roberta_toxic = logits[:, 1] > logits[:, 0]
        label_toxic = samples['labels'].to(device) > toxicity_threshold

        total_discrepancy += torch.sum(roberta_toxic ^ label_toxic).item()
        # Count exactly the predictions compared in this batch, rather than
        # relying on attribute access (`samples.input_ids`) on the batch object.
        total += roberta_toxic.numel()

        pb.set_postfix({'Discrepancy': total_discrepancy / total})

Evaluating:   0%|          | 0/36112 [00:00<?, ?it/s]

KeyboardInterrupt: 