<a href="https://colab.research.google.com/github/SumitNawathe/HateSpeechModel/blob/main/Final_Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installs/Upgrades

In [None]:
!nvidia-smi

In [None]:
!pip install --upgrade pip

In [None]:
!pip install torch
!pip install pytorch-lightning
!pip install transformers

In [None]:
!pip install --upgrade pandas
!pip3 install pickle5

In [None]:
!pip install -U sentence-transformers

In [None]:
import pickle5 as pickle
import pandas as pd
import numpy as np

from tqdm.auto import tqdm

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

#from transformers import BertTokenizerFast as BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup

import pytorch_lightning as pl
from torchmetrics.functional import accuracy, f1, auroc
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, multilabel_confusion_matrix

import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc

%matplotlib inline
%config InlineBackend.figure_format='retina'

# Seed shared by every library that pl.seed_everything() touches.
RANDOM_SEED = 42

# Plot styling: seaborn theme first, then the custom palette on top of it.
HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))
rcParams['figure.figsize'] = (12, 8)

pl.seed_everything(RANDOM_SEED)

import gc
from sentence_transformers import SentenceTransformer
from transformers import AdamW

## Loading Data/Model

In [None]:
# Load the pickled dataframe; besides the text and label columns it carries
# an 'encoding' column (see the displayed column list).
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only open pickle files that come from a trusted source.
with open('hatespeech_df_encodedv2.pickle', 'rb') as hatespeech_df_file:
  hatespeech_df = pickle.load(hatespeech_df_file)
# Notebook display: dataframe shape and column names.
hatespeech_df.shape, hatespeech_df.keys()

((23055, 16),
 Index(['text', 'jew', 'language', 'race', 'black', 'immigrant', 'other_race',
        'religion', 'muslim', 'gender', 'women', 'lgbt', 'disability',
        'not_hate', 'asian', 'encoding'],
       dtype='object'))

In [None]:
# temp fix, remove after running
# Every row tagged 'immigrant' is also tagged 'race'.
# A boolean mask is used on purpose: the dataframe index contains duplicated
# labels, so selecting by index label would also overwrite unrelated rows
# that merely share a label with an 'immigrant' row.
hatespeech_df.loc[hatespeech_df['immigrant'] == 1, 'race'] = 1

In [None]:
# Passed to ToxicCommentsDataset as max_token_len.
MAX_TOKEN_COUNT = 768
# Sentence-transformers checkpoint name.
MODEL_NAME = 'sentence-transformers/LaBSE'
# Target columns, in the order the classifier emits its scores.
LABEL_COLUMNS = [
    'race',
    'asian',
    'black',
    'immigrant',
    'other_race',
    'religion',
    'jew',
    'muslim',
    'gender',
    'women',
    'lgbt',
    'disability',
    'not_hate',
]

In [None]:
# Hold out 10% of the rows as the validation set.
# NOTE(review): no random_state is passed, so the split follows the global
# NumPy RNG state at the moment this cell runs (presumably the state seeded
# by pl.seed_everything(RANDOM_SEED) — confirm). Re-running the cell gives a
# different split; verify it matches the split the saved model was trained on.
train_df, val_df = train_test_split(hatespeech_df, test_size=0.10)
# Notebook display: shapes of the two partitions.
train_df.shape, val_df.shape

((20749, 16), (2306, 16))

In [None]:
class ToxicCommentsDataset(Dataset):
  """Map-style dataset over a dataframe of texts, encodings and labels.

  Each item is a dict with:
    text:    the row's ``text`` value
    encoded: the row's ``encoding`` value, returned unchanged
    labels:  float32 tensor with the row's LABEL_COLUMNS values, in that order

  Args:
    data: dataframe providing ``text``, ``encoding`` and every LABEL_COLUMNS
      column.
    max_token_len: stored on the instance; not used by this class itself.
  """

  def __init__(
      self,
      data: pd.DataFrame,
      max_token_len: int = MAX_TOKEN_COUNT
  ):
    self.data = data
    self.max_token_len = max_token_len

  def __len__(self):
    return len(self.data)

  # called for indexing with []
  def __getitem__(self, index: int):
    # Positional lookup: the dataframe index is not a clean 0..n-1 range.
    data_row = self.data.iloc[index]

    # Convert through NumPy instead of handing the Series to torch directly:
    # the label Series is indexed by column name, and torch's sequence
    # conversion would fall back on integer-position Series lookups.
    label_values = data_row[LABEL_COLUMNS].to_numpy(dtype="float32")

    # returns the separate aspects of the row as a dict
    return dict(
        text=data_row.text,
        encoded=data_row.encoding,
        labels=torch.tensor(label_values)
    )

In [None]:
class ToxicCommentDataModule(pl.LightningDataModule):
  """LightningDataModule wrapping a train dataframe and a held-out dataframe.

  The held-out dataframe (``test_df``) backs both the validation and the
  test dataloader.

  Args:
    train_df: dataframe used for training.
    test_df: dataframe used for validation and testing.
    batch_size: number of rows per batch for every dataloader.
  """

  def __init__(self, train_df, test_df, batch_size=10):
    super().__init__()
    self.batch_size = batch_size
    self.train_df = train_df
    self.test_df = test_df

  # sets up datasets from raw data
  def setup(self, stage=None):
    self.train_dataset = ToxicCommentsDataset(self.train_df)
    self.test_dataset = ToxicCommentsDataset(self.test_df)

  # dataloaders give iterable, batched access to the datasets

  def train_dataloader(self):
    # Shuffled every epoch; the trailing partial batch is dropped.
    return DataLoader(
        self.train_dataset,
        batch_size=self.batch_size,
        shuffle=True,
        num_workers=0,
        drop_last=True,
    )

  def _eval_dataloader(self):
    # Evaluation must see every sample, so the trailing partial batch is
    # kept (drop_last=False); otherwise up to batch_size - 1 rows would be
    # silently excluded from the validation/test metrics.
    return DataLoader(
        self.test_dataset,
        batch_size=self.batch_size,
        num_workers=0,
        drop_last=False,
    )

  def val_dataloader(self):
    return self._eval_dataloader()

  def test_dataloader(self):
    return self._eval_dataloader()

In [None]:
class ToxicCommentTagger(pl.LightningModule):
  """Fully connected multi-label classifier over pre-computed encodings.

  The network is a stack of Linear/ReLU layers
  (input_dim -> 8192 -> 4096 -> 2048 -> 1024 -> n_classes) followed by a
  sigmoid, trained with binary cross-entropy on the per-label probabilities.

  Args:
    input_dim: size of each input encoding vector. Callers that pass 768 get
      the same architecture as the previously hard-coded first layer.
    n_classes: number of output labels.
  """

  def __init__(self, input_dim, n_classes):
    super().__init__()

    self.classifier = nn.Sequential(
        nn.Linear(input_dim, 8192),
        nn.ReLU(inplace=True),
        nn.Linear(8192, 4096),
        nn.ReLU(inplace=True),
        nn.Linear(4096, 2048),
        nn.ReLU(inplace=True),
        nn.Linear(2048, 1024),
        nn.ReLU(inplace=True),
        nn.Linear(1024, n_classes),
    )
    # BCELoss expects probabilities, hence the sigmoid in forward().
    self.criterion = nn.BCELoss()

  def forward(self, encoded, labels=None):
    """Return ``(loss, probabilities)``; loss is 0 when no labels are given."""
    output = torch.sigmoid(self.classifier(encoded))
    loss = 0
    if labels is not None:
      loss = self.criterion(output, labels)
    return loss, output

  def _shared_step(self, batch, log_name):
    """Run one batch through the model and log its loss under ``log_name``."""
    loss, outputs = self(batch["encoded"], batch["labels"])
    self.log(log_name, loss, prog_bar=True, logger=True)
    return loss, outputs

  # runs each type of data through model

  def training_step(self, batch, batch_idx):
    loss, outputs = self._shared_step(batch, "train_loss")
    # Predictions are only kept for the epoch-end metrics, so they are
    # detached here to avoid holding on to the autograd graph.
    return {"loss": loss, "predictions": outputs.detach(), "labels": batch["labels"]}

  def validation_step(self, batch, batch_idx):
    loss, _ = self._shared_step(batch, "val_loss")
    return loss

  def test_step(self, batch, batch_idx):
    loss, _ = self._shared_step(batch, "test_loss")
    return loss

  # evaluates results of model at end of epoch
  def training_epoch_end(self, outputs):
    # Gather labels and predictions of every training batch on the CPU;
    # detach() removes them from the autograd graph.
    labels = torch.cat([out["labels"].detach().cpu() for out in outputs]).int()
    predictions = torch.cat([out["predictions"].detach().cpu() for out in outputs])

    for i, name in enumerate(LABEL_COLUMNS):
      # only these labels are tracked per epoch
      if name not in ['race', 'religion', 'gender', 'disability', 'not_hate']:
        continue
      # auroc = area under receiver operating characteristic,
      # metric used to evaluate classification models
      class_roc_auc = auroc(predictions[:, i], labels[:, i])
      # logs results
      self.logger.experiment.add_scalar(f"{name}_roc_auc/Train", class_roc_auc, self.current_epoch)

  # uses AdamW optimizer with a fixed learning rate (no scheduler)
  def configure_optimizers(self):
    optimizer = AdamW(self.parameters(), lr=6e-5)
    return dict(
        optimizer=optimizer
    )

In [None]:
# Load the pickled, already-trained model object.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only open pickle files that come from a trusted source.
# NOTE(review): the recorded output of this cell is a NameError — presumably
# the model class must already be defined in the session before unpickling;
# confirm the cell order.
with open('trained_model_larger.pickle', 'rb') as saved_model_file:
  saved_model = pickle.load(saved_model_file)
trained_model = saved_model

NameError: ignored

## Heuristic Testing

In [None]:
# Sentence encoder used to embed ad-hoc test comments. The checkpoint name
# comes from MODEL_NAME so it is defined in exactly one place.
labse = SentenceTransformer(MODEL_NAME)

In [None]:
# Look up a known comment in the training split by exact substring match.
comment = '돈이전부는 아니라지만 첫째가 돈이 먼저지 이세상 에는 돈으로 세상을사니ㅠ'
# na=False treats missing texts as non-matches, so the result is a pure
# boolean mask.
test_comment = train_df['text'].str.contains(comment, regex=False, na=False)
print(test_comment)

# Select the matching rows with the mask itself. Looking rows up again by
# index label would also return unrelated rows, because index labels are
# duplicated in this dataframe.
matching_texts = train_df.loc[test_comment, 'text']
for index, text in matching_texts.items():
  print(index)
  print(text)

num = len(matching_texts)
print(num)
print()

20591                          False
16434                          False
3507                           False
4406976_gab                    False
10141                          False
                               ...  
1178762932198481922_twitter    False
28460                          False
8999391_gab                    False
12553                          False
20050                          False
Name: text, Length: 20749, dtype: bool
2099
2099             돈이전부는 아니라지만 첫째가 돈이 먼저지 이세상 에는 돈으로 세상을사니ㅠ
2099    There are a few things which i will definitely...
Name: text, dtype: object
1



In [None]:
# Classify a single training comment and turn the per-label scores into a
# list of predicted classes using hand-tuned thresholds.
test_comment = train_df.loc[5293].text
print(test_comment)
encoding = labse.encode(test_comment)

# Inference only: no autograd bookkeeping, and the input is placed on
# whatever device the model currently lives on.
model_device = next(trained_model.parameters()).device
with torch.no_grad():
  _, test_prediction = trained_model(torch.tensor(encoding).to(model_device))
test_prediction = test_prediction.flatten().cpu().numpy()

# Labels accepted at a lower score (0.3) than the default 0.5.
accepted = {'asian', 'other_race', 'jew', 'disability', 'muslim', 'lgbt'}

classes = []
highest = 0
highestClass = ''
for label, prediction in zip(LABEL_COLUMNS, test_prediction):
  print(f"{label}: {prediction}")
  # 'not_hate' is decided separately below; compare strings by value (!=),
  # never by identity.
  if label != 'not_hate':
    # track the strongest hate label as a fallback
    if prediction > highest:
      highest = prediction
      highestClass = label
    threshold = 0.3 if label in accepted else 0.5
    if prediction > threshold:
      classes.append(label)

not_hate_score = test_prediction[LABEL_COLUMNS.index('not_hate')]
if not_hate_score > 0.45:
  # confident enough that the comment is benign: discard all hate classes
  classes.clear()
elif highestClass and highestClass not in classes:
  # judged hateful but no label passed its threshold: report the strongest
  classes.append(highestClass)

print("classes: ")
print(classes)

if not_hate_score > 0.45:
  print("not hate")
else:
  print("hateful")

UK diversity is a one of its main strengths. Immigrants are resilient, hardworking and outgoing. I particularly love when they live in immigrant-neighbourhoods and do not mix up with the rest.
race: 0.9113994240760803
asian: 0.02068265713751316
black: 0.007221808657050133
immigrant: 0.9150980710983276
other_race: 0.051347944885492325
religion: 0.004964934661984444
jew: 0.00026572486967779696
muslim: 0.00519524235278368
gender: 5.032298213336617e-05
women: 1.2493173926486634e-05
lgbt: 5.8734643971547484e-05
disability: 0.0033514583483338356
not_hate: 0.119871124625206
classes: 
['race', 'immigrant']
hateful


In [None]:
# Print a per-layer parameter summary of the loaded model.
# NOTE(review): the recorded output of this cell shows a deprecation warning
# (v1.4) for LightningModule.summarize — this call may stop working on newer
# pytorch-lightning releases; confirm against the installed version.
trained_model.summarize()

  """Entry point for launching an IPython kernel.
  "Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"


  | Name       | Type       | Params
------------------------------------------
0 | classifier | Sequential | 50.4 M
1 | criterion  | BCELoss    | 0     
------------------------------------------
0         Trainable params
50.4 M    Non-trainable params
50.4 M    Total params
201.441   Total estimated model params size (MB)

## Rigorous Testing

We can calculate the accuracy of the module using the validation set.

In [None]:
# Score every validation row with the trained model and compute the overall
# accuracy at a 0.5 decision threshold.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
trained_model = trained_model.to(device)
trained_model.eval()

val_dataset = ToxicCommentsDataset(
  val_df,
  max_token_len=MAX_TOKEN_COUNT
)

predictions = []
labels = []

# Inference only: skip autograd bookkeeping and move each result to the CPU
# right away instead of accumulating tensors on the device.
with torch.no_grad():
  for item in tqdm(val_dataset):
    # unsqueeze adds the batch dimension for a single encoding
    encoded = torch.tensor(item["encoded"]).unsqueeze(dim=0).to(device)
    _, prediction = trained_model(encoded)
    predictions.append(prediction.flatten().cpu())
    labels.append(item["labels"].int())

predictions = torch.stack(predictions)
labels = torch.stack(labels)

# Notebook display: accuracy over all labels.
accuracy(predictions, labels, threshold=0.5)

  0%|          | 0/2306 [00:00<?, ?it/s]

tensor(0.9197)

We can calculate the AUROC for each label.

In [None]:
print("AUROC per tag")
# Walk the label names alongside the matching prediction / label columns.
for name, tag_predictions, tag_labels in zip(LABEL_COLUMNS, predictions.T, labels.T):
  tag_auroc = auroc(tag_predictions, tag_labels, pos_label=1)
  print(f"{name}: {tag_auroc}")

AUROC per tag
race: 0.9039750695228577
asian: 0.9636693596839905
black: 0.9272775650024414
immigrant: 0.9697491526603699
other_race: 0.8894108533859253
religion: 0.9588590860366821
jew: 0.9614421129226685
muslim: 0.9733967781066895
gender: 0.9205159544944763
women: 0.9363411664962769
lgbt: 0.9306183457374573
disability: 0.9156815409660339
not_hate: 0.8065183758735657


Finally, we can create a classification report.

In [None]:
# Binarize the predicted probabilities at 0.5 and print per-label
# precision / recall / F1.
y_true = labels.numpy()
y_pred = np.where(predictions.numpy() > 0.5, 1, 0)
report = classification_report(
  y_true,
  y_pred,
  target_names=LABEL_COLUMNS,
  zero_division=0,
)
print(report)

              precision    recall  f1-score   support

        race       0.67      0.79      0.72       593
       asian       0.64      0.65      0.64       104
       black       0.67      0.69      0.68       242
   immigrant       0.66      0.69      0.67        94
  other_race       0.62      0.47      0.54       155
    religion       0.85      0.76      0.80       484
         jew       0.83      0.65      0.73       220
      muslim       0.84      0.77      0.80       252
      gender       0.72      0.77      0.74       621
       women       0.68      0.65      0.66       269
        lgbt       0.71      0.70      0.71       361
  disability       0.61      0.23      0.33        61
    not_hate       0.64      0.49      0.56       598

   micro avg       0.71      0.68      0.70      4054
   macro avg       0.70      0.64      0.66      4054
weighted avg       0.71      0.68      0.69      4054
 samples avg       0.67      0.66      0.66      4054

