In [None]:
# !pip install transformers # you may need to install transformers first

In [60]:
import re
import pandas as pd
import numpy as np
import json
import random

import torch
from tqdm import tqdm, trange
from sklearn.metrics import classification_report

In [77]:
# Read the full dataset; the blank path is a placeholder to be filled in.
df = pd.read_csv(" ")
# Row counts per quarter (not the last expression of the cell, so the value is discarded).
df.groupby('quarter').count()
# One frame per calendar quarter, pooling the 2020-2022 rows.
# reset_index() renumbers rows 0..n-1 and keeps the old labels in an 'index' column.
df_q1 = df[df['quarter'].isin(['Q1_2020', 'Q1_2021', 'Q1_2022'])].reset_index()
df_q2 = df[df['quarter'].isin(['Q2_2020', 'Q2_2021', 'Q2_2022'])].reset_index()
df_q3 = df[df['quarter'].isin(['Q3_2020', 'Q3_2021', 'Q3_2022'])].reset_index()
df_q4 = df[df['quarter'].isin(['Q4_2020', 'Q4_2021', 'Q4_2022'])].reset_index()

In [3]:
# load the pre-trained model
import torch
from transformers import AutoModelForTokenClassification, AutoModelForSequenceClassification, AdamW, get_linear_schedule_with_warmup
from transformers import BertForTokenClassification, BertForSequenceClassification,BertPreTrainedModel, BertModel
import torch.nn as nn
import torch.nn.functional as F

class BertPooler(nn.Module):
    """Pool a sequence of hidden states into one vector per example.

    The hidden state at position 0 of every sequence is passed through a
    square linear layer (hidden_size -> hidden_size) followed by tanh.
    """

    def __init__(self, config):
        """Build the projection; only ``config.hidden_size`` is read."""
        super().__init__()
        width = config.hidden_size
        self.dense = nn.Linear(width, width)
        self.activation = nn.Tanh()

    def forward(self, hidden_states):
        """Return ``tanh(dense(hidden_states[:, 0]))``."""
        # "Pooling" here just means picking the first token of each sequence.
        return self.activation(self.dense(hidden_states[:, 0]))



class Model_Rational_Label(BertPreTrainedModel):
    """BERT encoder with two heads: a per-token classifier and a sequence classifier.

    * ``token_classifier`` maps every token's hidden state to 2 logits.
    * ``classifier`` maps the pooled first-token representation (via
      ``bert_pooler``) to ``num_labels`` (= 2) logits.

    When targets are supplied, the total loss is
    ``impact_factor * token_loss + label_loss`` (each term is only included
    if its targets were passed).
    """

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = 2
        # Weight applied to the token-level loss before it joins the total loss.
        self.impact_factor = 0.8
        # BERT's built-in pooler is disabled; pooling is done by `bert_pooler`.
        self.bert = BertModel(config, add_pooling_layer=False)
        self.bert_pooler = BertPooler(config)
        self.token_dropout = nn.Dropout(0.1)
        self.token_classifier = nn.Linear(config.hidden_size, 2)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)
        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, attn=None, labels=None):
        """Run the encoder and both heads, optionally computing the joint loss.

        Args:
            input_ids: token ids passed to the BERT encoder.
            attention_mask: passed to the encoder; when ``attn`` is given it
                also selects which token positions (mask == 1) contribute to
                the token-level loss.
            token_type_ids: accepted for signature compatibility but not
                forwarded to the encoder.
            attn: optional per-token class targets for ``token_classifier``.
            labels: optional per-sequence class targets for ``classifier``.

        Returns:
            ``(y_pred, logits)`` when neither ``attn`` nor ``labels`` is given,
            otherwise ``(y_pred, logits, loss_total)``. ``y_pred`` are the
            sequence-level logits and ``logits`` the token-level logits.
        """
        outputs = self.bert(input_ids, attention_mask)
        sequence_output = outputs[0]
        logits = self.token_classifier(self.token_dropout(sequence_output))

        pooled = self.bert_pooler(sequence_output)
        y_pred = self.classifier(self.dropout(pooled))

        loss_total = None

        if attn is not None:
            loss_fct = nn.CrossEntropyLoss()
            if attention_mask is not None:
                # Only keep active parts of the loss: positions where the mask
                # is not 1 get `ignore_index` as target and are skipped by the
                # loss. (The original read an undefined name `mask` here.)
                active_loss = attention_mask.view(-1) == 1
                active_logits = logits.view(-1, 2)
                active_labels = torch.where(
                    active_loss, attn.view(-1), torch.tensor(loss_fct.ignore_index).type_as(attn)
                )
                loss_token = loss_fct(active_logits, active_labels)
            else:
                loss_token = loss_fct(logits.view(-1, 2), attn.view(-1))

            loss_total = self.impact_factor * loss_token

        if labels is not None:
            loss_funct = nn.CrossEntropyLoss()
            loss_label = loss_funct(y_pred.view(-1, self.num_labels), labels.view(-1))
            loss_total = loss_label if loss_total is None else loss_total + loss_label

        if loss_total is not None:
            return y_pred, logits, loss_total
        return y_pred, logits

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

# Model_Rational_Label is the class defined earlier in this notebook
# (the original note here points at a models.py as its source).
CHECKPOINT = "Hate-speech-CNERG/bert-base-uncased-hatexplain-rationale-two"
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
model = Model_Rational_Label.from_pretrained(CHECKPOINT).to(device)

In [42]:
# set the dataloader
from torch.utils.data import Dataset, DataLoader
class HateDataset(Dataset):
    """Map-style dataset over a DataFrame with 'text', 'ground_truth' and 'category' columns."""

    def __init__(self, dataframe):
        """Keep a reference to ``dataframe``; no copy is made."""
        self.dataframe = dataframe

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(text, label, category)`` for the row at position ``idx``.

        Lookup is positional (``iloc``): a DataLoader sampler yields positions
        0..len-1, which match index labels only on a freshly reset index.
        The row is fetched once instead of once per column.
        """
        row = self.dataframe.iloc[idx]
        return row['text'], row['ground_truth'], row['category']

def get_dataloader(train_data, test_data, batch_size):
    """Wrap two datasets in DataLoaders sharing one batch size.

    The training loader shuffles on every pass; the test loader keeps the
    dataset order. Returns ``(train_loader, test_loader)``.
    """
    return (
        DataLoader(train_data, batch_size=batch_size, shuffle=True),
        DataLoader(test_data, batch_size=batch_size, shuffle=False),
    )

In [27]:
# define the training function
def train(model, train_dataloader, optimizer, device):
    """Run one pass over ``train_dataloader``, updating ``model`` in place.

    Each batch is a ``(text, label, category)`` triple. The texts are encoded
    with the notebook-level ``tokenizer``, the model's first output is taken
    as the class logits, and cross-entropy against ``label`` is backpropagated.
    ``category`` is not used for training. Returns ``None``.
    """
    # Training mode is loop-invariant, so it is set once rather than per batch.
    model.train()
    for text, label, _category in train_dataloader:
        optimizer.zero_grad()
        encoded_input = tokenizer(text, return_tensors='pt', padding=True, truncation=True).to(device)
        # Without targets the model returns (sequence logits, token logits).
        prediction_logits, _ = model(
            input_ids=encoded_input['input_ids'],
            attention_mask=encoded_input['attention_mask'],
        )
        loss = F.cross_entropy(prediction_logits, label.to(device))
        loss.backward()
        optimizer.step()

# define the evaluation function
def evaluate(model, test_dataloader, device):
    """Return the accuracy of ``model`` over ``test_dataloader``.

    Each batch is a ``(text, label, category)`` triple; texts are encoded with
    the notebook-level ``tokenizer`` and the arg-max of the model's first
    output is compared with ``label``. Runs in eval mode without gradients.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the loader yields no
        samples (the original raised ZeroDivisionError in that case).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for text, label, _category in test_dataloader:
            encoded_input = tokenizer(text, return_tensors='pt', padding=True, truncation=True).to(device)
            prediction_logits, _ = model(
                input_ids=encoded_input['input_ids'],
                attention_mask=encoded_input['attention_mask'],
            )
            prediction = prediction_logits.argmax(dim=1)
            correct += (prediction == label.to(device)).sum().item()
            total += len(label)
    if total == 0:
        # Nothing was scored; accuracy is undefined, report 0.0 instead of crashing.
        return 0.0
    return correct / total

In [30]:
# define the training process
def train_process(model, train_dataloader, test_dataloader, optimizer, device, epochs):
    """Train for ``epochs`` passes, measuring accuracy after each one.

    After every call to ``train`` the model is scored with ``evaluate`` on the
    training loader and then on the test loader, and a progress line is
    printed. Returns ``(train_accuracies, test_accuracies)``, one entry per
    epoch.
    """
    history_train, history_test = [], []
    for epoch in range(epochs):
        train(model, train_dataloader, optimizer, device)
        acc_train = evaluate(model, train_dataloader, device)
        history_train.append(acc_train)
        acc_test = evaluate(model, test_dataloader, device)
        history_test.append(acc_test)
        print('Epoch: {}, Train Accuracy: {}, Test Accuracy: {}'.format(epoch, acc_train, acc_test))
    return history_train, history_test


In [29]:
# evaluate the model and save the predictions into a dataframe
def evaluate_and_save(model, test_dataloader, device):
    """Predict every batch of ``test_dataloader`` and collect the results.

    Returns a DataFrame with one row per example and the columns
    'text', 'label', 'prediction' and 'category'. Texts are encoded with the
    notebook-level ``tokenizer``; the prediction is the arg-max of the
    model's first output. Runs in eval mode without gradients.
    """
    model.eval()
    rows = []
    with torch.no_grad():
        for text, label, category in test_dataloader:
            encoded_input = tokenizer(text, return_tensors='pt', padding=True, truncation=True).to(device)
            class_logits, _ = model(
                input_ids=encoded_input['input_ids'],
                attention_mask=encoded_input['attention_mask'],
            )
            # Move both arrays to host memory before building the records.
            gold = label.cpu().numpy()
            predicted = class_logits.argmax(dim=1).cpu().numpy()
            rows.extend(
                {'text': text[i], 'label': gold[i], 'prediction': predicted[i], 'category': category[i]}
                for i in range(len(predicted))
            )
    return pd.DataFrame(rows)

### Evaluate the 1st quarter

In [55]:
# Baseline pass: both datasets wrap df_q1. Only test_dataloader is consumed by the
# evaluate_and_save call that follows; train_dataloader is not used at this stage.
train_dataset = HateDataset(df_q1)
test_dataset = HateDataset(df_q1)
train_dataloader, test_dataloader = get_dataloader(train_dataset, test_dataset, batch_size=32)

In [56]:
# Score Q1 with the checkpoint as loaded: in notebook order no train_process call precedes this cell.
df_q1result = evaluate_and_save(model, test_dataloader, device) # evaluate

In [67]:
# Number of scored Q1 rows per category.
df_q1result.groupby(['category']).count()

Unnamed: 0_level_0,text,label,prediction
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ageism,186,186,186
asian,179,179,179
mask,16,16,16
rus_ukr,158,158,158
us_capitol,311,311,311
vaccine,78,78,78


In [65]:
def calculate(dataframe):
    """Print a 3-digit classification report for every category.

    ``dataframe`` needs the columns 'category', 'label' and 'prediction'.
    Categories are visited in order of first appearance; each report is
    preceded by the category name and followed by a separator line.
    """
    categories = dataframe['category'].unique().tolist()
    for name in categories:
        subset = dataframe[dataframe['category'] == name]
        report = classification_report(subset['label'], subset['prediction'], digits=3)
        print(name, report, '====================', sep='\n')

In [66]:
# Per-category classification report for the Q1 predictions.
calculate(df_q1result)

vaccine
              precision    recall  f1-score   support

           0      0.727     0.980     0.835        49
           1      0.917     0.379     0.537        29

    accuracy                          0.756        78
   macro avg      0.822     0.679     0.686        78
weighted avg      0.798     0.756     0.724        78

asian
              precision    recall  f1-score   support

           0      0.626     0.968     0.760        95
           1      0.906     0.345     0.500        84

    accuracy                          0.676       179
   macro avg      0.766     0.657     0.630       179
weighted avg      0.757     0.676     0.638       179

ageism
              precision    recall  f1-score   support

           0      0.861     0.925     0.892       147
           1      0.607     0.436     0.507        39

    accuracy                          0.823       186
   macro avg      0.734     0.681     0.700       186
weighted avg      0.808     0.823     0.811       186

### Train with 1st quarter and evaluate the 2nd quarter

In [68]:
# Round 1: fine-tune on every Q1 row and score on Q2.
train_dataset = HateDataset(df_q1) # we use the whole q1 data to train the model
test_dataset = HateDataset(df_q2) # q2 is only evaluated on in this round
train_dataloader, test_dataloader = get_dataloader(train_dataset, test_dataset, batch_size=32)

In [69]:
# define the optimizer
# This is the only place in the notebook where an optimizer is created; the later
# fine-tuning rounds pass this same object to train_process, so its state carries over.
# NOTE(review): recent transformers releases deprecate their own AdamW in favour of
# torch.optim.AdamW — confirm against the installed version.
from transformers import AdamW
optimizer = AdamW(model.parameters(), lr=1e-5)

# fine-tune the model with q1 data (10 epochs); accuracy on q1 and q2 is printed after each epoch
train_acc, test_acc = train_process(model, train_dataloader, test_dataloader, optimizer, device, epochs=10)



Epoch: 0, Train Accuracy: 0.8793103448275862, Test Accuracy: 0.8051511758118701
Epoch: 1, Train Accuracy: 0.9256465517241379, Test Accuracy: 0.8241881298992161
Epoch: 2, Train Accuracy: 0.9655172413793104, Test Accuracy: 0.8208286674132139
Epoch: 3, Train Accuracy: 0.9881465517241379, Test Accuracy: 0.8219484882418813
Epoch: 4, Train Accuracy: 0.9989224137931034, Test Accuracy: 0.8163493840985442
Epoch: 5, Train Accuracy: 0.9989224137931034, Test Accuracy: 0.8118701007838746
Epoch: 6, Train Accuracy: 1.0, Test Accuracy: 0.8208286674132139
Epoch: 7, Train Accuracy: 1.0, Test Accuracy: 0.8118701007838746
Epoch: 8, Train Accuracy: 1.0, Test Accuracy: 0.8141097424412094
Epoch: 9, Train Accuracy: 1.0, Test Accuracy: 0.8174692049272116


In [70]:
# Collect per-example Q2 predictions from the model after the Q1 fine-tuning round.
df_q2result = evaluate_and_save(model, test_dataloader, device) # evaluate

In [71]:
# Number of scored Q2 rows per category.
df_q2result.groupby(['category']).count()

Unnamed: 0_level_0,text,label,prediction
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ageism,117,117,117
asian,296,296,296
mask,64,64,64
rus_ukr,190,190,190
us_capitol,112,112,112
vaccine,114,114,114


In [72]:
# Per-category classification report for the Q2 predictions.
calculate(df_q2result)

ageism
              precision    recall  f1-score   support

           0      0.837     0.906     0.870        85
           1      0.680     0.531     0.596        32

    accuracy                          0.803       117
   macro avg      0.758     0.719     0.733       117
weighted avg      0.794     0.803     0.795       117

mask
              precision    recall  f1-score   support

           0      0.800     0.957     0.871        46
           1      0.778     0.389     0.519        18

    accuracy                          0.797        64
   macro avg      0.789     0.673     0.695        64
weighted avg      0.794     0.797     0.772        64

vaccine
              precision    recall  f1-score   support

           0      0.836     0.824     0.830        74
           1      0.683     0.700     0.691        40

    accuracy                          0.781       114
   macro avg      0.759     0.762     0.761       114
weighted avg      0.782     0.781     0.781       114


### Train with 2nd quarter and evaluate the 3rd quarter


In [73]:
# Round 2: fine-tune on every Q2 row and score on Q3.
# `model` and `optimizer` are not re-created in this cell, so training continues
# from the weights and optimizer state left by the previous round.
train_dataset = HateDataset(df_q2) # we use the whole q2 data to train the model
test_dataset = HateDataset(df_q3)
train_dataloader, test_dataloader = get_dataloader(train_dataset, test_dataset, batch_size=32)

# fine-tune the model with q2 data
train_acc, test_acc = train_process(model, train_dataloader, test_dataloader, optimizer, device, epochs=10)
df_q3result = evaluate_and_save(model, test_dataloader, device) # per-example q3 predictions

Epoch: 0, Train Accuracy: 0.8992161254199328, Test Accuracy: 0.8170731707317073
Epoch: 1, Train Accuracy: 0.9540873460246361, Test Accuracy: 0.8310104529616724
Epoch: 2, Train Accuracy: 0.975363941769317, Test Accuracy: 0.8353658536585366
Epoch: 3, Train Accuracy: 0.986562150055991, Test Accuracy: 0.8292682926829268
Epoch: 4, Train Accuracy: 0.9932810750279956, Test Accuracy: 0.8318815331010453
Epoch: 5, Train Accuracy: 0.9955207166853304, Test Accuracy: 0.8362369337979094
Epoch: 6, Train Accuracy: 0.9955207166853304, Test Accuracy: 0.8362369337979094
Epoch: 7, Train Accuracy: 0.9955207166853304, Test Accuracy: 0.8336236933797909
Epoch: 8, Train Accuracy: 0.9955207166853304, Test Accuracy: 0.8371080139372822
Epoch: 9, Train Accuracy: 0.9977603583426652, Test Accuracy: 0.8362369337979094


In [74]:
# Number of scored Q3 rows per category.
df_q3result.groupby(['category']).count()

Unnamed: 0_level_0,text,label,prediction
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ageism,114,114,114
asian,331,331,331
mask,249,249,249
rus_ukr,192,192,192
us_capitol,158,158,158
vaccine,104,104,104


In [75]:
# Per-category classification report for the Q3 predictions.
calculate(df_q3result)

ageism
              precision    recall  f1-score   support

           0      0.831     0.873     0.852        79
           1      0.677     0.600     0.636        35

    accuracy                          0.789       114
   macro avg      0.754     0.737     0.744       114
weighted avg      0.784     0.789     0.786       114

asian
              precision    recall  f1-score   support

           0      0.847     0.824     0.836       148
           1      0.861     0.880     0.870       183

    accuracy                          0.855       331
   macro avg      0.854     0.852     0.853       331
weighted avg      0.855     0.855     0.855       331

mask
              precision    recall  f1-score   support

           0      0.879     0.918     0.898       182
           1      0.746     0.657     0.698        67

    accuracy                          0.847       249
   macro avg      0.812     0.787     0.798       249
weighted avg      0.843     0.847     0.844       249

v

### Train with 3rd quarter and evaluate the 4th quarter

In [79]:
# Round 3: fine-tune on every Q3 row and score on Q4.
# `model` and `optimizer` are not re-created in this cell, so training continues
# from the weights and optimizer state left by the previous round.
train_dataset = HateDataset(df_q3) # we use the whole q3 data to train the model
test_dataset = HateDataset(df_q4)
train_dataloader, test_dataloader = get_dataloader(train_dataset, test_dataset, batch_size=32)

# fine-tune the model with q3 data
train_acc, test_acc = train_process(model, train_dataloader, test_dataloader, optimizer, device, epochs=10)
df_q4result = evaluate_and_save(model, test_dataloader, device) # per-example q4 predictions

Epoch: 0, Train Accuracy: 0.8841463414634146, Test Accuracy: 0.8040737148399612
Epoch: 1, Train Accuracy: 0.960801393728223, Test Accuracy: 0.8225024248302619
Epoch: 2, Train Accuracy: 0.9799651567944251, Test Accuracy: 0.8292919495635306
Epoch: 3, Train Accuracy: 0.990418118466899, Test Accuracy: 0.8322017458777885
Epoch: 4, Train Accuracy: 0.9939024390243902, Test Accuracy: 0.8292919495635306
Epoch: 5, Train Accuracy: 0.9956445993031359, Test Accuracy: 0.8166828322017459
Epoch: 6, Train Accuracy: 0.9982578397212544, Test Accuracy: 0.8234723569350145
Epoch: 7, Train Accuracy: 0.9982578397212544, Test Accuracy: 0.8244422890397672
Epoch: 8, Train Accuracy: 0.9991289198606271, Test Accuracy: 0.8215324927255092
Epoch: 9, Train Accuracy: 0.9991289198606271, Test Accuracy: 0.8263821532492726


In [80]:
# Number of scored Q4 rows per category.
df_q4result.groupby(['category']).count()

Unnamed: 0_level_0,text,label,prediction
category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ageism,161,161,161
asian,262,262,262
mask,199,199,199
rus_ukr,60,60,60
us_capitol,123,123,123
vaccine,226,226,226


In [82]:
# Per-category classification report for the Q4 predictions.
calculate(df_q4result)

ageism
              precision    recall  f1-score   support

           0      0.772     0.726     0.748        84
           1      0.720     0.766     0.742        77

    accuracy                          0.745       161
   macro avg      0.746     0.746     0.745       161
weighted avg      0.747     0.745     0.745       161

mask
              precision    recall  f1-score   support

           0      0.865     0.761     0.810       109
           1      0.748     0.856     0.798        90

    accuracy                          0.804       199
   macro avg      0.806     0.809     0.804       199
weighted avg      0.812     0.804     0.804       199

vaccine
              precision    recall  f1-score   support

           0      0.890     0.866     0.878       149
           1      0.753     0.792     0.772        77

    accuracy                          0.841       226
   macro avg      0.821     0.829     0.825       226
weighted avg      0.843     0.841     0.842       226
