In [1]:
! pip install transformers datasets


Collecting datasets
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting pyarrow-hotfix (from datasets)
  Downloading pyarrow_hotfix-0.6-py3-none-any.whl (7.9 kB)
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyarrow-hotfix, dill, multiprocess, datasets
Successfully installed datasets-2.15.0 dill-0.3.7 multiprocess-0.70.15 pyarrow-hotfix-0.6


In [2]:
! pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [4]:
import torch
torch.cuda.empty_cache()

# Confirm that the GPU is detected

assert torch.cuda.is_available()

# Get the GPU device name.
device_name = torch.cuda.get_device_name()
n_gpu = torch.cuda.device_count()
print(f"Found device: {device_name}, n_gpu: {n_gpu}")
device = torch.device("cuda")

Found device: Tesla T4, n_gpu: 1


In [5]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [6]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [7]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch import optim
import sys
import random
import math
import time
from tqdm import tqdm
from sklearn.metrics import precision_recall_fscore_support, f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import ConfusionMatrixDisplay

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

from transformers import BertTokenizer, AutoTokenizer
from transformers import BertModel, AutoModel, AutoModelForSequenceClassification
from transformers import AdamW, get_linear_schedule_with_warmup

from torch.utils.tensorboard import SummaryWriter

use_cuda = True if torch.cuda.is_available() else False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# use_cuda=False
# device='cpu'

torch.autograd.set_detect_anomaly(True)
torch.backends.cudnn.benchmark = True
np.random.seed(0)
torch.manual_seed(0)

base_model = 'twitter-xlm-roberta-base-sentiment'
model_list = ['bert-base-uncased', 'bert-base-multilingual-uncased', 'google/muril-base-cased', 'xlm-roberta-base',
              'ai4bharat/indic-bert','cardiffnlp/twitter-xlm-roberta-base','cardiffnlp/twitter-xlm-roberta-base-sentiment',
              'cardiffnlp/twitter-roberta-base', 'cardiffnlp/twitter-roberta-base-sentiment',
              'cardiffnlp/twitter-roberta-base-hate', 'roberta-base']
model_path = '/content/drive/MyDrive/saved_models/'
results_path = '/content/drive/MyDrive/saved_results/'

In [8]:
lang = 'hx_modified'
model_choice = 6

In [9]:
writer = SummaryWriter(log_dir="/content/drive/MyDrive/" + base_model + "_" + lang)
device

device(type='cuda')

In [10]:
tokenizer = AutoTokenizer.from_pretrained(model_list[model_choice])

MAX_SEQ_LEN = 128

label_idx = 1
text_idx = 0

class HateData(Dataset):
    def __init__(self, data_path, split='train', lang='bengali', aug_prob=0.2, flip_prob=0.5):
        self.split = split
        # self.data = pd.read_parquet(data_path + lang + "_" + split + ".parquet", engine='fastparquet')
        # self.data = pd.read_csv(data_path, sep=',')
        # self.data = pd.read_csv(data_path + lang + "_" + split + ".tsv", sep='\t')
        self.data = pd.read_csv(data_path + split + "_" + lang + ".tsv", sep='\t', lineterminator='\n')

        if self.split == 'train':
            self.label2data = {0:[], 1:[], 2:[]}
            # self.data = self.data[self.data['language'] == lang]

            for i in tqdm(range(len(self.data))):
                row = self.data.iloc[i]
                self.label2data[row[label_idx]].append(row[text_idx])
            self.aug_prob = aug_prob
            self.flip_prob = flip_prob

    def __len__(self):
        return len(self.data)


    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.tolist()

        data = self.data.iloc[index]

        labels = data[label_idx]
        text = data[text_idx]
        inputs = tokenizer(text, padding='max_length', truncation=True, max_length=MAX_SEQ_LEN)
        # print(inputs)
        input_ids = inputs['input_ids']
        token_type_ids = np.zeros(MAX_SEQ_LEN)#inputs['token_type_ids']#
        attn_mask = inputs['attention_mask']

        aug_text = text
        labels_aug = labels

        if self.split == 'train' and labels == 1:
            if np.random.uniform() < self.aug_prob:
                aug_text = np.random.choice(self.label2data[0])

                if np.random.uniform() < self.flip_prob:
                    aug_text = aug_text + " [SEP] " + text
                else:
                    aug_text = text + " [SEP] " + aug_text
            labels_aug = 1

        inputs_aug = tokenizer(aug_text, padding='max_length', truncation=True, max_length=MAX_SEQ_LEN)
        # print(inputs)
        input_ids_aug = inputs_aug['input_ids']
        token_type_ids_aug = np.zeros(MAX_SEQ_LEN)#inputs_aug['token_type_ids']#
        attn_mask_aug = inputs_aug['attention_mask']

        input_ids = torch.tensor(np.vstack([input_ids, input_ids_aug]), dtype=torch.long).view(2, MAX_SEQ_LEN)
        token_type_ids = torch.tensor(np.vstack([token_type_ids, token_type_ids_aug]), dtype=torch.long).view(2, MAX_SEQ_LEN)
        attn_mask = torch.tensor(np.vstack([attn_mask, attn_mask_aug]), dtype=torch.long).view(2, MAX_SEQ_LEN)
        labels = torch.tensor(np.vstack([labels, labels_aug]), dtype=torch.long).view(2)


        return input_ids, attn_mask, token_type_ids, labels


# train_data = HateData(data_path="/home/jupyter/data/implicit-hate-corpus/", lang='latent')
# dataload = DataLoader(train_data, batch_size=4)

# for i in (dataload):
#     print(i[0].shape)
#     print(i[1].shape)
#     print(i[2].shape)
#     print(i[3].shape)
#     break




config.json:   0%|          | 0.00/841 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

In [11]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()

        H1, H2, num_class = 768, 128, 3
        self.bert = AutoModel.from_pretrained(model_list[model_choice])

        # for param in self.bert.parameters():
        #     param.requires_grad = False

        self.clf = nn.Sequential(
            nn.Linear(H1, H2),
            nn.ReLU(),
            nn.Linear(H2, H2),
            nn.ReLU(),
            nn.Linear(H2, num_class)
        )


    def forward(self, input_ids, attn_mask, token_type_ids):
        outputs = self.bert(input_ids, attn_mask)#, token_type_ids)
        cls_emb = outputs.pooler_output # (batch, 768)
        logits = self.clf(cls_emb) # (batch, num_class)
        return logits


In [12]:
loss_fn = nn.CrossEntropyLoss()#

In [13]:
def train(input_ids, attn_mask, token_type_ids, label, model, model_opt, scdl):

    model_opt.zero_grad()

    batch_size = input_ids.shape[0]
    seq_len = input_ids.shape[1]

    loss = 0.0

    if use_cuda:
        input_ids = input_ids.to(device)
        attn_mask = attn_mask.to(device)
        token_type_ids = token_type_ids.to(device)
        label = label.to(device)

    # label = label.flatten()

    logits = model(input_ids[:,0,:], attn_mask[:,0,:], token_type_ids[:,0,:])
    logits_aug = model(input_ids[:,1,:], attn_mask[:,1,:], token_type_ids[:,1,:])

    loss = loss_fn(logits, label[:,0]) + loss_fn(logits_aug, label[:,1])

    # if torch.isnan(loss):
    #     pass
    # else:
    loss.backward()
    # torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) # clip gradients to prevent exploding
    model_opt.step()
    scdl.step()
    # print(loss)
    return float(loss.item())



In [14]:
def evaluate(input_ids, attn_mask, token_type_ids, label, model, mode='train'):

    batch_size = input_ids.shape[0]
    seq_len = input_ids.shape[1]


    with torch.no_grad():
        if use_cuda:
            input_ids = input_ids.to(device)
            attn_mask = attn_mask.to(device)
            token_type_ids = token_type_ids.to(device)
            label = label.to(device)

        # label = label.flatten()

        logits = model(input_ids[:,0,:], attn_mask[:,0,:], token_type_ids[:,0,:])
        loss = loss_fn(logits, label[:,0])

        if mode == 'train':
            return float(loss.item())

        preds = torch.argmax(logits, dim=1).flatten()
        # acc = (preds == label).cpu().numpy().mean() * 100

        return float(loss.item()), preds.cpu().numpy()




In [15]:
df_test = pd.read_csv("/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/test_hx_modified.tsv", sep='\t', lineterminator='\n')
gt_labels = np.array(df_test['label'])

In [16]:
len(gt_labels)

500

In [17]:
def trainIters(model, epochs, train_loader, test_loader, learning_rate=3e-5, log_step=30, valid_step=30, mode='train'):

    model_opt = AdamW(model.parameters(), lr=learning_rate, eps=1e-8)
    num_train_steps = (len(train_loader)*epochs)
    scdl = get_linear_schedule_with_warmup(model_opt, num_warmup_steps=int(0.1*num_train_steps), num_training_steps=num_train_steps)

    print("Initialised optimizer and lr scheduler")

    # valid_best_loss = []
    best_acc = 0.0
    tot = len(train_data) // train_loader.batch_size
    tot_val = len(val_data) // test_loader.batch_size
    plot_steps = 0

    for epoch in range(epochs):
        train_loss_total = 0.0
        train_step = 0
        # Training

        model.train()
        for entry in tqdm(train_loader, total=tot, position=0, leave=True):
            loss = train(entry[0], entry[1], entry[2], entry[3], model, model_opt, scdl)
            plot_steps += 1
            train_step += 1
            # if not math.isnan(loss) :
            train_loss_total = train_loss_total + loss

            train_loss = train_loss_total / train_step

            if plot_steps % log_step == 0:
                writer.add_scalar("Train Loss", train_loss, plot_steps)

            if (plot_steps % valid_step == 0) or (plot_steps >= num_train_steps - 1):
                model.eval()
                test_pred = []

                for entry in tqdm(test_loader, total=tot_val, position=0, leave=True):
                    loss_v, pred_v = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
                    # if not math.isnan(loss) :
                    test_pred.extend([pd for pd in pred_v])

                # val_acc = (test_pred == gt_labels).mean().item()
                val_acc = f1_score(gt_labels, test_pred, average='macro')
                print("Validation F1: " + str(val_acc))
                writer.add_scalar("Val F1", val_acc, plot_steps)


                #   Save best model
                # state = {
                #         'epoch': epoch,
                #         'state_dict': model.state_dict(),
                #         'optimizer': model_opt.state_dict(),
                #         'loss': train_loss,
                #         'scheduler': scdl.state_dict(),
                # }


                if val_acc > best_acc:
                    torch.save(model.state_dict(), model_path + "model_" + base_model + "_" + lang + "_easymix_mono_redo" + ".pth")
                    print("Model saved for step: " + str(plot_steps))
                    best_acc = val_acc

                model.train()
            writer.flush()


        print('epoch: '+str(epoch))
        print('total loss: '+str(train_loss_total/tot))

        # wr_train = open(results_path + "train_loss_" + base_model + ".txt", "a")
        # wr_train.write("epoch " + str(epoch) + ": " + str(train_loss_total/tot) + "\n")
        # wr_train.close()




##For EasyMix Tech


In [18]:
train_data = HateData(data_path="/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/", split='train', lang=lang)
val_data = HateData(data_path="/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/", split='test', lang=lang)

100%|██████████| 15383/15383 [00:03<00:00, 4874.77it/s]


In [19]:
BS = 16
# weights = [1.0]*15383
# weights.extend([0.5]*(len(train_data) - 15383))
# sampler = WeightedRandomSampler(weights, num_samples=20000)

dataload = DataLoader(train_data, batch_size=BS, shuffle=True)
dataload_val = DataLoader(val_data, batch_size=BS, shuffle=False)

In [20]:
(len(train_data)/16)//2

480.0

In [21]:
model = Classifier()
model = model.to(device)

pytorch_model.bin:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

Some weights of XLMRobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:
trainIters(model, 5, dataload, dataload_val)

Initialised optimizer and lr scheduler


32it [00:03,  9.37it/s]                        


Validation F1: 0.1698956780923994


  3%|▎         | 30/961 [00:53<1:01:30,  3.96s/it]

Model saved for step: 30


32it [00:03,  9.09it/s]                        


Validation F1: 0.1947990960054005


  6%|▌         | 60/961 [01:46<1:01:21,  4.09s/it]

Model saved for step: 60


32it [00:03,  8.79it/s]                        


Validation F1: 0.40388680425265794


  9%|▉         | 90/961 [02:39<57:11,  3.94s/it]

Model saved for step: 90


32it [00:03,  8.62it/s]                        


Validation F1: 0.43700725103087507


 12%|█▏        | 120/961 [03:31<54:49,  3.91s/it]

Model saved for step: 120


32it [00:03,  8.59it/s]                        
 16%|█▌        | 150/961 [04:18<33:57,  2.51s/it]

Validation F1: 0.4292110874200426


32it [00:03,  8.85it/s]                        


Validation F1: 0.5019354752369577


 19%|█▊        | 180/961 [05:15<1:21:34,  6.27s/it]

Model saved for step: 180


32it [00:03,  8.77it/s]                        


Validation F1: 0.5186561430467277


 22%|██▏       | 210/961 [06:12<1:02:04,  4.96s/it]

Model saved for step: 210


32it [00:03,  8.57it/s]                        


Validation F1: 0.5553990409925869


 25%|██▍       | 240/961 [07:04<47:41,  3.97s/it]

Model saved for step: 240


32it [00:03,  8.91it/s]                        


Validation F1: 0.5814808563310546


 28%|██▊       | 270/961 [07:57<47:14,  4.10s/it]

Model saved for step: 270


32it [00:03,  8.89it/s]                        
 31%|███       | 300/961 [08:45<26:36,  2.41s/it]

Validation F1: 0.5161143599740091


32it [00:03,  8.95it/s]                        


Validation F1: 0.5907278096230676


 34%|███▍      | 330/961 [09:36<43:03,  4.09s/it]

Model saved for step: 330


32it [00:03,  8.86it/s]                        
 37%|███▋      | 360/961 [10:24<24:01,  2.40s/it]

Validation F1: 0.5819186657294


32it [00:03,  8.92it/s]                        


Validation F1: 0.6120239479548354


 41%|████      | 390/961 [11:14<38:29,  4.05s/it]

Model saved for step: 390


32it [00:03,  8.85it/s]                        
 44%|████▎     | 420/961 [12:02<21:17,  2.36s/it]

Validation F1: 0.6044091721723465


32it [00:03,  8.87it/s]                        
 47%|████▋     | 450/961 [12:48<20:42,  2.43s/it]

Validation F1: 0.5681792065846261


32it [00:03,  8.91it/s]                        
 50%|████▉     | 480/961 [13:34<20:47,  2.59s/it]

Validation F1: 0.6057997057176713


32it [00:03,  8.64it/s]                        


Validation F1: 0.6814147588340131


 53%|█████▎    | 510/961 [14:23<30:30,  4.06s/it]

Model saved for step: 510


32it [00:03,  8.87it/s]                        
 56%|█████▌    | 540/961 [15:11<17:56,  2.56s/it]

Validation F1: 0.6660111733670289


32it [00:03,  8.75it/s]                        
 59%|█████▉    | 570/961 [15:57<17:11,  2.64s/it]

Validation F1: 0.6115037986013595


32it [00:03,  8.56it/s]                        


Validation F1: 0.6842113877855164


 62%|██████▏   | 600/961 [16:53<36:24,  6.05s/it]

Model saved for step: 600


32it [00:03,  8.81it/s]                        


Validation F1: 0.6886044359375609


 66%|██████▌   | 630/961 [17:47<21:40,  3.93s/it]

Model saved for step: 630


32it [00:03,  8.64it/s]                        
 69%|██████▊   | 660/961 [18:34<11:55,  2.38s/it]

Validation F1: 0.6560644827823049


32it [00:03,  8.94it/s]                        
 72%|███████▏  | 690/961 [19:19<10:46,  2.39s/it]

Validation F1: 0.6861321148917687


32it [00:03,  8.87it/s]                        


Validation F1: 0.691268256849019


 75%|███████▍  | 720/961 [20:10<16:00,  3.99s/it]

Model saved for step: 720


32it [00:03,  8.51it/s]                        


Validation F1: 0.717395426387942


 78%|███████▊  | 750/961 [21:09<14:14,  4.05s/it]

Model saved for step: 750


32it [00:03,  8.74it/s]                        
 81%|████████  | 780/961 [21:57<07:56,  2.63s/it]

Validation F1: 0.7098869850970692


32it [00:03,  8.55it/s]                        


Validation F1: 0.7241524564816014


 84%|████████▍ | 810/961 [22:47<10:01,  3.98s/it]

Model saved for step: 810


32it [00:03,  8.80it/s]                        
 87%|████████▋ | 840/961 [23:41<05:02,  2.50s/it]

Validation F1: 0.662243234787662


32it [00:03,  8.77it/s]                        


Validation F1: 0.736126392126392


 91%|█████████ | 870/961 [24:40<06:05,  4.02s/it]

Model saved for step: 870


32it [00:04,  7.67it/s]                        
 94%|█████████▎| 900/961 [25:39<02:39,  2.62s/it]

Validation F1: 0.7017344770484444


32it [00:04,  7.60it/s]                        
 97%|█████████▋| 930/961 [26:57<01:57,  3.78s/it]

Validation F1: 0.690442039518015


32it [00:03,  8.57it/s]                        
100%|█████████▉| 960/961 [27:44<00:02,  2.46s/it]

Validation F1: 0.6551435548220351


962it [27:47,  1.73s/it]


epoch: 0
total loss: 1.7104706412547586


32it [00:03,  8.13it/s]                        
  3%|▎         | 28/961 [00:43<39:52,  2.56s/it]

Validation F1: 0.7042894570532762


32it [00:03,  8.87it/s]                        


Validation F1: 0.7472490093925517


  6%|▌         | 58/961 [01:33<59:39,  3.96s/it]

Model saved for step: 1020


32it [00:03,  8.73it/s]                        


Validation F1: 0.7506035281877536


  9%|▉         | 88/961 [02:26<58:11,  4.00s/it]

Model saved for step: 1050


32it [00:04,  7.91it/s]                        
 12%|█▏        | 118/961 [03:14<35:53,  2.55s/it]

Validation F1: 0.7142282270908202


32it [00:03,  8.58it/s]                        
 15%|█▌        | 148/961 [04:00<32:00,  2.36s/it]

Validation F1: 0.6756209356208122


32it [00:03,  8.92it/s]                        
 19%|█▊        | 178/961 [04:45<30:57,  2.37s/it]

Validation F1: 0.6956024920816378


32it [00:03,  8.69it/s]                        
 22%|██▏       | 208/961 [05:52<41:48,  3.33s/it]

Validation F1: 0.7320468060799552


32it [00:03,  8.66it/s]                        
 25%|██▍       | 238/961 [06:46<28:25,  2.36s/it]

Validation F1: 0.7302157900197116


32it [00:03,  8.64it/s]                        
 28%|██▊       | 268/961 [07:35<27:21,  2.37s/it]

Validation F1: 0.7460274147540321


32it [00:03,  8.98it/s]                        


Validation F1: 0.7584755715903256


 31%|███       | 298/961 [08:25<44:25,  4.02s/it]

Model saved for step: 1260


32it [00:03,  8.72it/s]                        
 34%|███▍      | 328/961 [09:20<28:07,  2.67s/it]

Validation F1: 0.7244426592568525


32it [00:03,  8.64it/s]                        
 37%|███▋      | 358/961 [10:04<24:00,  2.39s/it]

Validation F1: 0.7522658163977369


32it [00:03,  8.90it/s]                        
 40%|████      | 388/961 [10:59<26:51,  2.81s/it]

Validation F1: 0.7330133231877012


32it [00:03,  8.80it/s]                        
 43%|████▎     | 418/961 [11:55<21:50,  2.41s/it]

Validation F1: 0.7413964255140725


32it [00:04,  7.70it/s]                        
 47%|████▋     | 448/961 [12:54<30:05,  3.52s/it]

Validation F1: 0.7124688668969034


32it [00:04,  7.55it/s]                        


Validation F1: 0.7977687778176102


 50%|████▉     | 478/961 [14:05<39:59,  4.97s/it]

Model saved for step: 1440


32it [00:03,  8.85it/s]                        
 53%|█████▎    | 508/961 [14:55<21:09,  2.80s/it]

Validation F1: 0.7864068488819146


32it [00:03,  8.01it/s]                        


Validation F1: 0.7978828852140448


 56%|█████▌    | 538/961 [16:05<37:51,  5.37s/it]

Model saved for step: 1500


32it [00:04,  7.68it/s]                        
 59%|█████▉    | 568/961 [17:04<19:58,  3.05s/it]

Validation F1: 0.733993687680289


32it [00:03,  8.45it/s]                        
 62%|██████▏   | 598/961 [18:05<15:30,  2.56s/it]

Validation F1: 0.7674930988274317


32it [00:04,  7.98it/s]                        
 65%|██████▌   | 628/961 [19:00<14:24,  2.60s/it]

Validation F1: 0.7507883590193463


32it [00:03,  8.49it/s]                        
 68%|██████▊   | 658/961 [19:59<14:52,  2.95s/it]

Validation F1: 0.7675774868710993


32it [00:04,  7.92it/s]                        
 72%|███████▏  | 688/961 [20:58<14:16,  3.14s/it]

Validation F1: 0.7546810357801004


32it [00:03,  8.57it/s]                        
 75%|███████▍  | 718/961 [21:45<09:35,  2.37s/it]

Validation F1: 0.7838586101975767


32it [00:03,  8.89it/s]                        
 78%|███████▊  | 748/961 [22:29<08:17,  2.34s/it]

Validation F1: 0.7685732020927553


32it [00:03,  8.93it/s]                        
 81%|████████  | 778/961 [23:14<07:18,  2.40s/it]

Validation F1: 0.7670070725992898


32it [00:03,  8.76it/s]                        
 84%|████████▍ | 808/961 [24:07<05:59,  2.35s/it]

Validation F1: 0.7707933046246658


32it [00:03,  8.94it/s]                        
 87%|████████▋ | 838/961 [24:52<04:55,  2.40s/it]

Validation F1: 0.7936889041330817


32it [00:03,  8.90it/s]                        
 90%|█████████ | 868/961 [25:38<03:52,  2.50s/it]

Validation F1: 0.7824246423208617


32it [00:03,  8.79it/s]                        
 93%|█████████▎| 898/961 [26:24<02:46,  2.64s/it]

Validation F1: 0.7731092846690141


32it [00:03,  8.61it/s]                        


Validation F1: 0.8017658614581844


 97%|█████████▋| 928/961 [27:15<02:23,  4.36s/it]

Model saved for step: 1890


32it [00:03,  8.83it/s]                        


Validation F1: 0.8065496047310772


100%|█████████▉| 958/961 [28:08<00:12,  4.14s/it]

Model saved for step: 1920


962it [28:15,  1.76s/it]


epoch: 1
total loss: 1.283541322684809


32it [00:03,  8.82it/s]                        
  3%|▎         | 26/961 [00:41<37:30,  2.41s/it]

Validation F1: 0.7879079710065625


32it [00:03,  8.86it/s]                        
  6%|▌         | 56/961 [01:27<37:20,  2.48s/it]

Validation F1: 0.7863125907960397


32it [00:03,  8.84it/s]                        


Validation F1: 0.8143440687979394


  9%|▉         | 86/961 [02:18<1:01:05,  4.19s/it]

Model saved for step: 2010


32it [00:03,  8.84it/s]                        
 12%|█▏        | 116/961 [03:07<34:19,  2.44s/it]

Validation F1: 0.7892097566516171


32it [00:03,  8.93it/s]                        
 15%|█▌        | 146/961 [03:53<34:40,  2.55s/it]

Validation F1: 0.7854572114102906


32it [00:03,  8.77it/s]                        


Validation F1: 0.8256628914607012


 18%|█▊        | 176/961 [04:50<1:21:30,  6.23s/it]

Model saved for step: 2100


32it [00:03,  8.48it/s]                        


Validation F1: 0.829275845132082


 21%|██▏       | 206/961 [05:43<53:49,  4.28s/it]

Model saved for step: 2130


32it [00:03,  8.82it/s]                        
 25%|██▍       | 236/961 [06:31<32:17,  2.67s/it]

Validation F1: 0.8193287037037037


32it [00:03,  8.80it/s]                        


Validation F1: 0.844311458709632


 28%|██▊       | 266/961 [07:28<45:55,  3.97s/it]

Model saved for step: 2190


32it [00:03,  8.83it/s]                        
 31%|███       | 296/961 [08:16<26:15,  2.37s/it]

Validation F1: 0.8366549444008943


32it [00:03,  8.38it/s]                        
 34%|███▍      | 326/961 [09:02<27:27,  2.59s/it]

Validation F1: 0.8291208972004883


32it [00:03,  8.84it/s]                        
 37%|███▋      | 356/961 [09:53<24:00,  2.38s/it]

Validation F1: 0.8145767073361961


32it [00:03,  8.96it/s]                        
 40%|████      | 386/961 [10:39<23:31,  2.46s/it]

Validation F1: 0.8339404820026916


32it [00:03,  8.90it/s]                        
 43%|████▎     | 416/961 [11:25<24:08,  2.66s/it]

Validation F1: 0.8218104335751395


32it [00:03,  8.59it/s]                        
 46%|████▋     | 446/961 [12:09<22:02,  2.57s/it]

Validation F1: 0.8168383261973444


32it [00:03,  8.65it/s]                        


Validation F1: 0.8543734596509456


 50%|████▉     | 476/961 [12:59<32:31,  4.02s/it]

Model saved for step: 2400


32it [00:03,  8.51it/s]                        
 53%|█████▎    | 506/961 [13:46<19:10,  2.53s/it]

Validation F1: 0.8398231587007096


32it [00:03,  8.72it/s]                        
 56%|█████▌    | 536/961 [14:30<16:41,  2.36s/it]

Validation F1: 0.8371909716652128


32it [00:03,  8.88it/s]                        
 59%|█████▉    | 566/961 [15:15<15:51,  2.41s/it]

Validation F1: 0.8474251371099157


32it [00:03,  8.95it/s]                        


Validation F1: 0.8552224301124308


 62%|██████▏   | 596/961 [16:09<29:49,  4.90s/it]

Model saved for step: 2520


32it [00:03,  8.58it/s]                        


Validation F1: 0.8632888242968323


 65%|██████▌   | 626/961 [17:01<22:18,  3.99s/it]

Model saved for step: 2550


32it [00:03,  8.57it/s]                        


Validation F1: 0.8673231906530051


 68%|██████▊   | 656/961 [17:53<20:37,  4.06s/it]

Model saved for step: 2580


32it [00:03,  8.80it/s]                        
 71%|███████▏  | 686/961 [18:40<12:02,  2.63s/it]

Validation F1: 0.8654342459098404


32it [00:03,  8.53it/s]                        
 75%|███████▍  | 716/961 [19:25<10:15,  2.51s/it]

Validation F1: 0.864784371750007


32it [00:03,  8.77it/s]                        
 78%|███████▊  | 746/961 [20:08<08:24,  2.35s/it]

Validation F1: 0.8661136737012439


32it [00:03,  8.83it/s]                        
 81%|████████  | 776/961 [20:54<07:28,  2.42s/it]

Validation F1: 0.8637578005924137


32it [00:03,  8.93it/s]                        


Validation F1: 0.8795966895188686


 84%|████████▍ | 806/961 [21:46<12:34,  4.87s/it]

Model saved for step: 2730


32it [00:03,  8.70it/s]                        
 87%|████████▋ | 836/961 [22:33<04:56,  2.37s/it]

Validation F1: 0.8706370791932289


32it [00:03,  8.89it/s]                        


Validation F1: 0.8888366082907498


 90%|█████████ | 866/961 [23:24<06:26,  4.07s/it]

Model saved for step: 2790


32it [00:03,  8.60it/s]                        
 93%|█████████▎| 896/961 [24:11<02:34,  2.37s/it]

Validation F1: 0.8816498346551723


32it [00:03,  8.93it/s]                        
 96%|█████████▋| 926/961 [24:55<01:23,  2.37s/it]

Validation F1: 0.8679666964851295


32it [00:03,  8.92it/s]                        
 99%|█████████▉| 956/961 [25:40<00:12,  2.50s/it]

Validation F1: 0.8671112602410312


962it [25:49,  1.61s/it]


epoch: 2
total loss: 0.9831733296893762


32it [00:03,  8.77it/s]                        


Validation F1: 0.9020531425741698


  2%|▏         | 24/961 [00:40<1:03:35,  4.07s/it]

Model saved for step: 2910


32it [00:03,  8.86it/s]                        


Validation F1: 0.9116021168301479


  6%|▌         | 54/961 [01:33<59:15,  3.92s/it]

Model saved for step: 2940


32it [00:03,  8.83it/s]                        
  9%|▊         | 84/961 [02:21<34:46,  2.38s/it]

Validation F1: 0.897765368400643


32it [00:03,  8.92it/s]                        
 12%|█▏        | 114/961 [03:07<34:19,  2.43s/it]

Validation F1: 0.8734803113012498


32it [00:03,  8.86it/s]                        
 15%|█▍        | 144/961 [03:52<34:40,  2.55s/it]

Validation F1: 0.8955258552148432


32it [00:03,  8.67it/s]                        
 18%|█▊        | 174/961 [04:37<33:55,  2.59s/it]

Validation F1: 0.8918732411455395


32it [00:03,  8.67it/s]                        
 21%|██        | 204/961 [05:20<29:33,  2.34s/it]

Validation F1: 0.8976924501885901


32it [00:03,  8.86it/s]                        
 24%|██▍       | 234/961 [06:05<28:49,  2.38s/it]

Validation F1: 0.9001794303815963


32it [00:03,  8.91it/s]                        
 27%|██▋       | 264/961 [06:50<28:35,  2.46s/it]

Validation F1: 0.9005854721172639


32it [00:03,  8.82it/s]                        
 31%|███       | 294/961 [07:35<29:22,  2.64s/it]

Validation F1: 0.9112101463312247


32it [00:03,  8.54it/s]                        
 34%|███▎      | 324/961 [08:19<25:21,  2.39s/it]

Validation F1: 0.9058191504500365


32it [00:03,  8.90it/s]                        
 37%|███▋      | 354/961 [09:03<23:47,  2.35s/it]

Validation F1: 0.870889512535482


32it [00:03,  8.86it/s]                        


Validation F1: 0.9240388596910337


 40%|███▉      | 384/961 [09:53<37:23,  3.89s/it]

Model saved for step: 3270


32it [00:03,  8.80it/s]                        


Validation F1: 0.9272145854941553


 43%|████▎     | 414/961 [10:54<58:37,  6.43s/it]

Model saved for step: 3300


32it [00:03,  8.80it/s]                        
 46%|████▌     | 444/961 [11:42<21:01,  2.44s/it]

Validation F1: 0.9234931457565851


32it [00:03,  8.96it/s]                        
 49%|████▉     | 474/961 [12:28<20:49,  2.57s/it]

Validation F1: 0.8919920140761443


32it [00:03,  8.63it/s]                        


Validation F1: 0.9378039047638481


 52%|█████▏    | 504/961 [13:17<30:51,  4.05s/it]

Model saved for step: 3390


32it [00:03,  8.86it/s]                        
 56%|█████▌    | 534/961 [14:05<17:45,  2.50s/it]

Validation F1: 0.9327250901446726


32it [00:03,  8.81it/s]                        
 59%|█████▊    | 564/961 [14:49<16:59,  2.57s/it]

Validation F1: 0.9365490282848397


32it [00:03,  8.61it/s]                        
 62%|██████▏   | 594/961 [15:33<14:20,  2.34s/it]

Validation F1: 0.909368440071003


32it [00:03,  8.89it/s]                        


Validation F1: 0.9419135107396855


 65%|██████▍   | 624/961 [16:23<22:41,  4.04s/it]

Model saved for step: 3510


32it [00:03,  8.64it/s]                        
 68%|██████▊   | 654/961 [17:09<11:59,  2.35s/it]

Validation F1: 0.9216276637807118


32it [00:03,  8.93it/s]                        
 71%|███████   | 684/961 [17:53<10:54,  2.36s/it]

Validation F1: 0.9365048149283638


32it [00:03,  8.89it/s]                        


Validation F1: 0.9475602454722347


 74%|███████▍  | 714/961 [18:44<16:21,  3.98s/it]

Model saved for step: 3600


32it [00:03,  8.87it/s]                        
 77%|███████▋  | 744/961 [19:31<08:29,  2.35s/it]

Validation F1: 0.9277007277512413


32it [00:03,  8.97it/s]                        
 81%|████████  | 774/961 [20:16<07:32,  2.42s/it]

Validation F1: 0.9400247381958247


32it [00:03,  8.89it/s]                        
 84%|████████▎ | 804/961 [21:02<07:00,  2.68s/it]

Validation F1: 0.9468080788843013


32it [00:03,  8.59it/s]                        
 87%|████████▋ | 834/961 [21:47<05:23,  2.54s/it]

Validation F1: 0.9452205765249243


32it [00:03,  8.71it/s]                        
 90%|████████▉ | 864/961 [22:31<03:48,  2.36s/it]

Validation F1: 0.9449082826746859


32it [00:03,  8.84it/s]                        


Validation F1: 0.9547183949899182


 93%|█████████▎| 894/961 [23:27<06:29,  5.81s/it]

Model saved for step: 3780


32it [00:03,  8.78it/s]                        
 96%|█████████▌| 924/961 [24:15<01:34,  2.57s/it]

Validation F1: 0.943822637292465


32it [00:03,  8.66it/s]                        
 99%|█████████▉| 954/961 [25:00<00:18,  2.61s/it]

Validation F1: 0.9513673071567809


962it [25:10,  1.57s/it]


epoch: 3
total loss: 0.6365631845053847


32it [00:03,  8.59it/s]                        
  2%|▏         | 22/961 [00:33<37:03,  2.37s/it]

Validation F1: 0.9446037368607776


32it [00:03,  8.90it/s]                        
  5%|▌         | 52/961 [01:18<35:56,  2.37s/it]

Validation F1: 0.9520581154465932


32it [00:03,  8.88it/s]                        


Validation F1: 0.9560511342228448


  9%|▊         | 82/961 [02:08<58:11,  3.97s/it]

Model saved for step: 3930


32it [00:03,  8.84it/s]                        
 12%|█▏        | 112/961 [02:56<33:24,  2.36s/it]

Validation F1: 0.95237345943009


32it [00:03,  8.96it/s]                        
 15%|█▍        | 142/961 [03:42<33:54,  2.48s/it]

Validation F1: 0.9539664870277115


32it [00:03,  8.91it/s]                        
 18%|█▊        | 172/961 [04:27<34:39,  2.64s/it]

Validation F1: 0.9483495910245519


32it [00:03,  8.52it/s]                        
 21%|██        | 202/961 [05:11<31:39,  2.50s/it]

Validation F1: 0.9443069063758719


32it [00:03,  8.80it/s]                        


Validation F1: 0.9576824075062573


 24%|██▍       | 232/961 [06:03<58:30,  4.82s/it]

Model saved for step: 4080


32it [00:03,  8.78it/s]                        


Validation F1: 0.9585956634849313


 27%|██▋       | 262/961 [06:58<56:04,  4.81s/it]

Model saved for step: 4110


32it [00:03,  8.85it/s]                        
 30%|███       | 292/961 [07:46<26:50,  2.41s/it]

Validation F1: 0.9582797148057122


32it [00:03,  8.87it/s]                        
 34%|███▎      | 322/961 [08:32<26:26,  2.48s/it]

Validation F1: 0.9579148263411298


32it [00:03,  8.83it/s]                        


Validation F1: 0.9688353409912237


 37%|███▋      | 352/961 [09:21<41:37,  4.10s/it]

Model saved for step: 4200


32it [00:03,  8.81it/s]                        
 40%|███▉      | 382/961 [10:10<23:48,  2.47s/it]

Validation F1: 0.9652302306301571


32it [00:03,  8.93it/s]                        
 43%|████▎     | 412/961 [10:55<24:14,  2.65s/it]

Validation F1: 0.9641861359246827


32it [00:03,  8.53it/s]                        
 46%|████▌     | 442/961 [11:40<22:13,  2.57s/it]

Validation F1: 0.9641861359246827


32it [00:03,  8.52it/s]                        
 49%|████▉     | 472/961 [12:25<19:43,  2.42s/it]

Validation F1: 0.9674683841535003


32it [00:03,  8.86it/s]                        
 52%|█████▏    | 502/961 [13:11<18:16,  2.39s/it]

Validation F1: 0.9658664998566477


32it [00:03,  8.90it/s]                        
 55%|█████▌    | 532/961 [13:57<17:33,  2.46s/it]

Validation F1: 0.9600050065467647


32it [00:03,  8.81it/s]                        


Validation F1: 0.9707127860218497


 58%|█████▊    | 562/961 [14:47<27:39,  4.16s/it]

Model saved for step: 4410


32it [00:03,  8.74it/s]                        
 62%|██████▏   | 592/961 [15:36<14:57,  2.43s/it]

Validation F1: 0.9636480411046886


32it [00:03,  8.83it/s]                        
 65%|██████▍   | 622/961 [16:22<13:48,  2.44s/it]

Validation F1: 0.9633311307959357


32it [00:03,  8.86it/s]                        
 68%|██████▊   | 652/961 [17:08<13:44,  2.67s/it]

Validation F1: 0.9674683841535003


32it [00:03,  8.64it/s]                        
 71%|███████   | 682/961 [17:52<11:48,  2.54s/it]

Validation F1: 0.9677756697060423


32it [00:03,  8.74it/s]                        
 74%|███████▍  | 712/961 [18:36<09:49,  2.37s/it]

Validation F1: 0.9633311307959357


32it [00:03,  8.86it/s]                        


Validation F1: 0.9707206065401137


 77%|███████▋  | 742/961 [19:26<14:13,  3.90s/it]

Model saved for step: 4590


32it [00:03,  8.69it/s]                        
 80%|████████  | 772/961 [20:13<07:27,  2.37s/it]

Validation F1: 0.9693777821089062


32it [00:03,  8.88it/s]                        
 83%|████████▎ | 802/961 [20:57<06:16,  2.37s/it]

Validation F1: 0.9677756697060423


32it [00:03,  8.91it/s]                        
 87%|████████▋ | 832/961 [21:42<05:28,  2.55s/it]

Validation F1: 0.9669252834550438


32it [00:03,  8.77it/s]                        
 90%|████████▉ | 862/961 [22:27<04:14,  2.57s/it]

Validation F1: 0.9636394000159486


32it [00:03,  8.57it/s]                        
 93%|█████████▎| 892/961 [23:10<02:42,  2.35s/it]

Validation F1: 0.9609282282517094


32it [00:03,  8.66it/s]                        
 96%|█████████▌| 922/961 [24:00<01:44,  2.69s/it]

Validation F1: 0.9669252834550438


32it [00:03,  8.89it/s]                        
 99%|█████████▉| 952/961 [24:44<00:21,  2.36s/it]

Validation F1: 0.9633311307959357


32it [00:03,  8.90it/s]                        
100%|██████████| 961/961 [25:00<00:00,  2.38s/it]

Validation F1: 0.9633311307959357


32it [00:03,  8.59it/s]                        
962it [25:05,  1.56s/it]

Validation F1: 0.9633311307959357
epoch: 4
total loss: 0.3937284553251021





######################## TESTING ######################

TEST - 1 : test_hx_modified

In [23]:
model = Classifier()
model.load_state_dict(torch.load("/content/drive/MyDrive/saved_models/model_twitter-xlm-roberta-base-sentiment_hx_modified_easymix_mono_redo.pth", map_location=device))
model = model.to(device)

Some weights of XLMRobertaModel were not initialized from the model checkpoint at cardiffnlp/twitter-xlm-roberta-base-sentiment and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [24]:
lang = 'hx_modified'

In [25]:
# test_data = HateData(data_path="/home/jupyter/data/test_data/bq_test_" + lang + "_process_10k.csv")
test_data = HateData(data_path="/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/", split='test', lang=lang)
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

In [26]:
model.eval()
test_loss = []
test_pred = []

# wr = open(results_path + "test_prediction_" + base_model + "_" + lang + "_process_10k.txt", "w")
wr = open(results_path + "test_prediction_" + base_model + "_" + lang + ".txt", "w")
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")

test_loss = np.mean(test_loss)#.item()

print("Test Loss: ", test_loss)

wr.close()

100%|██████████| 500/500 [00:07<00:00, 63.46it/s]

Test Loss:  0.11667285490594805





In [27]:
df_test = pd.read_csv("/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/test_hx_modified.tsv", sep='\t', lineterminator='\n')
gt_labels = np.array(df_test['label'])

In [28]:
print(classification_report(gt_labels, test_pred, digits=4))

              precision    recall  f1-score   support

           0     0.9961    0.9771    0.9865       262
           1     0.9286    0.9701    0.9489        67
           2     0.9711    0.9825    0.9767       171

    accuracy                         0.9780       500
   macro avg     0.9653    0.9766    0.9707       500
weighted avg     0.9785    0.9780    0.9781       500



TEST - 2 : test_approach1_Invariance

In [29]:
lang = 'approach1_Invariance'

In [30]:
# test_data = HateData(data_path="/home/jupyter/data/test_data/bq_test_" + lang + "_process_10k.csv")
test_data = HateData(data_path="/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/", split='test', lang=lang)
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

In [31]:
model.eval()
test_loss = []
test_pred = []

# wr = open(results_path + "test_prediction_" + base_model + "_" + lang + "_process_10k.txt", "w")
wr = open(results_path + "test_prediction_" + base_model + "_" + lang + ".txt", "w")
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")

test_loss = np.mean(test_loss)#.item()

print("Test Loss: ", test_loss)

wr.close()

100%|██████████| 500/500 [00:08<00:00, 60.47it/s]

Test Loss:  0.12121780578047037





In [34]:
df_test = pd.read_csv("/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/test_approach1_Invariance.tsv", sep='\t', lineterminator='\n')
gt_labels = np.array(df_test['label'])

In [35]:
print(classification_report(gt_labels, test_pred, digits=4))

              precision    recall  f1-score   support

           0     0.9961    0.9809    0.9885       262
           1     0.9286    0.9701    0.9489        67
           2     0.9767    0.9825    0.9796       171

    accuracy                         0.9800       500
   macro avg     0.9671    0.9778    0.9723       500
weighted avg     0.9804    0.9800    0.9801       500



TEST - 3 : approach2

In [53]:

lang = 'approach2'
test_data = HateData(data_path="/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/", split='test', lang=lang)
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

In [54]:
model.eval()
test_loss = []
test_pred = []

# wr = open(results_path + "test_prediction_" + base_model + "_" + lang + "_process_10k.txt", "w")
wr = open(results_path + "test_prediction_" + base_model + "_" + lang + ".txt", "w")
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")

test_loss = np.mean(test_loss)#.item()

print("Test Loss: ", test_loss)

wr.close()

100%|██████████| 500/500 [00:08<00:00, 62.32it/s]

Test Loss:  0.12736381910927594





In [56]:
df_test = pd.read_csv("/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/test_approach2.tsv", sep='\t', lineterminator='\n')
gt_labels = np.array(df_test['label'])
print(classification_report(gt_labels, test_pred, digits=4))

              precision    recall  f1-score   support

           0     0.9961    0.9695    0.9826       262
           1     0.9041    0.9851    0.9429        67
           2     0.9767    0.9825    0.9796       171

    accuracy                         0.9760       500
   macro avg     0.9590    0.9790    0.9683       500
weighted avg     0.9771    0.9760    0.9762       500



TEST - 4 : approach3

In [41]:

lang = 'approach3'
test_data = HateData(data_path="/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/", split='test', lang=lang)
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

In [42]:
model.eval()
test_loss = []
test_pred = []

# wr = open(results_path + "test_prediction_" + base_model + "_" + lang + "_process_10k.txt", "w")
wr = open(results_path + "test_prediction_" + base_model + "_" + lang + ".txt", "w")
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")

test_loss = np.mean(test_loss)#.item()

print("Test Loss: ", test_loss)

wr.close()


100%|██████████| 500/500 [00:08<00:00, 56.16it/s]

Test Loss:  0.2159024285096675





In [43]:
df_test = pd.read_csv("/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/test_approach3.tsv", sep='\t', lineterminator='\n')
gt_labels = np.array(df_test['label'])
print(classification_report(gt_labels, test_pred, digits=4))

              precision    recall  f1-score   support

           0     0.9805    0.9618    0.9711       262
           1     0.7590    0.9403    0.8400        67
           2     0.9688    0.9064    0.9366       171

    accuracy                         0.9400       500
   macro avg     0.9028    0.9362    0.9159       500
weighted avg     0.9468    0.9400    0.9417       500



TEST - 5 : approach4_directional_expectation

In [44]:

lang = 'approach4_directional_expectation'
test_data = HateData(data_path="/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/", split='test', lang=lang)
test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

In [45]:
model.eval()
test_loss = []
test_pred = []

# wr = open(results_path + "test_prediction_" + base_model + "_" + lang + "_process_10k.txt", "w")
wr = open(results_path + "test_prediction_" + base_model + "_" + lang + ".txt", "w")
for entry in tqdm(test_loader, total=len(test_data)//test_loader.batch_size, position=0, leave=True):
    v_loss, v_pred = evaluate(entry[0], entry[1], entry[2], entry[3], model, mode='test')
    test_loss.append(v_loss)
    test_pred.append(v_pred)
    wr.write(str(v_pred)+"\n")

test_loss = np.mean(test_loss)#.item()

print("Test Loss: ", test_loss)

wr.close()

100%|██████████| 500/500 [00:08<00:00, 57.54it/s]

Test Loss:  0.17388737560249864





In [46]:
df_test = pd.read_csv("/content/drive/MyDrive/data_efficient_hatedetect/data/multilingual/test_approach3.tsv", sep='\t', lineterminator='\n')
gt_labels = np.array(df_test['label'])
print(classification_report(gt_labels, test_pred, digits=4))

              precision    recall  f1-score   support

           0     0.9662    0.9809    0.9735       262
           1     0.9016    0.8209    0.8594        67
           2     0.9653    0.9766    0.9709       171

    accuracy                         0.9580       500
   macro avg     0.9444    0.9261    0.9346       500
weighted avg     0.9572    0.9580    0.9573       500

