In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from tqdm import tqdm
import os
from sklearn.metrics import classification_report, f1_score
from torch.utils.data import Dataset
import collections
import emoji
import re
from emot.emo_unicode import EMOTICONS_EMO
from bs4 import BeautifulSoup
import nltk
nltk.download('words')
import nltk, string, re, spacy,unicodedata, random
nltk.download('punkt')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.tokenize import ToktokTokenizer

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package words to
[nltk_data]     C:\Users\cs\AppData\Roaming\nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\cs\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
def deEmojify(string):
    """Return ``string`` with every run of "emoji-like" characters deleted.

    The characters to delete are described as one regex character class built
    from the code-point ranges listed below. Matches are removed outright
    (replaced by the empty string), not replaced by a space.

    NOTE(review): several ranges are far wider than emoji. In particular
    U+24C2-U+1F251 and U+10000-U+10FFFF also cover CJK, Hangul and every
    supplementary-plane character, so such text is deleted too. The Tamil
    block (U+0B80-U+0BFF) lies below every range and is left untouched.
    """
    stripped_ranges = (
        "\U0001F600-\U0001F64F",  # emoticons
        "\U0001F300-\U0001F5FF",  # symbols & pictographs
        "\U0001F680-\U0001F6FF",  # transport & map symbols
        "\U0001F1E0-\U0001F1FF",  # regional indicators (flags)
        "\U00002500-\U00002BEF",  # box drawing, shapes, misc symbols, arrows
        "\U00002702-\U000027B0",  # dingbats
        "\U00002702-\U000027B0",  # dingbats (listed twice in the pattern; harmless)
        "\U000024C2-\U0001F251",  # very wide span, see NOTE in the docstring
        "\U0001f926-\U0001f937",  # people / gesture emoji
        "\U00010000-\U0010ffff",  # all supplementary planes
        "\u2640-\u2642",          # gender signs
        "\u2600-\u2B55",          # misc symbols through heavy large circle
        "\u200d",                 # zero-width joiner
        "\u23cf",                 # eject symbol
        "\u23e9",                 # fast-forward symbol
        "\u231a",                 # watch
        "\ufe0f",                 # variation selector-16
        "\u3030",                 # wavy dash
    )
    matcher = re.compile("[" + "".join(stripped_ranges) + "]+", flags=re.UNICODE)
    return matcher.sub(r'', string)
        
# Helpers built once and reused by every preprocess() call.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|'
    r'[!*,]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)
# Characters deleted outright near the end of preprocessing (includes '\' and ']').
_STRIP_CHARS = frozenset('!"$%&()*+-/:;<=>?[\\]^_{|}~.')
_TOKTOK = ToktokTokenizer()


def preprocess(text):
    """Normalise one raw comment into a whitespace-separated token string.

    Steps, in order:
      1. coerce the input to ``str``;
      2. strip HTML markup with BeautifulSoup;
      3. delete URLs;
      4. delete emoji-like characters via ``deEmojify`` (they are removed, not
         converted to words);
      5. replace ``. ' " / - _`` with spaces;
      6. replace space-prefixed digit runs with a space;
      7. replace commas with spaces;
      8. replace isolated single-character words with a space and trim;
      9. delete the remaining characters listed in ``_STRIP_CHARS``;
      10. tokenise with ToktokTokenizer and re-join the tokens with single spaces.

    HTML and URL removal run first on purpose: they need ``<``, ``>``, ``&``,
    ``/`` and ``.`` to still be present. Running them after the punctuation
    passes (as before) meant the URL pattern could never match and the HTML
    parser never saw a tag.

    Args:
        text: Any object; it is converted with ``str()`` first.

    Returns:
        The cleaned text as a single string (possibly empty).
    """
    text = str(text)
    text = BeautifulSoup(text, 'html.parser').get_text()
    text = _URL_PATTERN.sub('', text)  # remove url patterns
    text = deEmojify(text)
    # Turn separators that often glue words together into spaces.
    text = re.sub(r'([\.\'\"\/\-\_\--])', ' ', text)
    text = re.sub(r" \d+", " ", text)
    text = text.replace(",", " ")
    text = re.sub(r'(?:^| )\w(?:$| )', ' ', text).strip()
    text = ''.join(ch for ch in text if ch not in _STRIP_CHARS)
    # Tokenise and re-join so that whitespace is normalised.
    tokens = [token.strip() for token in _TOKTOK.tokenize(text)]
    return ' '.join(tokens)

def clean(df):
    """Run ``preprocess`` over every value of ``df['text']``, in place.

    The frame passed in is modified (its ``text`` column is overwritten) and
    nothing is returned.
    """
    cleaned_text = df['text'].apply(preprocess)
    df['text'] = cleaned_text


In [3]:
# Load both splits and discard the index column that was saved into the CSVs.
train = pd.read_csv('tamil_train.csv').drop(columns=['Unnamed: 0'])
val = pd.read_csv('tamil_dev.csv').drop(columns=['Unnamed: 0'])

# dropna() returns a new frame, so the result has to be assigned; otherwise rows
# with missing values stay and a missing label fails later in the label lookup.
train = train.dropna().reset_index(drop=True)
val = val.dropna().reset_index(drop=True)

train['label'] = pd.Categorical(train.label)
val['label'] = pd.Categorical(val.label)

# clean() overwrites the 'text' column in place; preprocess() already coerces
# every value with str(), so no separate string conversion is needed.
clean(train)
clean(val)
train

Unnamed: 0,text,label
0,மோவி வேற லேவில் லா எரிகா பொகுது,Not_offensive
1,லோவ் அஜித் குமார் விவேகம் மோவி இங்கி மேஜி பட் ...,not-Tamil
2,படம் நல்ல காமெடி படாம இருகும் போலை,Not_offensive
3,கார்த்திக் சுப்பராஜ் அன்னி இந்த படம் வெற்றி அட...,Not_offensive
4,கவுண்டர் தேவர் சார்பாக வெற்றி பெற வாழ்த்துக்கள்,Not_offensive
...,...,...
35134,டிரெண்டிங் நம்பர் #2 இதுக்கு நம்மலாம் காரணம்னு...,Not_offensive
35135,மோவி ஸ்கிரிப்ட் சூப்பர் அதுவும் ஹிப் ஹாப் தமிழ...,Not_offensive
35136,ஜஸ்ட் லிக்ஸ் போர் லிக்ஸ்,Not_offensive
35137,ஆலோ லே லோ கண்டா லே லோ,not-Tamil


In [4]:
class tamil_Offensive_Dataset(Dataset):
    """Map-style dataset pairing tokenizer encodings with integer labels.

    Args:
        encodings: When ``bpe`` is False, a mapping from field name (e.g.
            ``'input_ids'``) to an indexable collection with one entry per
            example. When ``bpe`` is True, an indexable collection of objects
            exposing ``.ids`` and ``.attention_mask``.
        labels: Indexable collection with one label per example; its length
            defines the dataset length.
        bpe: Selects which of the two ``encodings`` layouts is used.
    """

    def __init__(self, encodings, labels, bpe = False):
        self.encodings = encodings
        self.labels = labels
        self.is_bpe_tokenized = bpe

    @staticmethod
    def _as_new_tensor(value):
        """Return an independent tensor copy of ``value``.

        ``torch.tensor(existing_tensor)`` emits a UserWarning on every call,
        so tensors are copied with ``clone().detach()`` and only non-tensor
        data goes through ``torch.tensor``.
        """
        if isinstance(value, torch.Tensor):
            return value.clone().detach()
        return torch.tensor(value)

    def __getitem__(self, idx):
        """Return a dict of tensors for example ``idx``, including ``'labels'``."""
        if not self.is_bpe_tokenized:
            item = {
                field: self._as_new_tensor(values[idx])
                for field, values in self.encodings.items()
            }
        else:
            encoding = self.encodings[idx]
            item = {
                'input_ids': torch.LongTensor(encoding.ids),
                'attention_mask': torch.LongTensor(encoding.attention_mask)
            }
        item['labels'] = self._as_new_tensor(self.labels[idx])
        return item

    def __len__(self):
        """Number of examples, taken from the labels collection."""
        return len(self.labels)

In [5]:
from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification

# Name used later when writing fine-tuned checkpoints to disk.
model_name = 'MURIL_cased_temp_tamil_weighted'

# The same checkpoint id provides both the tokenizer and the encoder weights;
# the sequence-classification model is created with 3 output labels.
checkpoint_id = "simran-kh/muril-cased-temp"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_id)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_id, num_labels=3)

Some weights of the model checkpoint at simran-kh/muril-cased-temp were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not

In [6]:
 # Optimiser
# Use PyTorch's AdamW: the `transformers.AdamW` class is deprecated in favour of
# it. eps and weight_decay are set explicitly to the old class's defaults
# (1e-6 and 0.0) so the optimisation setup stays as close as possible to before;
# torch's own defaults are 1e-8 and 0.01.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, eps=1e-6, weight_decay=0.0)



In [7]:
# Class id for each label string; ids follow the order of the names below.
label_mapping = dict(zip(('Not_offensive', 'not-Tamil', 'Offensive'), range(3)))

In [8]:
# Collecting Text and Labels
# Sentences become plain Python lists; each label string is translated to its
# class id through label_mapping (an unknown label raises KeyError).
train_batch_sentences = train['text'].tolist()
dev_batch_sentences = val['text'].tolist()
train_batch_labels = [label_mapping[name] for name in train['label']]
dev_batch_labels = [label_mapping[name] for name in val['label']]

In [9]:
# Convert to Tensor
MAX_LENGTH = 64  # every sequence is padded / truncated to this many tokens

# The previous check tested for the key 'parameters' but then read
# '_parameters', so it either skipped the BPE branch or raised KeyError.
# Look up the one attribute that is actually read, defaulting to "not BPE".
tokenizer_params = getattr(tokenizer, '_parameters', None) or {}
if tokenizer_params.get('model') == 'ByteLevelBPE':
    train_encodings = tokenizer.encode_batch(train_batch_sentences)
    dev_encodings = tokenizer.encode_batch(dev_batch_sentences)
else:
    train_encodings = tokenizer(train_batch_sentences, padding='max_length', truncation=True, max_length=MAX_LENGTH, return_tensors="pt")
    dev_encodings = tokenizer(dev_batch_sentences, padding='max_length', truncation=True, max_length=MAX_LENGTH, return_tensors="pt")

train_labels = torch.tensor(train_batch_labels)
dev_labels = torch.tensor(dev_batch_labels)

In [10]:
# Defining Datasets
# Both splits use the dict-of-tensors encoding layout (bpe=False).
dev_dataset = tamil_Offensive_Dataset(dev_encodings, dev_labels, bpe=False)
train_dataset = tamil_Offensive_Dataset(train_encodings, train_labels, bpe=False)

# Run on the GPU when one is visible, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Early-stopping state read and updated by the training loop.
best_val_f1, count = 0, 0

In [11]:
# Alternate Loss Fn
# Weighted Manual Loss Function
from sklearn.utils import class_weight
import torch.nn as nn

# sklearn's 'balanced' per-class weights, computed from the training labels ...
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(train_batch_labels),
    y=train_batch_labels,
)
# ... then exponentiated and normalised so the final weights sum to 1.
exp_weights = np.exp(balanced_weights)
weights = exp_weights / np.sum(exp_weights)
class_weights = torch.FloatTensor(weights).to(device)
loss_function = nn.CrossEntropyLoss(weight=class_weights, reduction='mean')

In [12]:
# Dataloaders
# True -> the training loop uses the class-weighted loss_function instead of
# the loss returned by the model.
loss_weighted = True

# Only the training split is shuffled; dev order must stay aligned with
# dev_batch_labels for scoring.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=16)
dev_loader = DataLoader(dataset=dev_dataset, shuffle=False, batch_size=16)

In [13]:
MAX_EPOCHS = 30
PATIENCE = 5  # consecutive epochs without a better dev macro-F1 before stopping

# Fixed label ids / names so the reports keep all classes even if one of them
# is absent from an epoch's labels or predictions.
target_names = list(label_mapping.keys())
class_ids = list(label_mapping.values())

# Make sure the checkpoint directory exists before torch.save writes into it.
os.makedirs('finetuned_models', exist_ok=True)

for epoch in range(MAX_EPOCHS):
    train_preds = []
    # Named differently from the global `train_labels` tensor so that running
    # this cell does not clobber the labels used to build the datasets.
    epoch_train_labels = []
    total_train_loss = 0
    model.train()
    print("==========================================================")
    print("Epoch {}".format(epoch))
    print("Train")
    for batch in tqdm(train_loader):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        # With labels supplied, outputs[0] is the model's own loss and
        # outputs[1] the logits.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        if loss_weighted:
            loss = loss_function(outputs[1], labels)
        else:
            loss = outputs[0]
        loss.backward()
        optimizer.step()

        train_preds.extend(np.argmax(outputs[1].detach().cpu().numpy(), axis=1))
        epoch_train_labels.extend(labels.cpu().numpy())
        # Accumulate the mean of the per-batch losses.
        total_train_loss += loss.item()/len(train_loader)

    print("Dev")
    dev_preds = []
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for batch in tqdm(dev_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            if loss_weighted:
                loss = loss_function(outputs[1], labels)
            else:
                loss = outputs[0]
            total_val_loss += loss.item()/len(dev_loader)

            dev_preds.extend(np.argmax(outputs[1].cpu().numpy(), axis=1))

    # dev_loader is not shuffled, so predictions line up with dev_batch_labels.
    y_true = dev_batch_labels
    y_pred = dev_preds
    train_report = classification_report(epoch_train_labels, train_preds, labels=class_ids, target_names=target_names)
    report = classification_report(y_true, y_pred, labels=class_ids, target_names=target_names)
    val_f1 = f1_score(y_true, y_pred, average='macro')

    if val_f1 > best_val_f1:
        # New best dev macro-F1: save both the raw state dict and the
        # save_pretrained() directory, then reset the patience counter.
        PATH = 'finetuned_models/' + model_name + '.pth'
        torch.save(model.state_dict(), PATH)
        model.save_pretrained(os.path.join('finetuned_berts/', model_name))
        best_val_f1 = val_f1
        count = 0
    else:
        count += 1

    print(train_report)
    print(report)
    print("Epoch {}, Train Loss = {}, Val Loss = {}, Val F1 = {}, Best Val f1 = {}, stagnant = {}".format(epoch, total_train_loss, total_val_loss, val_f1, best_val_f1, count))
    if count >= PATIENCE:
        print("No increase for 5 epochs, Stopping ...")
        break

Epoch 0
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:46<00:00,  4.71it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.11it/s]


               precision    recall  f1-score   support

Not_offensive       0.87      0.41      0.56     25425
    not-Tamil       0.07      0.80      0.13      1454
    Offensive       0.44      0.36      0.39      8260

     accuracy                           0.42     35139
    macro avg       0.46      0.52      0.36     35139
 weighted avg       0.74      0.42      0.50     35139

               precision    recall  f1-score   support

Not_offensive       0.92      0.56      0.69      3193
    not-Tamil       0.14      0.94      0.24       172
    Offensive       0.49      0.62      0.55      1023

     accuracy                           0.59      4388
    macro avg       0.52      0.70      0.49      4388
 weighted avg       0.79      0.59      0.64      4388

Epoch 0, Train Loss = 0.8800103211301468, Val Loss = 0.6014192890646782, Val F1 = 0.4939140101958632, Best Val f1 = 0.4939140101958632, stagnant = 0
Epoch 1
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:40<00:00,  4.77it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.15it/s]


               precision    recall  f1-score   support

Not_offensive       0.90      0.65      0.76     25425
    not-Tamil       0.23      0.90      0.37      1454
    Offensive       0.50      0.68      0.58      8260

     accuracy                           0.67     35139
    macro avg       0.55      0.74      0.57     35139
 weighted avg       0.78      0.67      0.70     35139

               precision    recall  f1-score   support

Not_offensive       0.89      0.76      0.82      3193
    not-Tamil       0.37      0.88      0.52       172
    Offensive       0.55      0.68      0.61      1023

     accuracy                           0.75      4388
    macro avg       0.61      0.77      0.65      4388
 weighted avg       0.79      0.75      0.76      4388

Epoch 1, Train Loss = 0.5331171129702588, Val Loss = 0.500526748807593, Val F1 = 0.6511876826401195, Best Val f1 = 0.6511876826401195, stagnant = 0
Epoch 2
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:40<00:00,  4.77it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.10it/s]


               precision    recall  f1-score   support

Not_offensive       0.91      0.70      0.79     25425
    not-Tamil       0.31      0.94      0.46      1454
    Offensive       0.53      0.71      0.61      8260

     accuracy                           0.72     35139
    macro avg       0.58      0.78      0.62     35139
 weighted avg       0.80      0.72      0.74     35139

               precision    recall  f1-score   support

Not_offensive       0.95      0.50      0.66      3193
    not-Tamil       0.18      0.97      0.31       172
    Offensive       0.45      0.79      0.57      1023

     accuracy                           0.59      4388
    macro avg       0.53      0.75      0.51      4388
 weighted avg       0.80      0.59      0.62      4388

Epoch 2, Train Loss = 0.4412118757385662, Val Loss = 0.5666332374360747, Val F1 = 0.5113287216443001, Best Val f1 = 0.6511876826401195, stagnant = 1
Epoch 3
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:40<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.12it/s]


               precision    recall  f1-score   support

Not_offensive       0.92      0.73      0.81     25425
    not-Tamil       0.38      0.95      0.54      1454
    Offensive       0.54      0.75      0.63      8260

     accuracy                           0.74     35139
    macro avg       0.61      0.81      0.66     35139
 weighted avg       0.81      0.74      0.76     35139

               precision    recall  f1-score   support

Not_offensive       0.94      0.52      0.67      3193
    not-Tamil       0.18      0.96      0.30       172
    Offensive       0.48      0.80      0.60      1023

     accuracy                           0.60      4388
    macro avg       0.53      0.76      0.52      4388
 weighted avg       0.81      0.60      0.64      4388

Epoch 3, Train Loss = 0.40328413091068444, Val Loss = 0.5462617726285346, Val F1 = 0.5237873837236088, Best Val f1 = 0.6511876826401195, stagnant = 2
Epoch 4
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:40<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:12<00:00, 21.16it/s]


               precision    recall  f1-score   support

Not_offensive       0.92      0.75      0.82     25425
    not-Tamil       0.41      0.97      0.57      1454
    Offensive       0.56      0.76      0.65      8260

     accuracy                           0.76     35139
    macro avg       0.63      0.82      0.68     35139
 weighted avg       0.82      0.76      0.77     35139

               precision    recall  f1-score   support

Not_offensive       0.89      0.80      0.84      3193
    not-Tamil       0.43      0.90      0.58       172
    Offensive       0.57      0.66      0.61      1023

     accuracy                           0.77      4388
    macro avg       0.63      0.78      0.68      4388
 weighted avg       0.80      0.77      0.78      4388

Epoch 4, Train Loss = 0.3747976782166211, Val Loss = 0.47594656323167434, Val F1 = 0.6797257563456803, Best Val f1 = 0.6797257563456803, stagnant = 0
Epoch 5
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 20.81it/s]


               precision    recall  f1-score   support

Not_offensive       0.93      0.76      0.84     25425
    not-Tamil       0.45      0.97      0.61      1454
    Offensive       0.58      0.79      0.67      8260

     accuracy                           0.78     35139
    macro avg       0.65      0.84      0.71     35139
 weighted avg       0.83      0.78      0.79     35139

               precision    recall  f1-score   support

Not_offensive       0.89      0.73      0.80      3193
    not-Tamil       0.24      0.94      0.38       172
    Offensive       0.58      0.63      0.60      1023

     accuracy                           0.71      4388
    macro avg       0.57      0.77      0.60      4388
 weighted avg       0.79      0.71      0.74      4388

Epoch 5, Train Loss = 0.34170618166949523, Val Loss = 0.4851590979810464, Val F1 = 0.5956447168028385, Best Val f1 = 0.6797257563456803, stagnant = 1
Epoch 6
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.09it/s]


               precision    recall  f1-score   support

Not_offensive       0.93      0.78      0.85     25425
    not-Tamil       0.50      0.98      0.66      1454
    Offensive       0.59      0.79      0.68      8260

     accuracy                           0.79     35139
    macro avg       0.68      0.85      0.73     35139
 weighted avg       0.83      0.79      0.80     35139

               precision    recall  f1-score   support

Not_offensive       0.93      0.67      0.78      3193
    not-Tamil       0.30      0.93      0.45       172
    Offensive       0.51      0.76      0.61      1023

     accuracy                           0.70      4388
    macro avg       0.58      0.79      0.61      4388
 weighted avg       0.80      0.70      0.72      4388

Epoch 6, Train Loss = 0.31508624821589354, Val Loss = 0.5552437703683973, Val F1 = 0.611004601500335, Best Val f1 = 0.6797257563456803, stagnant = 2
Epoch 7
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.12it/s]


               precision    recall  f1-score   support

Not_offensive       0.93      0.79      0.86     25425
    not-Tamil       0.53      0.98      0.69      1454
    Offensive       0.61      0.81      0.69      8260

     accuracy                           0.80     35139
    macro avg       0.69      0.86      0.75     35139
 weighted avg       0.84      0.80      0.81     35139

               precision    recall  f1-score   support

Not_offensive       0.90      0.80      0.85      3193
    not-Tamil       0.51      0.87      0.64       172
    Offensive       0.57      0.68      0.62      1023

     accuracy                           0.78      4388
    macro avg       0.66      0.78      0.70      4388
 weighted avg       0.80      0.78      0.79      4388

Epoch 7, Train Loss = 0.28196452151800344, Val Loss = 0.6016375110865656, Val F1 = 0.7019432968381318, Best Val f1 = 0.7019432968381318, stagnant = 0
Epoch 8
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:40<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.07it/s]


               precision    recall  f1-score   support

Not_offensive       0.94      0.79      0.86     25425
    not-Tamil       0.52      0.98      0.68      1454
    Offensive       0.61      0.81      0.70      8260

     accuracy                           0.81     35139
    macro avg       0.69      0.86      0.75     35139
 weighted avg       0.84      0.81      0.81     35139

               precision    recall  f1-score   support

Not_offensive       0.91      0.70      0.79      3193
    not-Tamil       0.21      0.95      0.35       172
    Offensive       0.55      0.63      0.59      1023

     accuracy                           0.69      4388
    macro avg       0.56      0.76      0.58      4388
 weighted avg       0.80      0.69      0.73      4388

Epoch 8, Train Loss = 0.28852368522207344, Val Loss = 0.6077516331591393, Val F1 = 0.5758198846541599, Best Val f1 = 0.7019432968381318, stagnant = 1
Epoch 9
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.13it/s]


               precision    recall  f1-score   support

Not_offensive       0.94      0.81      0.87     25425
    not-Tamil       0.59      0.99      0.74      1454
    Offensive       0.63      0.82      0.71      8260

     accuracy                           0.82     35139
    macro avg       0.72      0.87      0.77     35139
 weighted avg       0.85      0.82      0.83     35139

               precision    recall  f1-score   support

Not_offensive       0.91      0.74      0.82      3193
    not-Tamil       0.54      0.83      0.65       172
    Offensive       0.50      0.77      0.61      1023

     accuracy                           0.75      4388
    macro avg       0.65      0.78      0.69      4388
 weighted avg       0.80      0.75      0.76      4388

Epoch 9, Train Loss = 0.26302680736161754, Val Loss = 0.6321216509525073, Val F1 = 0.6923351946262932, Best Val f1 = 0.7019432968381318, stagnant = 2
Epoch 10
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:40<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.12it/s]


               precision    recall  f1-score   support

Not_offensive       0.95      0.82      0.88     25425
    not-Tamil       0.62      0.98      0.76      1454
    Offensive       0.63      0.83      0.72      8260

     accuracy                           0.83     35139
    macro avg       0.73      0.88      0.78     35139
 weighted avg       0.86      0.83      0.83     35139

               precision    recall  f1-score   support

Not_offensive       0.92      0.73      0.82      3193
    not-Tamil       0.57      0.81      0.67       172
    Offensive       0.51      0.79      0.62      1023

     accuracy                           0.75      4388
    macro avg       0.67      0.78      0.70      4388
 weighted avg       0.81      0.75      0.76      4388

Epoch 10, Train Loss = 0.2548949824828185, Val Loss = 0.6625015082853765, Val F1 = 0.7016481940165274, Best Val f1 = 0.7019432968381318, stagnant = 3
Epoch 11
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.13it/s]


               precision    recall  f1-score   support

Not_offensive       0.95      0.83      0.89     25425
    not-Tamil       0.68      0.99      0.80      1454
    Offensive       0.65      0.85      0.74      8260

     accuracy                           0.84     35139
    macro avg       0.76      0.89      0.81     35139
 weighted avg       0.87      0.84      0.85     35139

               precision    recall  f1-score   support

Not_offensive       0.88      0.85      0.86      3193
    not-Tamil       0.73      0.72      0.72       172
    Offensive       0.59      0.66      0.62      1023

     accuracy                           0.80      4388
    macro avg       0.73      0.74      0.74      4388
 weighted avg       0.81      0.80      0.80      4388

Epoch 11, Train Loss = 0.235747973533503, Val Loss = 1.0162223423577161, Val F1 = 0.7354817733720092, Best Val f1 = 0.7354817733720092, stagnant = 0
Epoch 12
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:12<00:00, 21.16it/s]


               precision    recall  f1-score   support

Not_offensive       0.95      0.83      0.89     25425
    not-Tamil       0.66      0.99      0.79      1454
    Offensive       0.65      0.85      0.74      8260

     accuracy                           0.84     35139
    macro avg       0.75      0.89      0.81     35139
 weighted avg       0.87      0.84      0.85     35139

               precision    recall  f1-score   support

Not_offensive       0.88      0.85      0.86      3193
    not-Tamil       0.65      0.76      0.70       172
    Offensive       0.59      0.64      0.62      1023

     accuracy                           0.80      4388
    macro avg       0.71      0.75      0.73      4388
 weighted avg       0.80      0.80      0.80      4388

Epoch 12, Train Loss = 0.2264156802471564, Val Loss = 0.9648495602463792, Val F1 = 0.7275699223329606, Best Val f1 = 0.7354817733720092, stagnant = 1
Epoch 13
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.13it/s]


               precision    recall  f1-score   support

Not_offensive       0.95      0.84      0.89     25425
    not-Tamil       0.67      0.99      0.80      1454
    Offensive       0.67      0.86      0.75      8260

     accuracy                           0.85     35139
    macro avg       0.76      0.90      0.81     35139
 weighted avg       0.88      0.85      0.86     35139

               precision    recall  f1-score   support

Not_offensive       0.90      0.80      0.84      3193
    not-Tamil       0.65      0.74      0.70       172
    Offensive       0.54      0.72      0.62      1023

     accuracy                           0.78      4388
    macro avg       0.70      0.75      0.72      4388
 weighted avg       0.80      0.78      0.79      4388

Epoch 13, Train Loss = 0.2090143829172508, Val Loss = 1.0425420778041539, Val F1 = 0.7191294430950991, Best Val f1 = 0.7354817733720092, stagnant = 2
Epoch 14
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:40<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:12<00:00, 21.18it/s]


               precision    recall  f1-score   support

Not_offensive       0.96      0.84      0.90     25425
    not-Tamil       0.69      0.99      0.81      1454
    Offensive       0.68      0.87      0.76      8260

     accuracy                           0.86     35139
    macro avg       0.77      0.90      0.82     35139
 weighted avg       0.88      0.86      0.86     35139

               precision    recall  f1-score   support

Not_offensive       0.90      0.78      0.84      3193
    not-Tamil       0.56      0.82      0.67       172
    Offensive       0.54      0.72      0.62      1023

     accuracy                           0.77      4388
    macro avg       0.67      0.77      0.71      4388
 weighted avg       0.80      0.77      0.78      4388

Epoch 14, Train Loss = 0.20633432141564143, Val Loss = 0.803391502392055, Val F1 = 0.7075316536819977, Best Val f1 = 0.7354817733720092, stagnant = 3
Epoch 15
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:12<00:00, 21.15it/s]


               precision    recall  f1-score   support

Not_offensive       0.96      0.85      0.90     25425
    not-Tamil       0.68      0.99      0.81      1454
    Offensive       0.69      0.87      0.77      8260

     accuracy                           0.86     35139
    macro avg       0.78      0.91      0.83     35139
 weighted avg       0.88      0.86      0.87     35139

               precision    recall  f1-score   support

Not_offensive       0.86      0.83      0.85      3193
    not-Tamil       0.28      0.91      0.43       172
    Offensive       0.66      0.50      0.57      1023

     accuracy                           0.75      4388
    macro avg       0.60      0.75      0.61      4388
 weighted avg       0.79      0.75      0.76      4388

Epoch 15, Train Loss = 0.19462832126047508, Val Loss = 0.7292213813427162, Val F1 = 0.6145358443326602, Best Val f1 = 0.7354817733720092, stagnant = 4
Epoch 16
Train


  if __name__ == "__main__":
  from ipykernel import kernelapp as app
100%|██████████| 2197/2197 [07:39<00:00,  4.78it/s]


Dev


100%|██████████| 275/275 [00:13<00:00, 21.14it/s]

               precision    recall  f1-score   support

Not_offensive       0.96      0.87      0.91     25425
    not-Tamil       0.78      1.00      0.87      1454
    Offensive       0.70      0.88      0.78      8260

     accuracy                           0.87     35139
    macro avg       0.81      0.91      0.86     35139
 weighted avg       0.89      0.87      0.88     35139

               precision    recall  f1-score   support

Not_offensive       0.90      0.76      0.82      3193
    not-Tamil       0.37      0.88      0.53       172
    Offensive       0.55      0.69      0.61      1023

     accuracy                           0.75      4388
    macro avg       0.61      0.78      0.65      4388
 weighted avg       0.80      0.75      0.76      4388

Epoch 16, Train Loss = 0.17818162232286827, Val Loss = 0.7078306015157563, Val F1 = 0.6523623885288946, Best Val f1 = 0.7354817733720092, stagnant = 5
No increase for 5 epochs, Stopping ...



