In [None]:
!pip install 'transformers[torch]' torch

In [130]:
import torch
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
import pandas as pd
import numpy as np
import random
import gc

from sklearn.metrics import accuracy_score, f1_score

def set_seeds(seed):
    """Seed every random number generator this notebook relies on.

    The same integer is handed, in order, to PyTorch's CPU generator,
    PyTorch's CUDA generator(s), NumPy's legacy global generator and
    Python's built-in ``random`` module.

    Args:
        seed: Integer seed shared by all generators.

    Returns:
        None.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Fix all RNG seeds up front so repeated runs of the notebook are comparable.
set_seeds(393)

# Use the first CUDA device when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

def get_metrics(preds, labels):
    """Score predictions against reference labels and print a one-line report.

    Args:
        preds: Predicted labels, passed to sklearn as ``y_pred``.
        labels: Reference labels, passed to sklearn as ``y_true``.

    Returns:
        A tuple ``(accuracy, f1_micro, f1_macro)``.

    Note:
        The printed ``jacc acc`` field is the plain ``accuracy_score`` value.
    """
    scores = (
        accuracy_score(labels, preds),
        f1_score(labels, preds, average='micro'),
        f1_score(labels, preds, average='macro'),
    )
    print('jacc acc:{}, f1 micro score:{} f1 macro score:{}'.format(*scores))
    return scores

In [131]:
# Tokenizer and 2-label sequence-classification model are both loaded from the
# same checkpoint directory.
MODEL_DIR = '/content/drive/MyDrive/data/abusexlmr'
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR, num_labels=2)
# Move the model to the shared `device` (CUDA when available, else CPU) instead
# of calling .cuda() unconditionally: the inputs are sent to `device` too, and
# .cuda() would raise on a CPU-only runtime. As the last expression of the cell,
# this still displays the module summary.
model.to(device)

XLMRobertaForSequenceClassification(
  (roberta): XLMRobertaModel(
    (embeddings): XLMRobertaEmbeddings(
      (word_embeddings): Embedding(250002, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): XLMRobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x XLMRobertaLayer(
          (attention): XLMRobertaAttention(
            (self): XLMRobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): XLMRobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768,

In [138]:
# Evaluate the classifier on the test CSV in fixed-size chunks, recording
# accuracy / micro-F1 / macro-F1 for every chunk.
test_dataset = pd.read_csv('/content/drive/MyDrive/data/telugu/macd_tel_test.csv')

EVAL_LIMIT = 5000  # evaluate at most this many leading rows
CHUNK_SIZE = 1000  # rows tokenized and sent through the model per forward pass

# Materialise both columns once (instead of rebuilding the lists for every
# chunk) and clamp to the rows actually present, so a file with fewer than
# EVAL_LIMIT rows never produces an empty chunk.
all_text = list(test_dataset.text)[:EVAL_LIMIT]
all_labels = list(test_dataset.label_yn)[:EVAL_LIMIT]

accuracy = []
f1_micro = []
f1_macro = []

# Make inference mode explicit (dropout off) in case another cell has switched
# the model to training mode.
model.eval()

for i in range(0, len(all_text), CHUNK_SIZE):
  test_text, test_labels = all_text[i:i+CHUNK_SIZE], all_labels[i:i+CHUNK_SIZE]
  # Pad to the longest sequence in the chunk; truncate anything above 512 tokens.
  test_encodings = tokenizer(test_text, truncation=True, padding=True, max_length=512, return_tensors='pt')
  test_encodings = test_encodings.to(device)
  # Release cached GPU memory before the next large forward pass.
  gc.collect()
  torch.cuda.empty_cache()
  # No gradients are needed for evaluation.
  with torch.no_grad():
    outputs = model(**test_encodings)
  # Highest-scoring class per row, moved to the CPU for sklearn.
  predicted_labels = torch.argmax(outputs.logits, dim=1).cpu().numpy()
  print(f'Batch {i // CHUNK_SIZE + 1}')
  a, f1m, f1M = get_metrics(predicted_labels, test_labels)
  accuracy.append(a)
  f1_micro.append(f1m)
  f1_macro.append(f1M)

Batch 1
jacc acc:0.939, f1 micro score:0.939 f1 macro score:0.9389950585997466
Batch 2
jacc acc:0.92, f1 micro score:0.92 f1 macro score:0.9196283615437785
Batch 3
jacc acc:0.924, f1 micro score:0.924 f1 macro score:0.9237434730433176
Batch 4
jacc acc:0.894, f1 micro score:0.894 f1 macro score:0.8931343885472325
Batch 5
jacc acc:0.905, f1 micro score:0.905 f1 macro score:0.9045455413222352


In [139]:
# Report the unweighted mean of the per-batch scores collected in the
# evaluation loop (`accuracy`, `f1_micro`, `f1_macro` hold one entry per batch).
# NOTE(review): an average of per-batch macro-F1 values need not equal macro-F1
# computed once over all predictions -- confirm which figure is intended.
print(f'Accuracy: {np.mean(accuracy)}, F1 Micro: {np.mean(f1_micro)}, F1 Macro: {np.mean(f1_macro)}')


Accuracy: 0.9164, F1 Micro: 0.9164, F1 Macro: 0.9160093646112621
