In [1]:
import torch
import Models
from transformers import BertForTokenClassification


nb_bert_pipe = Models.get_nb_bert(2, task="token-classification", model_type=BertForTokenClassification)
mbert_pipe = Models.get_mbert(2, task="token-classification", model_type=BertForTokenClassification)
nor_bert_pipe = Models.get_nor_bert(2, task="token-classification", model_type=BertForTokenClassification)


Some weights of the model checkpoint at NbAiLab/nb-bert-base were not used when initializing BertForTokenClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from 

In [2]:
train_val_test_text = []
labels = []
with open('./word_level_sentiment_polarity/positive.txt', 'r') as polarity_data:
    for line in polarity_data:
        train_val_test_text.append(line)
        labels.append(1)
with open('./word_level_sentiment_polarity/negative.txt', 'r') as polarity_data:
    for line in polarity_data:
        train_val_test_text.append(line)
        labels.append(0)

train_val_test_text = train_val_test_text[:10]
labels = labels[:10]

In [3]:
from sklearn.model_selection import train_test_split

train_text, val_test_text, train_labels, val_test_labels = train_test_split(train_val_test_text, labels, test_size=0.7, random_state=27)
val_text, test_text, val_labels, test_labels = train_test_split(val_test_text, val_test_labels, test_size=0.5, random_state=27)




In [4]:
""" Parse data into datasets """
from transformers import AutoTokenizer
nor_bert_tokenizer = AutoTokenizer.from_pretrained("ltgoslo/norbert")
nb_bert_tokenizer = AutoTokenizer.from_pretrained('NbAiLab/nb-bert-base')
mbert_tokenizer = AutoTokenizer.from_pretrained('bert-base-multilingual-cased')


nor_bert_train_encodings = nor_bert_tokenizer(train_text, truncation=True, padding=True)
nb_bert_train_encodings = nb_bert_tokenizer(train_text, truncation=True, padding=True)
mbert_train_encodings = mbert_tokenizer(train_text, truncation=True, padding=True)

nor_bert_val_encodings = nor_bert_tokenizer(val_text, truncation=True, padding=True)
nb_bert_val_encodings = nb_bert_tokenizer(val_text, truncation=True, padding=True)
mbert_val_encodings = mbert_tokenizer(val_text, truncation=True, padding=True)

nor_bert_test_encodings = nor_bert_tokenizer(test_text, truncation=True, padding=True)
nb_bert_test_encodings = nb_bert_tokenizer(test_text, truncation=True, padding=True)
mbert_test_encodings = mbert_tokenizer(test_text, truncation=True, padding=True)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


In [5]:
class SentinentPolarityDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {}
        for key, val in self.encodings.items():
            item[key] = torch.tensor(val[idx])
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


nor_bert_train_dataset = SentinentPolarityDataset(nor_bert_train_encodings, train_labels)
nb_bert_train_dataset = SentinentPolarityDataset(nb_bert_train_encodings, train_labels)
mbert_train_dataset = SentinentPolarityDataset(mbert_train_encodings, train_labels)

nor_bert_test_dataset = SentinentPolarityDataset(nor_bert_test_encodings, test_labels)
nb_bert_test_dataset = SentinentPolarityDataset(nb_bert_test_encodings, test_labels)
mbert_test_dataset = SentinentPolarityDataset(mbert_test_encodings, test_labels)



In [6]:
""" Tune models """
from torch.utils.data import DataLoader, TensorDataset
from transformers import AdamW
from tqdm import tqdm

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')



loader = DataLoader(nor_bert_train_dataset, batch_size=4, shuffle=True)
for batch in loader:
    print(batch)


{'input_ids': tensor([[  102,  7044,  1956,   103,     0,     0],
        [  102,  7044,   103,     0,     0,     0],
        [  102, 19148,  5599, 25522,  3811,   103]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 0, 0],
        [1, 1, 1, 0, 0, 0],
        [1, 1, 1, 1, 1, 1]]), 'labels': tensor([1, 1, 1])}


In [7]:
def tune(model, optim, dataset):
    loader = DataLoader(dataset, batch_size=4, shuffle=True)
    model.train()
    for epoch in range(3):
        for batch in tqdm(loader):
            print(batch['input_ids'].shape)
            print(batch['attention_mask'].shape)
            print(batch['labels'].shape)
            optim.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask, labels=labels)
            loss = outputs[0]
            loss.backward()
            optim.step()
    model.eval()

def f1_score(TP, FP, FN):
    return TP / (TP + (0.5 * (FP + FN)))

def accuracy(TP, TN, FP, FN):
    return (TP + TN) / (TP + TN + FP + FN)
    
def eval(model, dataset):
    loader = DataLoader(dataset, batch_size=1, shuffle=True)
    model.eval()
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    for batch in tqdm(loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids, token_type_ids=None, attention_mask=attention_mask, labels=labels)
        if(outputs['logits'][0][0] > outputs['logits'][0][1]):
            # False prediction
            if labels[0] == 1:
                FN += 1
            else:
                TN += 1
        else:
            # True prediction
            if labels[0] == 1:
                TP += 1
            else:
                FP += 1
    return f1_score(TP, FP, FN), accuracy(TP, TN, FP, FN)

In [8]:
nor_bert_model=nor_bert_pipe.model
nor_bert_model.to(device)
nor_bert_optim = AdamW(nor_bert_model.parameters(), lr=5e-5)
tune(nor_bert_model, nor_bert_optim, nor_bert_train_dataset)
nor_bert_f1, nor_bert_accuracy = eval(nor_bert_model, nor_bert_test_dataset)

nb_bert_model = nb_bert_pipe.model
nb_bert_model.to(device)
nb_bert_optim = AdamW(nb_bert_model.parameters(), lr=5e-5)
tune(nb_bert_model, nb_bert_optim, nb_bert_train_dataset)
nb_bert_f1, nb_bert_accuracy = eval(nb_bert_model, nb_bert_test_dataset)


mbert_model = mbert_pipe.model
mbert_model.to(device)
mbert_optim = AdamW(mbert_model.parameters(), lr=5e-5)
tune(mbert_model, mbert_optim, mbert_train_dataset)
mbert_f1, mbert_accuracy = eval(mbert_model, mbert_test_dataset)


print('NorBert - F1 score: ', nor_bert_f1, ' Accuracy: ', nor_bert_accuracy)
print('NbBert - F1 score: ', nb_bert_f1, ' Accuracy: ', nb_bert_accuracy)
print('mBert - F1 score: ', mbert_f1, ' Accuracy: ', mbert_accuracy)

  0%|          | 0/1 [00:00<?, ?it/s]

torch.Size([3, 6])
torch.Size([3, 6])
torch.Size([3])


  0%|          | 0/1 [00:00<?, ?it/s]


RuntimeError: The size of tensor a (18) must match the size of tensor b (3) at non-singleton dimension 0