In [1]:
import utils_data as ut
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertForMaskedLM, BertTokenizer
tokenizerBERT = BertTokenizer.from_pretrained('pranav-s/MaterialsBERT', model_max_length=512)
modelBERT = BertForMaskedLM.from_pretrained('pranav-s/MaterialsBERT')

In [2]:
classes = {'POLYMER': 1,
           'ORGANIC': 2,
           'MONOMER': 3,
           'PROP_NAME': 4,
           'INORGANIC': 5,
           'MATERIAL_AMOUNT': 6,
           'POLYMER_FAMILY': 7,
           'PROP_VALUE': 8,
           'O': 0}
max_length = 512
batch_size = 3
class NERBERTModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.bert = modelBERT.base_model
        self.linear = nn.Linear(768, len(classes) + 1)
        
    def forward(self, token):
        encoder_output= self.bert(token)  # torch.LongTensor of shape (batch_size, sequence_length)
        linear_output = self.linear(encoder_output.last_hidden_state)
        class_output = F.softmax(linear_output, dim=2)
        return class_output

In [3]:
model = NERBERTModel()

In [4]:
num_data = 100
data_list = ut.read_data('train.json', max_length)[:num_data]

In [5]:
token_tensors_all_list = [ut.list2token(tokenizerBERT, d['words'], max_length) for d in data_list]
data = torch.cat(token_tensors_all_list, dim=0)
data_batches = ut.to_batches(data, batch_size)
target_tensors_all_list = [ut.cat2digit(classes, d['ner'], max_length) for d in data_list]
target = torch.stack(target_tensors_all_list, dim=0)
target_batches = ut.to_batches(target, batch_size)

In [6]:
criterion = nn.CrossEntropyLoss(weight=torch.tensor([0.1, 1, 1, 1, 1, 1, 1, 1, 1, 0.1]))
optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)
torch.manual_seed(101)

<torch._C.Generator at 0x7fe2ccfe84d0>

In [7]:
epochs = 5
train_losses = []
for epoch in range(epochs):
    for b, X in enumerate(data_batches):
        y_pred = model(X)
        y_pred = torch.swapaxes(y_pred, 1, 2)
        y = target_batches[b]
        # Correct entity name prediction
#         
        loss = criterion(y_pred, y)
        #acc = ut.accuracy(0, len(classes), y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if b % 5 == 0:
            print(f'epoch: {epoch:2}  batch: {b:3}  loss: {loss.item():.6f} ') #  accuracy: {acc:.4f}
#             print(predicted_classes)
#             print(true_classes)
    train_losses.append(loss)
    

We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


epoch:  0  batch:   0  loss: 2.319016 
epoch:  0  batch:   5  loss: 2.003860 
epoch:  0  batch:  10  loss: 2.056751 
epoch:  0  batch:  15  loss: 2.050579 
epoch:  0  batch:  20  loss: 2.117112 
epoch:  0  batch:  25  loss: 2.175548 
epoch:  0  batch:  30  loss: 2.160239 
epoch:  1  batch:   0  loss: 2.083002 
epoch:  1  batch:   5  loss: 2.072974 


KeyboardInterrupt: 

In [None]:
# indices = ((0 < y) & (y < 9)).nonzero(as_tuple=True) 

In [None]:
# y[indices[0], indices[1]]

In [None]:
# y_pred[indices[0], :, indices[1]]

In [None]:
# indices = ((0 < y) & (y < len(classes))).nonzero(as_tuple=True)

In [None]:
# _, max_indices = y_pred[indices[0], :, indices[1]].max(dim=1)

In [None]:
# max_indices

In [None]:
# true = y[indices[0], indices[1]]

In [None]:
# torch.eq(max_indices, true).sum() / true.shape[0]

In [None]:
# true.shape