In [2]:
## Load The Data
import json
import os
#raw = json.load(open('./data/annotations_consistent.json'))

raw = json.load(open('./data/FinEntity.json'))
# raw = data.get("examples")



In [2]:
pip install transformers


Note: you may need to restart the kernel to use updated packages.


In [3]:
## Preparing Sequence Labeling Data for Transformers
from sequence_aligner.labelset import LabelSet
from sequence_aligner.dataset import TrainingDatasetCRF
from sequence_aligner.containers import TraingingBatch
from transformers import AutoTokenizer


tokenizer = AutoTokenizer.from_pretrained('mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis')
label_set = LabelSet(labels=["Neutral", "Positive", "Negative"])  # label in this dataset
print(label_set.ids_to_label)
print(len(label_set.ids_to_label.values()))
dataset = TrainingDatasetCRF(data=raw, tokenizer=tokenizer, label_set=label_set,tokens_per_batch = 128)
print(len(dataset))

{0: 'O', 1: 'B-Neutral', 2: 'I-Neutral', 3: 'L-Neutral', 4: 'U-Neutral', 5: 'B-Positive', 6: 'I-Positive', 7: 'L-Positive', 8: 'U-Positive', 9: 'B-Negative', 10: 'I-Negative', 11: 'L-Negative', 12: 'U-Negative'}
13
987


In [4]:
## Prepare train data and valid data
from torch.utils.data import DataLoader, random_split
import config
train_size = int(config.dev_split_size * len(dataset))
validate_size = len(dataset) - train_size
train_dataset, validate_dataset = random_split(dataset, [train_size, validate_size])

train_loader = DataLoader(train_dataset, batch_size=16, collate_fn=TraingingBatch, shuffle=True, )
val_loader = DataLoader(validate_dataset, batch_size=16, collate_fn=TraingingBatch, shuffle=True, )

print(dataset[1].input_ids)
print(dataset[1].labels)
print(dataset[1].attention_masks)
print(tokenizer.decode(dataset[1].input_ids))
print(dataset.label_set.ids_to_label)

[4148, 5, 1313, 526, 6, 16422, 1290, 16, 18142, 231, 207, 71, 10, 8600, 11, 3472, 3365, 8, 9805, 4403, 8003, 6083, 424, 7387, 16, 67, 62, 30, 231, 207, 71, 1963, 1451, 2113, 4, 1437, 1437, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
[0, 0, 0, 0, 0, 5, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

In [5]:
## DistilBert+Crf
import warnings
from model.DistilBERT_crf import DistilBertCrfForNer
from seqeval import metrics
from transformers import get_linear_schedule_with_warmup
from seqeval.metrics import f1_score, precision_score, accuracy_score
from torch import cuda
import config
from util.train import train_epoch, valid_epoch
from  torch.optim import AdamW

warnings.filterwarnings('ignore')
# 'DistilRoBERta-base-cased' 'mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis'
label_set = LabelSet(labels=["Neutral", "Positive", "Negative"]) 
model = DistilBertCrfForNer.from_pretrained('mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis', num_labels=len(label_set.ids_to_label.values()))

device = 'cuda:0' if cuda.is_available() else 'cpu'
model.to(device)

len_dataset = len(train_dataset)
t_total = len(train_dataset)
# Prepare optimizer and schedule (linear warmup and decay)
no_decay = ["bias", "LayerNorm.weight"]
bert_param_optimizer = list(model.distilbert.named_parameters())
crf_param_optimizer = list(model.crf.named_parameters())
optimizer_grouped_parameters = [
        {'params': [p for n, p in bert_param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': config.weight_decay, 'lr': config.lr_crf},
        {'params': [p for n, p in bert_param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0,
         'lr': config.lr_crf},

        {'params': [p for n, p in crf_param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay': config.weight_decay, 'lr': config.crf_learning_rate},
        {'params': [p for n, p in crf_param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0,
         'lr': config.crf_learning_rate},

    ]
optimizer = AdamW(optimizer_grouped_parameters, lr=config.lr, eps=1e-6)
warmup_steps = int(t_total * config.warm_up_ratio)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps,
                                                num_training_steps=t_total)

EPOCHS = config.epoch_num # training epoch
for e in range(EPOCHS):
    print("=======START TRAIN EPOCHS %d=======" %(e+1))
    train_loss = train_epoch(e, model, train_loader, optimizer, scheduler,device)
    valid_epoch(e, model, val_loader,device,label_set)



You are using a model of type roberta to instantiate a model of type distilbert. This is not supported for all configurations of models and can yield errors.
Some weights of DistilBertCrfForNer were not initialized from the model checkpoint at mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis and are newly initialized: ['classifier.bias', 'classifier.weight', 'crf.end_transitions', 'crf.start_transitions', 'crf.transitions', 'embeddings.LayerNorm.bias', 'embeddings.LayerNorm.weight', 'embeddings.position_embeddings.weight', 'embeddings.word_embeddings.weight', 'transformer.layer.0.attention.k_lin.bias', 'transformer.layer.0.attention.k_lin.weight', 'transformer.layer.0.attention.out_lin.bias', 'transformer.layer.0.attention.out_lin.weight', 'transformer.layer.0.attention.q_lin.bias', 'transformer.layer.0.attention.q_lin.weight', 'transformer.layer.0.attention.v_lin.bias', 'transformer.layer.0.attention.v_lin.weight', 'transformer.layer.0.ffn.lin1.bias', 'transformer.lay

Epoch: 1, train Loss:41.8305
              precision    recall  f1-score   support

    Negative       0.00      0.00      0.00       103
     Neutral       0.00      0.00      0.00       244
    Positive       0.00      0.00      0.00        93

   micro avg       0.00      0.00      0.00       440
   macro avg       0.00      0.00      0.00       440
weighted avg       0.00      0.00      0.00       440

Epoch: 1, train Loss:22.6151
Epoch: 2, train Loss:17.2639
              precision    recall  f1-score   support

    Negative       0.50      0.06      0.10       103
     Neutral       0.54      0.09      0.15       244
    Positive       0.51      0.20      0.29        93

   micro avg       0.52      0.10      0.17       440
   macro avg       0.52      0.12      0.18       440
weighted avg       0.52      0.10      0.17       440

Epoch: 2, train Loss:15.1513
Epoch: 3, train Loss:11.0553
              precision    recall  f1-score   support

    Negative       0.29      0.06     

KeyboardInterrupt: 