![JohnSnowLabs](https://nlp.johnsnowlabs.com/assets/images/logo.png)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp-workshop/blob/master/finance-nlp/05.4.BertForTokenClassification_TrainEval.ipynb)

# BERT FOR TOKEN CLASSIFICATION - Training/Test Split and Evaluation
Using Hugging Face and importing it to Finance  NLP for scalability.

This is a transformer-based approach, which usually returns much bigger models (10x) compared to NerModel, but it can improve the performance over NerModel.

In this notebook we don't save the model, we just train and get metrics on test set. Please see next notebook to check how we finally train with all data and save the model in Spark NLP format.

# Installation

In [None]:
! pip -q install seqeval

In [None]:
! pip install transformers==4.8.1
! pip install pyspark==3.1.2
! pip install spark-nlp
! pip install spark-nlp-display

# Setting name of the project

In [None]:
PROJECT_NAME = 'financial_operations'

# Imports

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertConfig

from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split

import sparknlp
from pyspark.sql import functions as F

from sparknlp.training import CoNLL
from google.colab import files

import pandas as pd
import numpy as np
from tqdm import tqdm, trange

import transformers
from transformers import BertForTokenClassification, TFBertForTokenClassification, AdamW
from transformers import get_linear_schedule_with_warmup

from sklearn.metrics import classification_report

## Setting up Torch

In [None]:
torch.__version__

'1.13.0+cu116'

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()

torch.cuda.get_device_name(0)

'Tesla T4'

# Check that files are available

In [None]:
! wget -q https://raw.githubusercontent.com/JohnSnowLabs/spark-nlp-workshop/master/finance-nlp/data/conll_noO.conll

In [None]:
! head -n 20 conll_noO.conll

head: cannot open 'conll_noO.conll' for reading: No such file or directory


# Creating folders for logs and checkpoints

In [None]:
! mkdir {PROJECT_NAME}

In [None]:
! mkdir {PROJECT_NAME}/logs

# Starting a Spark Session for SparkNLP

In [None]:
! pip install johnsnowlabs

In [None]:
from johnsnowlabs import nlp, finance

nlp.install(force_browser=True)

In [None]:
spark = nlp.start()

# Convert JSL conlls in dataframe format

In [None]:
def get_conll_df(pth):
  data = CoNLL().readDataset(spark, pth)
  data = data.withColumn("sentence_idx", F.monotonically_increasing_id())

  df = data.select('sentence_idx', F.explode(F.arrays_zip('token.result','label.result','pos.result')).alias("cols")) \
  .select('sentence_idx',
          F.expr("cols['0']").alias("word"),
          F.expr("cols['1']").alias("tag"),
          F.expr("cols['2']").alias("pos")).toPandas()
  return df

data_df = get_conll_df('./conll_noO.conll')

In [None]:
train_idx, test_idx = train_test_split(data_df['sentence_idx'].unique(), shuffle=True, random_state=42, train_size=0.85, test_size=0.15)

In [None]:
len(train_idx)

4268

In [None]:
len(train_idx)

4268

In [None]:
train_data_df = data_df[data_df['sentence_idx'].isin(train_idx)]
test_data_df = data_df[data_df['sentence_idx'].isin(test_idx)]

In [None]:
train_data_df

Unnamed: 0,sentence_idx,word,tag,pos
0,0,Exhibit,O,NN
1,0,10.6,O,NN
2,0,memorandum,B-DOC,NN
3,0,Between,O,NN
4,0,(hereinafter,B-PARTY,NN
...,...,...,...,...
98359,8589937102,.,O,NN
98360,8589937102,Language,O,NN
98361,8589937102,and,O,NN
98362,8589937102,propietary,B-ROLE,NN


In [None]:
test_data_df

Unnamed: 0,sentence_idx,word,tag,pos
93,8,ARTICLE,O,NN
94,8,IV,O,NN
95,8,DUTIES,O,NN
96,8,AS,O,NN
97,8,WATER,B-PARTY,NN
...,...,...,...,...
98334,8589937099,determined,O,NN
98335,8589937099,to,O,NN
98336,8589937099,be,O,NN
98337,8589937099,void,O,NN


## Checking the DF looks good

In [None]:
train_data_df.head(25)

Unnamed: 0,sentence_idx,word,tag,pos
0,0,Exhibit,O,NN
1,0,10.6,O,NN
2,0,memorandum,B-DOC,NN
3,0,Between,O,NN
4,0,(hereinafter,B-PARTY,NN
5,0,collectively,I-PARTY,NN
6,0,called,I-PARTY,NN
7,0,"""Parties""",I-PARTY,NN
8,0,and,I-PARTY,NN
9,0,individually,I-PARTY,NN


In [None]:
test_data_df.head(25)

Unnamed: 0,sentence_idx,word,tag,pos
93,8,ARTICLE,O,NN
94,8,IV,O,NN
95,8,DUTIES,O,NN
96,8,AS,O,NN
97,8,WATER,B-PARTY,NN
98,8,"NOW,",I-PARTY,NN
99,8,INC.,I-PARTY,NN
100,8,9,O,NN
126,12,6.6,O,NN
127,12,PRODUCT,B-DOC,NN


In [None]:
print (train_data_df.shape)

(83262, 4)


In [None]:
print (test_data_df.shape)

(15102, 4)


In [None]:
train_data_df['tag'].value_counts()

O            60545
I-PARTY      10349
B-PARTY       4894
I-DOC         2730
B-DOC         1689
B-DATE        1527
B-LAW          659
B-ROLE         282
B-LOC          221
B-ORDINAL      132
B-PERCENT      116
B-PERSON        86
I-EFFDATE       17
B-EFFDATE       15
Name: tag, dtype: int64

In [None]:
test_data_df['tag'].value_counts()

O            11125
I-PARTY       1748
B-PARTY        910
I-DOC          473
B-DOC          318
B-DATE         269
B-LAW          123
B-LOC           38
B-ROLE          34
B-PERCENT       20
B-ORDINAL       19
I-EFFDATE       16
B-PERSON         6
B-EFFDATE        3
Name: tag, dtype: int64

# First, train / fine-tune a model on the dataset

## Iterating function to feed the model with sentences
Converting conll sentence annotations to tuples (word, pos, tag)

In [None]:
## convert conll file to sentences

class SentenceGetter(object):
    
    def __init__(self, dataset):
        self.n_sent = 1
        self.dataset = dataset
        self.empty = False
        agg_func = lambda s: [(w,p, t) for w,p, t in zip(s["word"].values.tolist(),
                                                       s['pos'].values.tolist(),
                                                        s["tag"].values.tolist())]
        self.grouped = self.dataset.groupby("sentence_idx").apply(agg_func)
        self.sentences = [s for s in self.grouped]
    
    def get_next(self):
        try:
            s = self.grouped["Sentence: {}".format(self.n_sent)]
            self.n_sent += 1
            return s
        except:
            return None

train_getter = SentenceGetter(train_data_df)
test_getter = SentenceGetter(test_data_df)

## Getting sentences and labels
- Sentences: concatenation of first element of tuple (word)
- Labels: concatenation of second element of tuple (label)

In [None]:
# Sentences 
train_sentences = [[word[0] for word in sentence] for sentence in train_getter.sentences]
print("Example of train sentence:")
print (train_sentences[5])

test_sentences = [[word[0] for word in sentence] for sentence in test_getter.sentences]
print("Example of test sentence:")
print (test_sentences[5])

# Labels
train_labels = [[s[2] for s in sentence] for sentence in train_getter.sentences]
print("Example of train sentence:")
print(train_labels[5])

test_labels = [[s[2] for s in sentence] for sentence in test_getter.sentences]
print("Example of test sentence:")
print(test_labels[5])

Example of train sentence:
['3.2', '__________', '("Professional")', 'Default', '7']
Example of test sentence:
['on', 'which', 'commercial', 'banks', 'in', 'Dallas', ',']
Example of train sentence:
['O', 'B-PARTY', 'I-PARTY', 'O', 'O']
Example of test sentence:
['O', 'O', 'O', 'O', 'O', 'B-LOC', 'O']


## Converting tags to numeric values with a dict

In [None]:
tag_values = list(set(train_data_df["tag"].values))
tag_values.append("PAD")
tag2idx = {t: i for i, t in enumerate(tag_values)}

In [None]:
print(tag_values[:10])
print(tag2idx)

['O', 'I-PARTY', 'B-PERSON', 'B-PERCENT', 'B-LAW', 'I-DOC', 'B-ROLE', 'B-LOC', 'B-PARTY', 'B-EFFDATE']
{'O': 0, 'I-PARTY': 1, 'B-PERSON': 2, 'B-PERCENT': 3, 'B-LAW': 4, 'I-DOC': 5, 'B-ROLE': 6, 'B-LOC': 7, 'B-PARTY': 8, 'B-EFFDATE': 9, 'B-DOC': 10, 'B-ORDINAL': 11, 'B-DATE': 12, 'I-EFFDATE': 13, 'PAD': 14}


## Model metadata

### Bulding on top of biobert

In [None]:
MODEL_TO_TRAIN = 'yiyanghkust/finbert-pretrain'

### Hyperparam settings

In [None]:
# Defining some key variables that will be used later on in the training
MAX_LEN = 256
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 32
EPOCHS = 15
LEARNING_RATE = 2e-05

## Instantiating the proper tokenizer

In [None]:
tokenizer = BertTokenizer.from_pretrained(MODEL_TO_TRAIN, do_lower_case=False)

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

### Tokenize and extend the labels in case a word is split

In [None]:
def tokenize_and_preserve_labels(sentence, text_labels):
    tokenized_sentence = []
    labels = []

    for word, label in zip(sentence, text_labels):

        # Tokenize the word and count # of subwords the word is broken into
        tokenized_word = tokenizer.tokenize(word)
        n_subwords = len(tokenized_word)

        # Add the tokenized word to the final tokenized word list
        tokenized_sentence.extend(tokenized_word)

        # Add the same label to the new list of labels `n_subwords` times
        labels.extend([label] * n_subwords)

    return tokenized_sentence, labels

## Tokenize and get tokens and labels

In [None]:
train_tokenized_texts_and_labels = [
    tokenize_and_preserve_labels(sent, labs)
    for sent, labs in zip(train_sentences, train_labels)
]

test_tokenized_texts_and_labels = [
    tokenize_and_preserve_labels(sent, labs)
    for sent, labs in zip(test_sentences, test_labels)
]

train_tokenized_texts_tokens = [token_label_pair[0] for token_label_pair in train_tokenized_texts_and_labels]
test_tokenized_texts_tokens = [token_label_pair[0] for token_label_pair in test_tokenized_texts_and_labels]

train_tokenized_texts_labels = [token_label_pair[1] for token_label_pair in train_tokenized_texts_and_labels]
test_tokenized_texts_labels = [token_label_pair[1] for token_label_pair in test_tokenized_texts_and_labels]

In [None]:
print(train_tokenized_texts_tokens[5])
print(train_tokenized_texts_labels[5])

['3', '.', '2', '_', '_', '_', '_', '_', '_', '_', '_', '_', '_', '(', '"', 'P', '##ro', '##fes', '##sion', '##al', '"', ')', 'D', '##ef', '##aul', '##t', '7']
['O', 'O', 'O', 'B-PARTY', 'B-PARTY', 'B-PARTY', 'B-PARTY', 'B-PARTY', 'B-PARTY', 'B-PARTY', 'B-PARTY', 'B-PARTY', 'B-PARTY', 'I-PARTY', 'I-PARTY', 'I-PARTY', 'I-PARTY', 'I-PARTY', 'I-PARTY', 'I-PARTY', 'I-PARTY', 'I-PARTY', 'O', 'O', 'O', 'O', 'O']


In [None]:
print(test_tokenized_texts_tokens[5])
print(test_tokenized_texts_labels[5])

['on', 'which', 'commercial', 'banks', 'in', 'D', '##all', '##as', ',']
['O', 'O', 'O', 'O', 'O', 'B-LOC', 'B-LOC', 'B-LOC', 'O']


## Converting tokens to id && padding sentences to have fixed length

In [None]:
train_input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in train_tokenized_texts_tokens],
                          maxlen=MAX_LEN, dtype="long", value=0.0,
                          truncating="post", padding="post")

test_input_ids = pad_sequences([tokenizer.convert_tokens_to_ids(txt) for txt in test_tokenized_texts_tokens],
                          maxlen=MAX_LEN, dtype="long", value=0.0,
                          truncating="post", padding="post")

train_tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in train_tokenized_texts_labels],
                     maxlen=MAX_LEN, value=tag2idx["PAD"], padding="post",
                     dtype="long", truncating="post")

test_tags = pad_sequences([[tag2idx.get(l) for l in lab] for lab in test_tokenized_texts_labels],
                     maxlen=MAX_LEN, value=tag2idx["PAD"], padding="post",
                     dtype="long", truncating="post")

In [None]:
print(train_input_ids[5])
print(test_input_ids[5])
print(train_tags[5])
print(test_tags[5])

[  547    48   513 30765 30765 30765 30765 30765 30765 30765 30765 30765
 30765   333  3699 30842  3602 23010  5729   979  3699   765 30708  9921
 21177   463  1181     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0   

## Now that sentences are padded, I need to prevent attention from seeing pads (id=0)

In [None]:
train_attention_masks = [[float(i != 0.0) for i in ii] for ii in train_input_ids]
test_attention_masks = [[float(i != 0.0) for i in ii] for ii in test_input_ids]

In [None]:
print(train_attention_masks[5])
print(test_attention_masks[5])

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

### Double checking that pairing input-mask is in place

In [None]:
for i,m in zip(train_input_ids[5], train_attention_masks[5]):
  print(f"Token id: {i}\Token mask: {m}")

Token id: 547\Token mask: 1.0
Token id: 48\Token mask: 1.0
Token id: 513\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 30765\Token mask: 1.0
Token id: 333\Token mask: 1.0
Token id: 3699\Token mask: 1.0
Token id: 30842\Token mask: 1.0
Token id: 3602\Token mask: 1.0
Token id: 23010\Token mask: 1.0
Token id: 5729\Token mask: 1.0
Token id: 979\Token mask: 1.0
Token id: 3699\Token mask: 1.0
Token id: 765\Token mask: 1.0
Token id: 30708\Token mask: 1.0
Token id: 9921\Token mask: 1.0
Token id: 21177\Token mask: 1.0
Token id: 463\Token mask: 1.0
Token id: 1181\Token mask: 1.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token

In [None]:
for i,m in zip(test_input_ids[5], test_attention_masks[5]):
  print(f"Token id: {i}\Token mask: {m}")

Token id: 19\Token mask: 1.0
Token id: 31\Token mask: 1.0
Token id: 256\Token mask: 1.0
Token id: 1352\Token mask: 1.0
Token id: 10\Token mask: 1.0
Token id: 30708\Token mask: 1.0
Token id: 5678\Token mask: 1.0
Token id: 3015\Token mask: 1.0
Token id: 585\Token mask: 1.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0
Token id: 0\Token mask: 0.0


## Arrays to tensors transformation

In [None]:
tr_inputs = torch.tensor(train_input_ids)
val_inputs = torch.tensor(test_input_ids)
tr_tags = torch.tensor(train_tags)
val_tags = torch.tensor(test_tags)
tr_masks = torch.tensor(train_attention_masks)
val_masks = torch.tensor(test_attention_masks)

In [None]:
print(tr_inputs[5])
print(tr_tags[5])
print(tr_masks[5])

tensor([  547,    48,   513, 30765, 30765, 30765, 30765, 30765, 30765, 30765,
        30765, 30765, 30765,   333,  3699, 30842,  3602, 23010,  5729,   979,
         3699,   765, 30708,  9921, 21177,   463,  1181,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0, 

In [None]:
print(val_inputs[5])
print(val_tags[5])
print(val_masks[5])

tensor([   19,    31,   256,  1352,    10, 30708,  5678,  3015,   585,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0, 

### Checking sizes match

#### Training

In [None]:
len([x for x in tr_inputs[5] if x != 0]) # How many NO_PADs we have?

27

In [None]:
len([x for x in tr_tags[5] if x != 7])

256

In [None]:
len([x for x in tr_masks[5] if x != 0])

27

#### Test

In [None]:
len([x for x in val_inputs[5] if x != 0]) # How many NO_PADs we have?

9

In [None]:
len([x for x in val_tags[5] if x != 7])

253

In [None]:
len([x for x in val_masks[5] if x != 0])

9

## Creating the DataLoaders to feed the batches during training

In [None]:
train_data = TensorDataset(tr_inputs, tr_masks, tr_tags)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=TRAIN_BATCH_SIZE)

valid_data = TensorDataset(val_inputs, val_masks, val_tags)
valid_sampler = SequentialSampler(valid_data)
valid_dataloader = DataLoader(valid_data, sampler=valid_sampler, batch_size=TRAIN_BATCH_SIZE)

# Loading the transformer model

In [None]:
transformers.__version__

'4.8.1'

In [None]:
model = BertForTokenClassification.from_pretrained(
    MODEL_TO_TRAIN,
    num_labels=len(tag2idx),
    output_attentions = False,
    output_hidden_states = False
)
model.to(device)

Downloading:   0%|          | 0.00/359 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/442M [00:00<?, ?B/s]

Some weights of the model checkpoint at yiyanghkust/finbert-pretrain were not used when initializing BertForTokenClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at 

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30873, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwis

## Setting up the optimizer.
We want to optimize weight values, so we add a decay.
We can get all the weights from `model_named_parameters()`
But we need to remove `bias`, `gamma` and `beta` which are Layer Normalization parameters we don't want to touch.

Activate `FULL_TINETUNING` to modify weights in all the layers.

In [None]:
FULL_FINETUNING = True
if FULL_FINETUNING:
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
    ]
else:
    param_optimizer = list(model.classifier.named_parameters())
    optimizer_grouped_parameters = [{"params": [p for n, p in param_optimizer]}]

optimizer = AdamW(
    optimizer_grouped_parameters,
    lr=3e-5,
    eps=1e-8
)


## Setting up the scheduler
It will manage Optimizer and Learning Rate changes. We use warmup

In [None]:
epochs = 15
max_grad_norm = 1.0

# Total number of training steps is number of batches * number of epochs.
total_steps = len(train_dataloader) * epochs

# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)


Now, let's train

In [None]:
## Store the average loss after each epoch so we can plot them.
loss_values, validation_loss_values = [], []

for EPOCH in trange(epochs, desc="Epoch"):
    # Put the model into training mode.
    model.train()
    # Reset the total loss for this epoch.
    total_loss = 0

    # Training loop
    for step, batch in enumerate(train_dataloader):
        # add batch to gpu
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch
        # Always clear any previously calculated gradients before performing a backward pass.
        model.zero_grad()
        # forward pass
        # This will return the loss (rather than the model output)
        # because we have provided the `labels`.
        outputs = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask, labels=b_labels)
        # get the loss
        loss = outputs[0]
        # Perform a backward pass to calculate the gradients.
        loss.backward()
        # track train loss
        total_loss += loss.item()
        # Clip the norm of the gradient
        # This is to help prevent the "exploding gradients" problem.
        torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=max_grad_norm)
        # update parameters
        optimizer.step()
        # Update the learning rate.
        scheduler.step()

    # Calculate the average loss over the training data.
    avg_train_loss = total_loss / len(train_dataloader)
    tr_loss = f"Average train loss: {str(avg_train_loss)}\n"

    # Saving partial models (this creates the folder too)
    if step > epochs - 5:

      tokenizer.save_pretrained(f'{PROJECT_NAME}/{str(EPOCH)}/tokenizer/')
      model.save_pretrained(save_directory=f'{PROJECT_NAME}/{str(EPOCH)}/',
                            save_config=True, state_dict=model.state_dict)
      # Saving checkpoint in case it crashes, to restore work
      torch.save({
          'epoch': EPOCH,
          'model_state_dict': model.state_dict(),
          'optimizer_state_dict': optimizer.state_dict(),
          'loss': avg_train_loss,
          }, f'{PROJECT_NAME}/{str(EPOCH)}/checkpoint.pth')
    else:
      print("Skipping saving the model. Too early")

    # Store the loss value for plotting the learning curve.
    loss_values.append(avg_train_loss)

    # Put the model into evaluation mode
    model.eval()
    # Reset the validation loss for this epoch.
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0
    predictions , true_labels = [], []
    for batch in valid_dataloader:
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_input_mask, b_labels = batch

        # Telling the model not to compute or store gradients,
        # saving memory and speeding up validation
        with torch.no_grad():
            # Forward pass, calculate logit predictions.
            # This will return the logits rather than the loss because we have not provided labels.
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask, labels=b_labels)
        # Move logits and labels to CPU
        logits = outputs[1].detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences.
        eval_loss += outputs[0].mean().item()
        predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
        true_labels.extend(label_ids)

    eval_loss = eval_loss / len(valid_dataloader)
    validation_loss_values.append(eval_loss)

    val_loss = f"Validation loss: {str(eval_loss)}\n"
    
    # Saving losses log
    with open(f'{PROJECT_NAME}/logs/epoch_' + str(EPOCH) + '_loss.log', 'a') as f:
      f.write(tr_loss)
      f.write(val_loss)

    # Calculating metrics
    pred_tags = [tag_values[p_i] for p, l in zip(predictions, true_labels)
                                 for p_i, l_i in zip(p, l) if tag_values[l_i] != "PAD"]
    valid_tags = [tag_values[l_i] for l in true_labels
                                  for l_i in l if tag_values[l_i] != "PAD"]
    
    report = classification_report(valid_tags, pred_tags)
    
    # Saving metrics
    with open(f'{PROJECT_NAME}/logs/epoch_' + str(EPOCH) + '_metrics.log', 'a') as f:
      f.write(report)

    # Printing also to stdout
    print(tr_loss)
    print(val_loss)
    print(report)


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:   7%|▋         | 1/15 [03:06<43:24, 186.02s/it]

Average train loss: 0.6661555767059326

Validation loss: 0.4082468195507924

              precision    recall  f1-score   support

      B-DATE       0.72      0.77      0.74       291
       B-DOC       0.80      0.55      0.65       610
   B-EFFDATE       0.00      0.00      0.00         7
       B-LAW       0.71      0.56      0.63       282
       B-LOC       0.00      0.00      0.00        77
   B-ORDINAL       0.00      0.00      0.00        21
     B-PARTY       0.72      0.70      0.71      1994
   B-PERCENT       1.00      0.64      0.78        28
    B-PERSON       0.00      0.00      0.00        17
      B-ROLE       1.00      0.14      0.24        44
       I-DOC       0.70      0.61      0.66       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.90      0.94      0.92      4273
           O       0.92      0.94      0.93     12945

    accuracy                           0.88     21349
   macro avg       0.53      0.42      0.45     21349
wei

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  13%|█▎        | 2/15 [06:15<40:43, 187.96s/it]

Average train loss: 0.32737147240941206

Validation loss: 0.32456328223148984

              precision    recall  f1-score   support

      B-DATE       0.76      0.82      0.79       291
       B-DOC       0.87      0.68      0.76       610
   B-EFFDATE       1.00      0.43      0.60         7
       B-LAW       0.84      0.64      0.73       282
       B-LOC       0.35      0.30      0.32        77
   B-ORDINAL       1.00      0.38      0.55        21
     B-PARTY       0.79      0.74      0.77      1994
   B-PERCENT       0.88      0.82      0.85        28
    B-PERSON       0.00      0.00      0.00        17
      B-ROLE       0.97      0.64      0.77        44
       I-DOC       0.76      0.75      0.76       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.93      0.95      0.94      4273
           O       0.93      0.95      0.94     12945

    accuracy                           0.91     21349
   macro avg       0.72      0.58      0.63     21349
w

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  20%|██        | 3/15 [09:24<37:43, 188.59s/it]

Average train loss: 0.22690292497846618

Validation loss: 0.3047486152499914

              precision    recall  f1-score   support

      B-DATE       0.75      0.83      0.79       291
       B-DOC       0.93      0.73      0.82       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.84      0.69      0.76       282
       B-LOC       0.40      0.48      0.44        77
   B-ORDINAL       0.92      0.52      0.67        21
     B-PARTY       0.75      0.82      0.78      1994
   B-PERCENT       0.88      0.79      0.83        28
    B-PERSON       0.00      0.00      0.00        17
      B-ROLE       0.97      0.84      0.90        44
       I-DOC       0.88      0.76      0.81       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.95      0.95      0.95      4273
           O       0.94      0.95      0.94     12945

    accuracy                           0.91     21349
   macro avg       0.73      0.65      0.68     21349
we

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  27%|██▋       | 4/15 [12:33<34:37, 188.85s/it]

Average train loss: 0.17406717376477682

Validation loss: 0.3034484824165702

              precision    recall  f1-score   support

      B-DATE       0.81      0.78      0.79       291
       B-DOC       0.92      0.77      0.84       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.76      0.78      0.77       282
       B-LOC       0.47      0.51      0.49        77
   B-ORDINAL       0.92      0.52      0.67        21
     B-PARTY       0.77      0.78      0.78      1994
   B-PERCENT       0.88      0.82      0.85        28
    B-PERSON       0.36      0.24      0.29        17
      B-ROLE       0.95      0.86      0.90        44
       I-DOC       0.92      0.75      0.83       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.95      0.95      0.95      4273
           O       0.94      0.95      0.95     12945

    accuracy                           0.92     21349
   macro avg       0.76      0.67      0.71     21349
we

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  33%|███▎      | 5/15 [15:43<31:30, 189.10s/it]

Average train loss: 0.13843781617817594

Validation loss: 0.3172941567997138

              precision    recall  f1-score   support

      B-DATE       0.78      0.82      0.80       291
       B-DOC       0.89      0.78      0.83       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.84      0.70      0.76       282
       B-LOC       0.47      0.61      0.53        77
   B-ORDINAL       0.85      0.52      0.65        21
     B-PARTY       0.82      0.76      0.79      1994
   B-PERCENT       0.88      1.00      0.93        28
    B-PERSON       0.43      0.71      0.53        17
      B-ROLE       0.95      0.86      0.90        44
       I-DOC       0.88      0.78      0.83       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.94      0.97      0.95      4273
           O       0.94      0.96      0.95     12945

    accuracy                           0.92     21349
   macro avg       0.76      0.73      0.74     21349
we

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  40%|████      | 6/15 [18:52<28:22, 189.16s/it]

Average train loss: 0.11775353534230545

Validation loss: 0.33558829439183074

              precision    recall  f1-score   support

      B-DATE       0.80      0.83      0.81       291
       B-DOC       0.92      0.79      0.85       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.81      0.72      0.76       282
       B-LOC       0.63      0.47      0.54        77
   B-ORDINAL       0.85      0.52      0.65        21
     B-PARTY       0.84      0.77      0.80      1994
   B-PERCENT       0.90      0.93      0.91        28
    B-PERSON       0.71      0.59      0.65        17
      B-ROLE       0.97      0.86      0.92        44
       I-DOC       0.88      0.79      0.83       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.92      0.97      0.95      4273
           O       0.94      0.96      0.95     12945

    accuracy                           0.92     21349
   macro avg       0.80      0.71      0.75     21349
w

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  47%|████▋     | 7/15 [22:01<25:13, 189.18s/it]

Average train loss: 0.10042216404994476

Validation loss: 0.33176160665849846

              precision    recall  f1-score   support

      B-DATE       0.76      0.85      0.80       291
       B-DOC       0.92      0.81      0.86       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.86      0.75      0.80       282
       B-LOC       0.56      0.57      0.56        77
   B-ORDINAL       0.85      0.52      0.65        21
     B-PARTY       0.81      0.79      0.80      1994
   B-PERCENT       0.90      0.93      0.91        28
    B-PERSON       0.69      0.65      0.67        17
      B-ROLE       0.97      0.86      0.92        44
       I-DOC       0.83      0.82      0.82       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.96      0.95      0.96      4273
           O       0.94      0.96      0.95     12945

    accuracy                           0.92     21349
   macro avg       0.79      0.73      0.75     21349
w

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  53%|█████▎    | 8/15 [25:11<22:04, 189.25s/it]

Average train loss: 0.09047526877317856

Validation loss: 0.33075979600350064

              precision    recall  f1-score   support

      B-DATE       0.79      0.82      0.81       291
       B-DOC       0.87      0.82      0.85       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.79      0.79      0.79       282
       B-LOC       0.56      0.58      0.57        77
   B-ORDINAL       0.85      0.52      0.65        21
     B-PARTY       0.83      0.78      0.80      1994
   B-PERCENT       0.90      1.00      0.95        28
    B-PERSON       0.50      0.71      0.59        17
      B-ROLE       0.97      0.86      0.92        44
       I-DOC       0.87      0.81      0.84       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.97      0.95      0.96      4273
           O       0.94      0.96      0.95     12945

    accuracy                           0.93     21349
   macro avg       0.77      0.74      0.75     21349
w

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  60%|██████    | 9/15 [28:20<18:55, 189.25s/it]

Average train loss: 0.07959265198641971

Validation loss: 0.35898341704159975

              precision    recall  f1-score   support

      B-DATE       0.80      0.84      0.82       291
       B-DOC       0.91      0.81      0.86       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.76      0.78      0.77       282
       B-LOC       0.51      0.60      0.55        77
   B-ORDINAL       0.85      0.52      0.65        21
     B-PARTY       0.83      0.79      0.81      1994
   B-PERCENT       0.90      1.00      0.95        28
    B-PERSON       0.58      0.65      0.61        17
      B-ROLE       1.00      0.86      0.93        44
       I-DOC       0.87      0.80      0.84       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.97      0.95      0.96      4273
           O       0.94      0.96      0.95     12945

    accuracy                           0.93     21349
   macro avg       0.78      0.74      0.75     21349
w

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  67%|██████▋   | 10/15 [31:29<15:46, 189.27s/it]

Average train loss: 0.07113950391675332

Validation loss: 0.35835727738837403

              precision    recall  f1-score   support

      B-DATE       0.80      0.83      0.81       291
       B-DOC       0.95      0.81      0.87       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.81      0.77      0.79       282
       B-LOC       0.54      0.58      0.56        77
   B-ORDINAL       0.85      0.52      0.65        21
     B-PARTY       0.82      0.80      0.81      1994
   B-PERCENT       0.90      0.96      0.93        28
    B-PERSON       0.55      0.65      0.59        17
      B-ROLE       0.93      0.86      0.89        44
       I-DOC       0.90      0.81      0.85       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.95      0.96      0.95      4273
           O       0.94      0.96      0.95     12945

    accuracy                           0.93     21349
   macro avg       0.78      0.73      0.75     21349
w

Epoch:  73%|███████▎  | 11/15 [34:39<12:37, 189.29s/it]

Average train loss: 0.06599384129269799

Validation loss: 0.37251468437413376

              precision    recall  f1-score   support

      B-DATE       0.82      0.81      0.81       291
       B-DOC       0.91      0.82      0.86       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.77      0.77      0.77       282
       B-LOC       0.64      0.61      0.62        77
   B-ORDINAL       0.85      0.52      0.65        21
     B-PARTY       0.79      0.83      0.81      1994
   B-PERCENT       0.90      0.96      0.93        28
    B-PERSON       0.50      0.65      0.56        17
      B-ROLE       0.97      0.86      0.92        44
       I-DOC       0.88      0.82      0.85       736
   I-EFFDATE       1.00      0.04      0.08        24
     I-PARTY       0.96      0.96      0.96      4273
           O       0.95      0.95      0.95     12945

    accuracy                           0.93     21349
   macro avg       0.85      0.74      0.76     21349
w

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  80%|████████  | 12/15 [37:48<09:27, 189.32s/it]

Average train loss: 0.06072392664265944

Validation loss: 0.38432850906004506

              precision    recall  f1-score   support

      B-DATE       0.82      0.81      0.81       291
       B-DOC       0.90      0.82      0.86       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.77      0.79      0.78       282
       B-LOC       0.62      0.61      0.61        77
   B-ORDINAL       0.86      0.57      0.69        21
     B-PARTY       0.81      0.82      0.81      1994
   B-PERCENT       0.90      0.96      0.93        28
    B-PERSON       0.50      0.65      0.56        17
      B-ROLE       0.93      0.86      0.89        44
       I-DOC       0.80      0.84      0.82       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.96      0.96      0.96      4273
           O       0.95      0.95      0.95     12945

    accuracy                           0.93     21349
   macro avg       0.77      0.74      0.75     21349
w

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  87%|████████▋ | 13/15 [40:57<06:18, 189.27s/it]

Average train loss: 0.055569285193263596

Validation loss: 0.38382988423109055

              precision    recall  f1-score   support

      B-DATE       0.81      0.82      0.81       291
       B-DOC       0.93      0.82      0.87       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.80      0.77      0.78       282
       B-LOC       0.59      0.64      0.61        77
   B-ORDINAL       0.85      0.52      0.65        21
     B-PARTY       0.83      0.81      0.82      1994
   B-PERCENT       0.90      0.96      0.93        28
    B-PERSON       0.52      0.65      0.58        17
      B-ROLE       0.93      0.86      0.89        44
       I-DOC       0.90      0.82      0.86       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.96      0.96      0.96      4273
           O       0.95      0.96      0.95     12945

    accuracy                           0.93     21349
   macro avg       0.78      0.74      0.75     21349


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch:  93%|█████████▎| 14/15 [44:07<03:09, 189.33s/it]

Average train loss: 0.05339805579938884

Validation loss: 0.3924471413095792

              precision    recall  f1-score   support

      B-DATE       0.80      0.83      0.81       291
       B-DOC       0.93      0.82      0.87       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.76      0.78      0.77       282
       B-LOC       0.56      0.65      0.60        77
   B-ORDINAL       0.86      0.57      0.69        21
     B-PARTY       0.82      0.81      0.81      1994
   B-PERCENT       0.90      0.96      0.93        28
    B-PERSON       0.39      0.65      0.49        17
      B-ROLE       0.93      0.86      0.89        44
       I-DOC       0.90      0.81      0.85       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.96      0.96      0.96      4273
           O       0.95      0.96      0.95     12945

    accuracy                           0.93     21349
   macro avg       0.77      0.74      0.75     21349
we

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
Epoch: 100%|██████████| 15/15 [47:16<00:00, 189.10s/it]

Average train loss: 0.050092193492646535

Validation loss: 0.39353389851748943

              precision    recall  f1-score   support

      B-DATE       0.80      0.83      0.81       291
       B-DOC       0.91      0.82      0.87       610
   B-EFFDATE       1.00      0.71      0.83         7
       B-LAW       0.77      0.78      0.78       282
       B-LOC       0.56      0.65      0.60        77
   B-ORDINAL       0.86      0.57      0.69        21
     B-PARTY       0.82      0.81      0.81      1994
   B-PERCENT       0.90      0.96      0.93        28
    B-PERSON       0.38      0.65      0.48        17
      B-ROLE       0.90      0.86      0.88        44
       I-DOC       0.87      0.82      0.85       736
   I-EFFDATE       0.00      0.00      0.00        24
     I-PARTY       0.96      0.96      0.96      4273
           O       0.95      0.96      0.95     12945

    accuracy                           0.93     21349
   macro avg       0.76      0.74      0.75     21349



