In [1]:
# Check whether PyTorch can see a CUDA-capable GPU.
# This only reports availability (the value is shown as the cell output);
# it does not select or configure a device by itself.
import torch
torch.cuda.is_available()

True

Set up Flair

In [2]:
import flair

In [3]:
# Smoke test: run Flair's pre-trained German NER tagger on one sample text.
# Adapted from the Flair tagging tutorial:
# https://github.com/flairNLP/flair/blob/master/resources/docs/TUTORIAL_2_TAGGING.md

from flair.data import Sentence
from flair.models import SequenceTagger

# Sample German text. Adjacent string literals are concatenated by Python,
# so this is one single string, only wrapped across lines for readability.
text_de = (
    "Erledigung des Prüfungsberichtes Der Stadtrechnungshof Wien unterzog "
    "die Gebarung der Vienna Film Commission GmbH in den Jahren 2015 bis 2017 "
    "einer Prüfung. Der diesbezügliche Bericht des Stadtrechnungshofes Wien "
    "wurde am 8. Oktober 2019 veröffentlicht und im Rahmen der Sitzung des "
    "Stadtrechnungshofausschusses vom 16. Oktober 2019, Ausschusszahl 63/19 "
    "mit Beschluss zur Kenntnis genommen."
)

# Load the pre-trained German NER tagger
tagger_de = SequenceTagger.load('de-ner-large')

# Wrap the raw text in a Sentence object and let the tagger annotate it
sentence_de = Sentence(text_de)
tagger_de.predict(sentence_de)

# Show the sentence together with all of its annotations
print(sentence_de)

# List every predicted entity span on its own line
print('The following NER tags are found:')
for entity in sentence_de.get_spans('ner'):
    print(entity)



Downloading:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

2022-10-11 13:08:46,020 loading file /root/.flair/models/ner-german-large/6b8de9edd73722050be2547acf64c037b2df833c6e8f0e88934de08385e26c1e.4b0797effcc6ebb1889d5d29784b97f0a099c1569b319d87d7c387e44e2bba48


Downloading:   0%|          | 0.00/616 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/9.10M [00:00<?, ?B/s]

2022-10-11 13:09:20,783 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, B-PER, E-PER, S-LOC, B-MISC, I-MISC, E-MISC, S-PER, B-ORG, E-ORG, S-ORG, I-ORG, B-LOC, E-LOC, S-MISC, I-PER, I-LOC, <START>, <STOP>
Sentence: "Erledigung des Prüfungsberichtes Der Stadtrechnungshof Wien unterzog die Gebarung der Vienna Film Commission GmbH in den Jahren 2015 bis 2017 einer Prüfung . Der diesbezügliche Bericht des Stadtrechnungshofes Wien wurde am 8. Oktober 2019 veröffentlicht und im Rahmen der Sitzung des Stadtrechnungshofausschusses vom 16. Oktober 2019 , Ausschusszahl 63 / 19 mit Beschluss zur Kenntnis genommen ." → ["Wien"/LOC, "Vienna Film Commission GmbH"/ORG, "Wien"/LOC]
The following NER tags are found:
Span[5:6]: "Wien" → LOC (1.0)
Span[10:14]: "Vienna Film Commission GmbH" → ORG (1.0)
Span[28:29]: "Wien" → LOC (1.0)


Reading Your Own Sequence Labeling Dataset

In [4]:
# Load the custom column-format corpus and derive the label dictionary from it.
from flair.data import Corpus
from flair.datasets import ColumnCorpus

# The label we want to predict
label_type = 'ner'

# Column layout of the data files: column index -> annotation name
columns = {0: 'text', 1: 'pos', 2: 'ner'}

# Build the corpus from the train and test files. The empty data folder
# resolves to the current working directory (the log prints "Reading data from .").
# No dev_file ('dev_flair.txt') is passed; training still reports DEV scores,
# so Flair presumably samples a dev split from the training data — TODO confirm,
# and consider supplying an explicit dev file for reproducible evaluation.
corpus: Corpus = ColumnCorpus(
    "",
    columns,
    train_file='train_flair.txt',
    test_file='test_flair.txt',
)

# Collect all values of the target label seen in the corpus and show them
label_dict = corpus.make_label_dictionary(label_type=label_type)
print(label_dict)

2022-10-11 13:10:29,449 Reading data from .
2022-10-11 13:10:29,461 Train: train_flair.txt
2022-10-11 13:10:29,463 Dev: None
2022-10-11 13:10:29,471 Test: test_flair.txt
2022-10-11 13:10:34,854 Computing label dictionary. Progress:


2334it [00:00, 46239.73it/s]

2022-10-11 13:10:34,916 Dictionary created for label 'ner' with 2 values: ORG (seen 2066 times)
Dictionary with 2 tags: <unk>, ORG





In [7]:
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# 4. embedding stack: GloVe word embeddings followed by the forward and
#    backward Flair embeddings, combined into a single stacked embedding.
# NOTE(review): 'glove' and 'news-*' appear to be English models, while the
# smoke test earlier in this notebook uses German text — confirm that the
# embeddings match the language of the training corpus.
embedding_types = [WordEmbeddings('glove')]
embedding_types.extend(
    FlairEmbeddings(model_name)
    for model_name in ('news-forward', 'news-backward')
)
embeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. sequence tagger over the stacked embeddings, with a CRF enabled,
#    predicting the labels from the dictionary built from the corpus
tagger = SequenceTagger(
    embeddings=embeddings,
    hidden_size=256,
    tag_type=label_type,
    tag_dictionary=label_dict,
    use_crf=True,
)

# 6. trainer that couples the model with the corpus
trainer = ModelTrainer(tagger, corpus)

# 7. run training; model files are written below the given output folder.
#    The call is the cell's last expression, so its return value is displayed.
training_options = dict(learning_rate=0.1, mini_batch_size=16, max_epochs=10)
trainer.train('resources/taggers/sota-ner-flair', **training_options)

2022-10-11 13:18:27,433 SequenceTagger predicts: Dictionary with 5 tags: O, S-ORG, B-ORG, E-ORG, I-ORG
2022-10-11 13:18:27,694 ----------------------------------------------------------------------------------------------------
2022-10-11 13:18:27,697 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings(
      'glove'
      (embedding): Embedding(400001, 100)
    )
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=300, bias=True)
      )
    )
    (list_embedding_2): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=300, bias=True)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
 

  "There should be no best model saved at epoch 1 except there "


2022-10-11 13:18:30,683 epoch 1 - iter 14/146 - loss 0.56249803 - samples/sec: 76.61 - lr: 0.100000
2022-10-11 13:18:33,328 epoch 1 - iter 28/146 - loss 0.37171176 - samples/sec: 84.80 - lr: 0.100000
2022-10-11 13:18:36,135 epoch 1 - iter 42/146 - loss 0.29607390 - samples/sec: 79.88 - lr: 0.100000
2022-10-11 13:18:39,035 epoch 1 - iter 56/146 - loss 0.24566340 - samples/sec: 77.38 - lr: 0.100000
2022-10-11 13:18:41,737 epoch 1 - iter 70/146 - loss 0.22201969 - samples/sec: 83.02 - lr: 0.100000
2022-10-11 13:18:44,792 epoch 1 - iter 84/146 - loss 0.20042550 - samples/sec: 73.39 - lr: 0.100000
2022-10-11 13:18:47,568 epoch 1 - iter 98/146 - loss 0.18506481 - samples/sec: 80.78 - lr: 0.100000
2022-10-11 13:18:50,366 epoch 1 - iter 112/146 - loss 0.17451829 - samples/sec: 80.17 - lr: 0.100000
2022-10-11 13:18:52,939 epoch 1 - iter 126/146 - loss 0.16286257 - samples/sec: 87.16 - lr: 0.100000
2022-10-11 13:18:55,467 epoch 1 - iter 140/146 - loss 0.15489250 - samples/sec: 88.70 - lr: 0.1000

100%|██████████| 17/17 [00:03<00:00,  4.98it/s]

2022-10-11 13:19:00,106 Evaluating as a multi-label problem: False
2022-10-11 13:19:00,120 DEV : loss 0.055510684847831726 - f1-score (micro avg)  0.5814
2022-10-11 13:19:00,187 BAD EPOCHS (no improvement): 0
2022-10-11 13:19:00,189 saving best model





2022-10-11 13:19:02,009 ----------------------------------------------------------------------------------------------------
2022-10-11 13:19:04,627 epoch 2 - iter 14/146 - loss 0.07529324 - samples/sec: 85.68 - lr: 0.100000
2022-10-11 13:19:08,152 epoch 2 - iter 28/146 - loss 0.08049998 - samples/sec: 63.60 - lr: 0.100000
2022-10-11 13:19:11,021 epoch 2 - iter 42/146 - loss 0.07924217 - samples/sec: 78.19 - lr: 0.100000
2022-10-11 13:19:13,754 epoch 2 - iter 56/146 - loss 0.07523777 - samples/sec: 82.09 - lr: 0.100000
2022-10-11 13:19:16,395 epoch 2 - iter 70/146 - loss 0.07145265 - samples/sec: 85.03 - lr: 0.100000
2022-10-11 13:19:19,355 epoch 2 - iter 84/146 - loss 0.07077404 - samples/sec: 75.79 - lr: 0.100000
2022-10-11 13:19:21,969 epoch 2 - iter 98/146 - loss 0.06894996 - samples/sec: 85.79 - lr: 0.100000
2022-10-11 13:19:24,672 epoch 2 - iter 112/146 - loss 0.06864009 - samples/sec: 82.97 - lr: 0.100000
2022-10-11 13:19:27,495 epoch 2 - iter 126/146 - loss 0.06811528 - samples

100%|██████████| 17/17 [00:03<00:00,  4.85it/s]

2022-10-11 13:19:34,768 Evaluating as a multi-label problem: False
2022-10-11 13:19:34,782 DEV : loss 0.03968806192278862 - f1-score (micro avg)  0.6226
2022-10-11 13:19:34,848 BAD EPOCHS (no improvement): 0
2022-10-11 13:19:34,850 saving best model





2022-10-11 13:19:36,720 ----------------------------------------------------------------------------------------------------
2022-10-11 13:19:39,308 epoch 3 - iter 14/146 - loss 0.05681850 - samples/sec: 86.78 - lr: 0.100000
2022-10-11 13:19:42,615 epoch 3 - iter 28/146 - loss 0.05649845 - samples/sec: 67.80 - lr: 0.100000
2022-10-11 13:19:45,178 epoch 3 - iter 42/146 - loss 0.05742400 - samples/sec: 87.51 - lr: 0.100000
2022-10-11 13:19:48,076 epoch 3 - iter 56/146 - loss 0.05680287 - samples/sec: 77.35 - lr: 0.100000
2022-10-11 13:19:50,883 epoch 3 - iter 70/146 - loss 0.05579273 - samples/sec: 79.88 - lr: 0.100000
2022-10-11 13:19:53,405 epoch 3 - iter 84/146 - loss 0.05666546 - samples/sec: 89.07 - lr: 0.100000
2022-10-11 13:19:56,250 epoch 3 - iter 98/146 - loss 0.05490161 - samples/sec: 78.84 - lr: 0.100000
2022-10-11 13:19:59,096 epoch 3 - iter 112/146 - loss 0.05493604 - samples/sec: 78.77 - lr: 0.100000
2022-10-11 13:20:02,044 epoch 3 - iter 126/146 - loss 0.05522619 - samples

100%|██████████| 17/17 [00:03<00:00,  4.95it/s]

2022-10-11 13:20:09,272 Evaluating as a multi-label problem: False
2022-10-11 13:20:09,284 DEV : loss 0.035438667982816696 - f1-score (micro avg)  0.6232
2022-10-11 13:20:09,347 BAD EPOCHS (no improvement): 0
2022-10-11 13:20:09,348 saving best model





2022-10-11 13:20:11,187 ----------------------------------------------------------------------------------------------------
2022-10-11 13:20:14,345 epoch 4 - iter 14/146 - loss 0.06262403 - samples/sec: 71.02 - lr: 0.100000
2022-10-11 13:20:16,846 epoch 4 - iter 28/146 - loss 0.05368603 - samples/sec: 89.74 - lr: 0.100000
2022-10-11 13:20:19,713 epoch 4 - iter 42/146 - loss 0.05375158 - samples/sec: 78.19 - lr: 0.100000
2022-10-11 13:20:22,275 epoch 4 - iter 56/146 - loss 0.05051161 - samples/sec: 87.52 - lr: 0.100000
2022-10-11 13:20:24,975 epoch 4 - iter 70/146 - loss 0.04955756 - samples/sec: 83.08 - lr: 0.100000
2022-10-11 13:20:27,712 epoch 4 - iter 84/146 - loss 0.04946368 - samples/sec: 81.90 - lr: 0.100000
2022-10-11 13:20:30,292 epoch 4 - iter 98/146 - loss 0.05028984 - samples/sec: 86.95 - lr: 0.100000
2022-10-11 13:20:33,174 epoch 4 - iter 112/146 - loss 0.04985094 - samples/sec: 77.79 - lr: 0.100000
2022-10-11 13:20:35,970 epoch 4 - iter 126/146 - loss 0.04935425 - samples

100%|██████████| 17/17 [00:03<00:00,  4.94it/s]

2022-10-11 13:20:43,459 Evaluating as a multi-label problem: False
2022-10-11 13:20:43,475 DEV : loss 0.03667806461453438 - f1-score (micro avg)  0.6136
2022-10-11 13:20:43,540 BAD EPOCHS (no improvement): 1
2022-10-11 13:20:43,542 ----------------------------------------------------------------------------------------------------





2022-10-11 13:20:46,285 epoch 5 - iter 14/146 - loss 0.04725937 - samples/sec: 81.80 - lr: 0.100000
2022-10-11 13:20:49,108 epoch 5 - iter 28/146 - loss 0.04620849 - samples/sec: 79.43 - lr: 0.100000
2022-10-11 13:20:51,713 epoch 5 - iter 42/146 - loss 0.04413384 - samples/sec: 86.09 - lr: 0.100000
2022-10-11 13:20:54,595 epoch 5 - iter 56/146 - loss 0.04618054 - samples/sec: 77.82 - lr: 0.100000
2022-10-11 13:20:57,753 epoch 5 - iter 70/146 - loss 0.04668612 - samples/sec: 70.99 - lr: 0.100000
2022-10-11 13:21:00,540 epoch 5 - iter 84/146 - loss 0.04748203 - samples/sec: 80.44 - lr: 0.100000
2022-10-11 13:21:03,250 epoch 5 - iter 98/146 - loss 0.04738605 - samples/sec: 82.75 - lr: 0.100000
2022-10-11 13:21:07,000 epoch 5 - iter 112/146 - loss 0.04656185 - samples/sec: 59.78 - lr: 0.100000
2022-10-11 13:21:09,684 epoch 5 - iter 126/146 - loss 0.04615415 - samples/sec: 83.59 - lr: 0.100000
2022-10-11 13:21:12,353 epoch 5 - iter 140/146 - loss 0.04531297 - samples/sec: 84.00 - lr: 0.1000

100%|██████████| 17/17 [00:03<00:00,  4.98it/s]

2022-10-11 13:21:17,072 Evaluating as a multi-label problem: False
2022-10-11 13:21:17,087 DEV : loss 0.02828693948686123 - f1-score (micro avg)  0.6494
2022-10-11 13:21:17,153 BAD EPOCHS (no improvement): 0
2022-10-11 13:21:17,154 saving best model





2022-10-11 13:21:19,113 ----------------------------------------------------------------------------------------------------
2022-10-11 13:21:21,791 epoch 6 - iter 14/146 - loss 0.03525628 - samples/sec: 83.83 - lr: 0.100000
2022-10-11 13:21:24,610 epoch 6 - iter 28/146 - loss 0.03637641 - samples/sec: 79.55 - lr: 0.100000
2022-10-11 13:21:27,351 epoch 6 - iter 42/146 - loss 0.03930715 - samples/sec: 81.86 - lr: 0.100000
2022-10-11 13:21:30,190 epoch 6 - iter 56/146 - loss 0.03889151 - samples/sec: 79.07 - lr: 0.100000
2022-10-11 13:21:32,988 epoch 6 - iter 70/146 - loss 0.04129425 - samples/sec: 80.14 - lr: 0.100000
2022-10-11 13:21:36,038 epoch 6 - iter 84/146 - loss 0.04266545 - samples/sec: 73.50 - lr: 0.100000
2022-10-11 13:21:38,539 epoch 6 - iter 98/146 - loss 0.04241009 - samples/sec: 89.71 - lr: 0.100000
2022-10-11 13:21:41,067 epoch 6 - iter 112/146 - loss 0.04182324 - samples/sec: 88.84 - lr: 0.100000
2022-10-11 13:21:43,813 epoch 6 - iter 126/146 - loss 0.04157452 - samples

100%|██████████| 17/17 [00:03<00:00,  5.01it/s]

2022-10-11 13:21:51,518 Evaluating as a multi-label problem: False
2022-10-11 13:21:51,533 DEV : loss 0.02700667642056942 - f1-score (micro avg)  0.648
2022-10-11 13:21:51,599 BAD EPOCHS (no improvement): 1
2022-10-11 13:21:51,601 ----------------------------------------------------------------------------------------------------





2022-10-11 13:21:54,285 epoch 7 - iter 14/146 - loss 0.03670209 - samples/sec: 83.61 - lr: 0.100000
2022-10-11 13:21:57,899 epoch 7 - iter 28/146 - loss 0.04770547 - samples/sec: 62.05 - lr: 0.100000
2022-10-11 13:22:00,767 epoch 7 - iter 42/146 - loss 0.04487721 - samples/sec: 78.18 - lr: 0.100000
2022-10-11 13:22:03,618 epoch 7 - iter 56/146 - loss 0.04347542 - samples/sec: 78.66 - lr: 0.100000
2022-10-11 13:22:06,394 epoch 7 - iter 70/146 - loss 0.04220254 - samples/sec: 80.93 - lr: 0.100000
2022-10-11 13:22:08,928 epoch 7 - iter 84/146 - loss 0.04109233 - samples/sec: 88.49 - lr: 0.100000
2022-10-11 13:22:11,740 epoch 7 - iter 98/146 - loss 0.04167064 - samples/sec: 79.74 - lr: 0.100000
2022-10-11 13:22:14,316 epoch 7 - iter 112/146 - loss 0.04150065 - samples/sec: 87.08 - lr: 0.100000
2022-10-11 13:22:16,752 epoch 7 - iter 126/146 - loss 0.04120344 - samples/sec: 92.07 - lr: 0.100000
2022-10-11 13:22:19,624 epoch 7 - iter 140/146 - loss 0.04182555 - samples/sec: 78.13 - lr: 0.1000

100%|██████████| 17/17 [00:03<00:00,  4.96it/s]

2022-10-11 13:22:24,268 Evaluating as a multi-label problem: False
2022-10-11 13:22:24,280 DEV : loss 0.028297198936343193 - f1-score (micro avg)  0.6651
2022-10-11 13:22:24,356 BAD EPOCHS (no improvement): 0
2022-10-11 13:22:24,359 saving best model





2022-10-11 13:22:26,220 ----------------------------------------------------------------------------------------------------
2022-10-11 13:22:29,607 epoch 8 - iter 14/146 - loss 0.04137154 - samples/sec: 66.38 - lr: 0.100000
2022-10-11 13:22:32,516 epoch 8 - iter 28/146 - loss 0.04116820 - samples/sec: 77.08 - lr: 0.100000
2022-10-11 13:22:35,210 epoch 8 - iter 42/146 - loss 0.04105112 - samples/sec: 83.22 - lr: 0.100000
2022-10-11 13:22:38,079 epoch 8 - iter 56/146 - loss 0.04078057 - samples/sec: 78.17 - lr: 0.100000
2022-10-11 13:22:40,870 epoch 8 - iter 70/146 - loss 0.04044259 - samples/sec: 80.36 - lr: 0.100000
2022-10-11 13:22:43,665 epoch 8 - iter 84/146 - loss 0.04037007 - samples/sec: 80.23 - lr: 0.100000
2022-10-11 13:22:46,366 epoch 8 - iter 98/146 - loss 0.03968468 - samples/sec: 83.15 - lr: 0.100000
2022-10-11 13:22:48,854 epoch 8 - iter 112/146 - loss 0.03979185 - samples/sec: 90.29 - lr: 0.100000
2022-10-11 13:22:51,550 epoch 8 - iter 126/146 - loss 0.03926638 - samples

100%|██████████| 17/17 [00:03<00:00,  4.93it/s]

2022-10-11 13:22:58,875 Evaluating as a multi-label problem: False
2022-10-11 13:22:58,890 DEV : loss 0.02517099492251873 - f1-score (micro avg)  0.7064
2022-10-11 13:22:58,960 BAD EPOCHS (no improvement): 0
2022-10-11 13:22:58,962 saving best model





2022-10-11 13:23:00,842 ----------------------------------------------------------------------------------------------------
2022-10-11 13:23:03,763 epoch 9 - iter 14/146 - loss 0.03648426 - samples/sec: 76.82 - lr: 0.100000
2022-10-11 13:23:06,162 epoch 9 - iter 28/146 - loss 0.03740705 - samples/sec: 93.55 - lr: 0.100000
2022-10-11 13:23:08,751 epoch 9 - iter 42/146 - loss 0.03502402 - samples/sec: 86.66 - lr: 0.100000
2022-10-11 13:23:11,173 epoch 9 - iter 56/146 - loss 0.03757736 - samples/sec: 92.61 - lr: 0.100000
2022-10-11 13:23:13,774 epoch 9 - iter 70/146 - loss 0.03635203 - samples/sec: 86.39 - lr: 0.100000
2022-10-11 13:23:16,834 epoch 9 - iter 84/146 - loss 0.03712737 - samples/sec: 73.28 - lr: 0.100000
2022-10-11 13:23:19,729 epoch 9 - iter 98/146 - loss 0.03660671 - samples/sec: 77.44 - lr: 0.100000
2022-10-11 13:23:22,579 epoch 9 - iter 112/146 - loss 0.03655153 - samples/sec: 78.66 - lr: 0.100000
2022-10-11 13:23:25,675 epoch 9 - iter 126/146 - loss 0.03770027 - samples

100%|██████████| 17/17 [00:03<00:00,  4.93it/s]

2022-10-11 13:23:33,364 Evaluating as a multi-label problem: False
2022-10-11 13:23:33,379 DEV : loss 0.023691877722740173 - f1-score (micro avg)  0.7594
2022-10-11 13:23:33,442 BAD EPOCHS (no improvement): 0
2022-10-11 13:23:33,443 saving best model





2022-10-11 13:23:35,314 ----------------------------------------------------------------------------------------------------
2022-10-11 13:23:38,721 epoch 10 - iter 14/146 - loss 0.04345506 - samples/sec: 65.86 - lr: 0.100000
2022-10-11 13:23:41,588 epoch 10 - iter 28/146 - loss 0.03748644 - samples/sec: 78.24 - lr: 0.100000
2022-10-11 13:23:45,408 epoch 10 - iter 42/146 - loss 0.03684744 - samples/sec: 58.71 - lr: 0.100000
2022-10-11 13:23:48,147 epoch 10 - iter 56/146 - loss 0.03603312 - samples/sec: 81.86 - lr: 0.100000
2022-10-11 13:23:50,515 epoch 10 - iter 70/146 - loss 0.03648455 - samples/sec: 94.70 - lr: 0.100000
2022-10-11 13:23:53,420 epoch 10 - iter 84/146 - loss 0.03503575 - samples/sec: 77.19 - lr: 0.100000
2022-10-11 13:23:56,201 epoch 10 - iter 98/146 - loss 0.03531599 - samples/sec: 80.77 - lr: 0.100000
2022-10-11 13:23:58,660 epoch 10 - iter 112/146 - loss 0.03561289 - samples/sec: 91.20 - lr: 0.100000
2022-10-11 13:24:01,518 epoch 10 - iter 126/146 - loss 0.03629372 

100%|██████████| 17/17 [00:03<00:00,  5.02it/s]

2022-10-11 13:24:08,753 Evaluating as a multi-label problem: False
2022-10-11 13:24:08,768 DEV : loss 0.022004157304763794 - f1-score (micro avg)  0.7433
2022-10-11 13:24:08,831 BAD EPOCHS (no improvement): 1





2022-10-11 13:24:10,681 ----------------------------------------------------------------------------------------------------
2022-10-11 13:24:10,684 loading file resources/taggers/sota-ner-flair/best-model.pt
2022-10-11 13:24:11,715 SequenceTagger predicts: Dictionary with 7 tags: O, S-ORG, B-ORG, E-ORG, I-ORG, <START>, <STOP>


100%|██████████| 70/70 [01:25<00:00,  1.22s/it]

2022-10-11 13:25:37,140 Evaluating as a multi-label problem: False
2022-10-11 13:25:37,159 0.7491	0.6941	0.7205	0.5632
2022-10-11 13:25:37,160 
Results:
- F-score (micro) 0.7205
- F-score (macro) 0.7205
- Accuracy 0.5632

By class:
              precision    recall  f1-score   support

         ORG     0.7491    0.6941    0.7205       925

   micro avg     0.7491    0.6941    0.7205       925
   macro avg     0.7491    0.6941    0.7205       925
weighted avg     0.7491    0.6941    0.7205       925

2022-10-11 13:25:37,163 ----------------------------------------------------------------------------------------------------





{'test_score': 0.7205387205387206,
 'dev_score_history': [0.5813953488372093,
  0.6226415094339622,
  0.6231884057971014,
  0.6136363636363636,
  0.6494117647058824,
  0.6480186480186481,
  0.6651053864168619,
  0.7064439140811457,
  0.7594339622641508,
  0.7432762836185819],
 'train_loss_history': [0.1527855202287295,
  0.06633509599347677,
  0.05386010580421053,
  0.04868079927052878,
  0.045621785198081195,
  0.04234102438319594,
  0.041796973833808364,
  0.03858111388641701,
  0.03751684271699241,
  0.03578229459732843],
 'dev_loss_history': [0.055510684847831726,
  0.03968806192278862,
  0.035438667982816696,
  0.03667806461453438,
  0.02828693948686123,
  0.02700667642056942,
  0.028297198936343193,
  0.02517099492251873,
  0.023691877722740173,
  0.022004157304763794]}