# Privacy Text Transformation on Verbmobil using Flair

In [1]:
import os
from typing import List

import numpy as np
import pandas as pd

from flair.data import Sentence
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from flair.visual.training_curves import Plotter



In [2]:
# Smoke test: run the tagger that Flair registers under the name 'ner' on a
# toy sentence before training our own models.

# make a sentence
# NOTE(review): "Berlin." has no space before the period. The recorded output
# reports 8 tokens and a single span "Berlin. Mark", so the period apparently
# stays glued to "Berlin" -- consider spacing out the punctuation.
sentence = Sentence('I love Berlin. Mark is going there .')

# load the NER tagger (the recorded log shows a CoNLL-03 English model being fetched)
tagger = SequenceTagger.load('ner')

# run NER over sentence; predictions are attached to `sentence`, which the
# next cell reads. As the last expression, the return value is the cell output.
tagger.predict(sentence)

2019-08-05 11:09:23,738 https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/models-v0.4/NER-conll03-english/en-ner-conll03-v0.4.pt not found in cache, downloading to /tmp/tmpiysxso35


100%|██████████| 432197603/432197603 [00:37<00:00, 11568959.51B/s]

2019-08-05 11:10:01,237 copying /tmp/tmpiysxso35 to cache at /nethome/didelani/.flair/models/en-ner-conll03-v0.4.pt





2019-08-05 11:10:38,464 removing temp file /tmp/tmpiysxso35
2019-08-05 11:10:38,508 loading file /nethome/didelani/.flair/models/en-ner-conll03-v0.4.pt


[Sentence: "I love Berlin. Mark is going there ." - 8 Tokens]

In [3]:
# Show the sentence tagged in the previous cell, followed by a header line.
print(sentence)
print('The following NER tags are found:')

# iterate over entities and print
# (one line per span stored under the 'ner' tag type)
for entity in sentence.get_spans('ner'):
    print(entity)

Sentence: "I love Berlin. Mark is going there ." - 8 Tokens
The following NER tags are found:
PER-span [3,4]: "Berlin. Mark"


## Load training data

In [4]:
# Column layout handed to ColumnCorpus when reading the .tsv files:
# column 0 is registered as 'text' and column 1 as 'ner'.
columns = {0: 'text', 1: 'ner'}

In [12]:
def train_ner(input_dir, output_dir, learning_rate=0.1, mini_batch_size=32,
              max_epochs=80, hidden_size=256):
    """Train a GloVe + CRF sequence tagger for NER and plot its training curves.

    Reads ``train.tsv``, ``valid.tsv`` and ``test.tsv`` from ``input_dir`` using
    the notebook-level ``columns`` mapping, trains a ``SequenceTagger`` with
    ``ModelTrainer`` (which is given ``output_dir`` as its base path) and then
    plots ``loss.tsv`` and ``weights.txt`` from that same folder.

    Args:
        input_dir: folder in which the train, dev and test files reside.
        output_dir: folder passed to ``ModelTrainer.train``. It may be given
            with or without a trailing path separator.
        learning_rate: learning rate passed to the trainer.
        mini_batch_size: mini-batch size passed to the trainer.
        max_epochs: maximum number of epochs passed to the trainer.
        hidden_size: ``hidden_size`` passed to ``SequenceTagger``.

    Returns:
        None. Progress is printed/logged; artifacts are left in ``output_dir``.
    """
    # The tag type we want to predict; also the name of the label column.
    tag_type = 'ner'

    # 1. init a corpus using column format, data folder and the names of the
    #    train, dev and test files
    corpus: Corpus = ColumnCorpus(input_dir, columns,
                                  train_file='train.tsv',
                                  test_file='test.tsv',
                                  dev_file='valid.tsv')

    # Quick sanity output: corpus size, one tagged example, split summary.
    print(len(corpus.train))
    print(corpus.train[1].to_tagged_string(tag_type))
    print(corpus)

    # 2. make the tag dictionary from the corpus
    tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
    print(tag_dictionary.idx2item)

    # 3. initialize embeddings
    embedding_types: List[TokenEmbeddings] = [
        WordEmbeddings('glove'),

        # To experiment with other embeddings, import them from
        # flair.embeddings and add them here, e.g.:
        # CharacterEmbeddings(),
        # FlairEmbeddings('news-forward'),
        # FlairEmbeddings('news-backward'),
    ]
    embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

    # 4. initialize sequence tagger
    tagger: SequenceTagger = SequenceTagger(hidden_size=hidden_size,
                                            embeddings=embeddings,
                                            tag_dictionary=tag_dictionary,
                                            tag_type=tag_type,
                                            use_crf=True)

    # 5. initialize trainer and start training
    trainer: ModelTrainer = ModelTrainer(tagger, corpus)
    trainer.train(output_dir,
                  learning_rate=learning_rate,
                  mini_batch_size=mini_batch_size,
                  max_epochs=max_epochs)

    # 6. plot training curves (optional)
    # Join paths instead of concatenating strings: with plain `+`, an
    # output_dir lacking a trailing slash produced paths such as
    # 'resources/taggers/cased-nerloss.tsv' and a FileNotFoundError.
    plotter = Plotter()
    plotter.plot_training_curves(os.path.join(output_dir, 'loss.tsv'))
    plotter.plot_weights(os.path.join(output_dir, 'weights.txt'))
    

### uncased model

In [14]:
# Train the uncased model; artifacts go to resources/taggers/uncased-ner/.
train_ner(
    input_dir='../data/bio_uncased/',
    output_dir='resources/taggers/uncased-ner/',
)

2019-08-05 12:40:06,742 Reading data from ../data/bio_uncased
2019-08-05 12:40:06,747 Train: ../data/bio_uncased/train.tsv
2019-08-05 12:40:06,750 Dev: ../data/bio_uncased/valid.tsv
2019-08-05 12:40:06,753 Test: ../data/bio_uncased/test.tsv
9299
what is good for you
Corpus: 9299 train + 975 dev + 1043 test sentences
[b'<unk>', b'O', b'B-PER', b'B-DATE', b'I-DATE', b'B-TIME', b'I-TIME', b'B-LOC', b'I-LOC', b'B-ORG', b'I-ORG', b'I-PER', b'<START>', b'<STOP>']
2019-08-05 12:40:09,463 ----------------------------------------------------------------------------------------------------
2019-08-05 12:40:09,464 Evaluation method: MICRO_F1_SCORE
2019-08-05 12:40:09,651 ----------------------------------------------------------------------------------------------------
2019-08-05 12:40:10,034 epoch 1 - iter 0/291 - loss 40.72638702
2019-08-05 12:40:12,970 epoch 1 - iter 29/291 - loss 11.91876203
2019-08-05 12:40:15,729 epoch 1 - iter 58/291 - loss 9.34245343
2019-08-05 12:40:18,629 epoch 1 - ite

2019-08-05 12:45:54,126 epoch 7 - iter 174/291 - loss 1.42376271
2019-08-05 12:45:56,985 epoch 7 - iter 203/291 - loss 1.39802066
2019-08-05 12:46:01,077 epoch 7 - iter 232/291 - loss 1.38388640
2019-08-05 12:46:03,884 epoch 7 - iter 261/291 - loss 1.40206465
2019-08-05 12:46:06,798 epoch 7 - iter 290/291 - loss 1.39304294
2019-08-05 12:46:07,120 ----------------------------------------------------------------------------------------------------
2019-08-05 12:46:07,121 EPOCH 7 done: loss 1.3930 - lr 0.1000 - bad epochs 0
2019-08-05 12:46:10,403 DEV : loss 0.8708064556121826 - score 0.8071
2019-08-05 12:46:13,518 TEST : loss 0.8337541222572327 - score 0.8339
2019-08-05 12:46:31,293 ----------------------------------------------------------------------------------------------------
2019-08-05 12:46:31,697 epoch 8 - iter 0/291 - loss 1.31921887
2019-08-05 12:46:34,391 epoch 8 - iter 29/291 - loss 1.32728059
2019-08-05 12:46:37,317 epoch 8 - iter 58/291 - loss 1.35659544
2019-08-05 12:46:4

2019-08-05 12:51:04,101 epoch 14 - iter 145/291 - loss 1.11510730
2019-08-05 12:51:07,185 epoch 14 - iter 174/291 - loss 1.13041461
2019-08-05 12:51:10,011 epoch 14 - iter 203/291 - loss 1.13384280
2019-08-05 12:51:12,691 epoch 14 - iter 232/291 - loss 1.13932148
2019-08-05 12:51:15,668 epoch 14 - iter 261/291 - loss 1.16430717
2019-08-05 12:51:18,371 epoch 14 - iter 290/291 - loss 1.14610612
2019-08-05 12:51:18,708 ----------------------------------------------------------------------------------------------------
2019-08-05 12:51:18,708 EPOCH 14 done: loss 1.1461 - lr 0.1000 - bad epochs 1
2019-08-05 12:51:22,166 DEV : loss 0.6808368563652039 - score 0.8491
2019-08-05 12:51:25,358 TEST : loss 0.6692715287208557 - score 0.8557
2019-08-05 12:51:25,361 ----------------------------------------------------------------------------------------------------
2019-08-05 12:51:25,745 epoch 15 - iter 0/291 - loss 1.40806210
2019-08-05 12:51:30,105 epoch 15 - iter 29/291 - loss 1.15189580
2019-08-

2019-08-05 12:56:12,797 epoch 21 - iter 116/291 - loss 1.01889960
2019-08-05 12:56:15,874 epoch 21 - iter 145/291 - loss 0.99685554
2019-08-05 12:56:18,868 epoch 21 - iter 174/291 - loss 0.99814535
2019-08-05 12:56:21,877 epoch 21 - iter 203/291 - loss 1.00176743
2019-08-05 12:56:24,670 epoch 21 - iter 232/291 - loss 1.00141593
2019-08-05 12:56:27,465 epoch 21 - iter 261/291 - loss 0.99837195
2019-08-05 12:56:31,491 epoch 21 - iter 290/291 - loss 1.00178910
2019-08-05 12:56:31,801 ----------------------------------------------------------------------------------------------------
2019-08-05 12:56:31,802 EPOCH 21 done: loss 1.0018 - lr 0.1000 - bad epochs 0
2019-08-05 12:56:35,102 DEV : loss 0.5818219184875488 - score 0.8656
2019-08-05 12:56:38,300 TEST : loss 0.5748113989830017 - score 0.8769
2019-08-05 12:56:38,303 ----------------------------------------------------------------------------------------------------
2019-08-05 12:56:38,701 epoch 22 - iter 0/291 - loss 0.77629995
2019-08

2019-08-05 13:00:47,440 epoch 28 - iter 58/291 - loss 0.86441124
2019-08-05 13:00:50,318 epoch 28 - iter 87/291 - loss 0.88906935
2019-08-05 13:00:53,090 epoch 28 - iter 116/291 - loss 0.89194791
2019-08-05 13:00:55,945 epoch 28 - iter 145/291 - loss 0.89247109
2019-08-05 13:00:59,997 epoch 28 - iter 174/291 - loss 0.90582490
2019-08-05 13:01:02,956 epoch 28 - iter 203/291 - loss 0.90813097
2019-08-05 13:01:05,820 epoch 28 - iter 232/291 - loss 0.89402375
2019-08-05 13:01:09,033 epoch 28 - iter 261/291 - loss 0.91331203
2019-08-05 13:01:12,074 epoch 28 - iter 290/291 - loss 0.90829028
2019-08-05 13:01:12,409 ----------------------------------------------------------------------------------------------------
2019-08-05 13:01:12,410 EPOCH 28 done: loss 0.9083 - lr 0.0500 - bad epochs 1
2019-08-05 13:01:15,987 DEV : loss 0.5567962527275085 - score 0.873
2019-08-05 13:01:19,281 TEST : loss 0.5340730547904968 - score 0.8826
2019-08-05 13:01:37,510 -------------------------------------------

2019-08-05 13:06:52,152 epoch 35 - iter 29/291 - loss 0.94821394
2019-08-05 13:06:56,105 epoch 35 - iter 58/291 - loss 0.88510495
2019-08-05 13:06:58,902 epoch 35 - iter 87/291 - loss 0.86692917
2019-08-05 13:07:01,662 epoch 35 - iter 116/291 - loss 0.85170616
2019-08-05 13:07:04,566 epoch 35 - iter 145/291 - loss 0.83623268
2019-08-05 13:07:07,338 epoch 35 - iter 174/291 - loss 0.83794498
2019-08-05 13:07:10,320 epoch 35 - iter 203/291 - loss 0.86796391
2019-08-05 13:07:13,169 epoch 35 - iter 232/291 - loss 0.87699873
2019-08-05 13:07:16,001 epoch 35 - iter 261/291 - loss 0.87464506
2019-08-05 13:07:18,904 epoch 35 - iter 290/291 - loss 0.88122491
2019-08-05 13:07:19,212 ----------------------------------------------------------------------------------------------------
2019-08-05 13:07:19,213 EPOCH 35 done: loss 0.8812 - lr 0.0500 - bad epochs 0
2019-08-05 13:07:22,531 DEV : loss 0.538253903388977 - score 0.8774
2019-08-05 13:07:25,646 TEST : loss 0.5256217122077942 - score 0.8801
20

2019-08-05 13:11:27,552 epoch 42 - iter 0/291 - loss 1.03772521
2019-08-05 13:11:30,290 epoch 42 - iter 29/291 - loss 0.76659187
2019-08-05 13:11:33,048 epoch 42 - iter 58/291 - loss 0.82237575
2019-08-05 13:11:35,943 epoch 42 - iter 87/291 - loss 0.84970517
2019-08-05 13:11:38,993 epoch 42 - iter 116/291 - loss 0.86929868
2019-08-05 13:11:42,016 epoch 42 - iter 145/291 - loss 0.85055581
2019-08-05 13:11:44,804 epoch 42 - iter 174/291 - loss 0.84888931
2019-08-05 13:11:47,850 epoch 42 - iter 203/291 - loss 0.85975133
2019-08-05 13:11:51,860 epoch 42 - iter 232/291 - loss 0.85765994
2019-08-05 13:11:54,675 epoch 42 - iter 261/291 - loss 0.86818775
2019-08-05 13:11:57,542 epoch 42 - iter 290/291 - loss 0.86349697
2019-08-05 13:11:57,858 ----------------------------------------------------------------------------------------------------
2019-08-05 13:11:57,859 EPOCH 42 done: loss 0.8635 - lr 0.0250 - bad epochs 1
2019-08-05 13:12:01,141 DEV : loss 0.5215002298355103 - score 0.8764
2019-08

2019-08-05 13:16:03,883 ----------------------------------------------------------------------------------------------------
2019-08-05 13:16:04,277 epoch 49 - iter 0/291 - loss 0.55129969
2019-08-05 13:16:07,335 epoch 49 - iter 29/291 - loss 0.82257584
2019-08-05 13:16:10,548 epoch 49 - iter 58/291 - loss 0.86950860
2019-08-05 13:16:14,926 epoch 49 - iter 87/291 - loss 0.86122101
2019-08-05 13:16:17,918 epoch 49 - iter 116/291 - loss 0.87686786
2019-08-05 13:16:20,583 epoch 49 - iter 145/291 - loss 0.85280795
2019-08-05 13:16:23,224 epoch 49 - iter 174/291 - loss 0.84348589
2019-08-05 13:16:25,947 epoch 49 - iter 203/291 - loss 0.84015369
2019-08-05 13:16:28,576 epoch 49 - iter 232/291 - loss 0.82414375
2019-08-05 13:16:31,205 epoch 49 - iter 261/291 - loss 0.82481044
2019-08-05 13:16:33,935 epoch 49 - iter 290/291 - loss 0.84208787
2019-08-05 13:16:34,274 ----------------------------------------------------------------------------------------------------
2019-08-05 13:16:34,275 EPOCH

2019-08-05 13:21:17,086 TEST : loss 0.4920291602611542 - score 0.8872
2019-08-05 13:21:17,089 ----------------------------------------------------------------------------------------------------
2019-08-05 13:21:17,491 epoch 56 - iter 0/291 - loss 1.27809930
2019-08-05 13:21:20,361 epoch 56 - iter 29/291 - loss 0.75234582
2019-08-05 13:21:23,195 epoch 56 - iter 58/291 - loss 0.79222089
2019-08-05 13:21:25,812 epoch 56 - iter 87/291 - loss 0.80523493
2019-08-05 13:21:28,573 epoch 56 - iter 116/291 - loss 0.81486004
2019-08-05 13:21:31,575 epoch 56 - iter 145/291 - loss 0.82335293
2019-08-05 13:21:34,596 epoch 56 - iter 174/291 - loss 0.83162108
2019-08-05 13:21:37,462 epoch 56 - iter 203/291 - loss 0.82331072
2019-08-05 13:21:40,193 epoch 56 - iter 232/291 - loss 0.82076981
2019-08-05 13:21:43,013 epoch 56 - iter 261/291 - loss 0.82937348
2019-08-05 13:21:45,789 epoch 56 - iter 290/291 - loss 0.81431384
2019-08-05 13:21:46,110 ------------------------------------------------------------

2019-08-05 13:25:34,934 EPOCH 62 done: loss 0.8357 - lr 0.0063 - bad epochs 3
2019-08-05 13:25:38,437 DEV : loss 0.5090065598487854 - score 0.8851
2019-08-05 13:25:41,937 TEST : loss 0.4880102574825287 - score 0.8896
2019-08-05 13:25:59,979 ----------------------------------------------------------------------------------------------------
2019-08-05 13:26:00,408 epoch 63 - iter 0/291 - loss 1.28772819
2019-08-05 13:26:03,535 epoch 63 - iter 29/291 - loss 0.97331915
2019-08-05 13:26:06,398 epoch 63 - iter 58/291 - loss 0.86105998
2019-08-05 13:26:09,326 epoch 63 - iter 87/291 - loss 0.83740593
2019-08-05 13:26:12,585 epoch 63 - iter 116/291 - loss 0.83161169
2019-08-05 13:26:15,395 epoch 63 - iter 145/291 - loss 0.81652867
2019-08-05 13:26:19,479 epoch 63 - iter 174/291 - loss 0.80052361
2019-08-05 13:26:23,442 epoch 63 - iter 203/291 - loss 0.78997290
2019-08-05 13:26:34,927 epoch 63 - iter 232/291 - loss 0.78630801
2019-08-05 13:26:38,921 epoch 63 - iter 261/291 - loss 0.79219431
201

2019-08-05 13:30:41,740 ----------------------------------------------------------------------------------------------------
2019-08-05 13:30:41,741 EPOCH 69 done: loss 0.7946 - lr 0.0031 - bad epochs 2
2019-08-05 13:30:45,151 DEV : loss 0.5047354102134705 - score 0.8847
2019-08-05 13:30:48,373 TEST : loss 0.48548585176467896 - score 0.8886
2019-08-05 13:30:48,375 ----------------------------------------------------------------------------------------------------
2019-08-05 13:30:48,800 epoch 70 - iter 0/291 - loss 0.92592114
2019-08-05 13:30:51,668 epoch 70 - iter 29/291 - loss 0.80386215
2019-08-05 13:30:54,458 epoch 70 - iter 58/291 - loss 0.73816009
2019-08-05 13:30:57,374 epoch 70 - iter 87/291 - loss 0.74835635
2019-08-05 13:31:00,464 epoch 70 - iter 116/291 - loss 0.75838881
2019-08-05 13:31:03,395 epoch 70 - iter 145/291 - loss 0.75968670
2019-08-05 13:31:06,355 epoch 70 - iter 174/291 - loss 0.79549098
2019-08-05 13:31:09,223 epoch 70 - iter 203/291 - loss 0.80485435
2019-08-0

2019-08-05 13:35:23,120 epoch 76 - iter 232/291 - loss 0.79264183
2019-08-05 13:35:25,810 epoch 76 - iter 261/291 - loss 0.78890226
2019-08-05 13:35:28,522 epoch 76 - iter 290/291 - loss 0.79488329
2019-08-05 13:35:28,854 ----------------------------------------------------------------------------------------------------
2019-08-05 13:35:28,855 EPOCH 76 done: loss 0.7949 - lr 0.0008 - bad epochs 0
2019-08-05 13:35:32,284 DEV : loss 0.5050367116928101 - score 0.8852
2019-08-05 13:35:35,545 TEST : loss 0.48480892181396484 - score 0.8877
2019-08-05 13:35:35,547 ----------------------------------------------------------------------------------------------------
2019-08-05 13:35:35,936 epoch 77 - iter 0/291 - loss 1.15083206
2019-08-05 13:35:38,627 epoch 77 - iter 29/291 - loss 0.79807291
2019-08-05 13:35:41,644 epoch 77 - iter 58/291 - loss 0.76897033
2019-08-05 13:35:44,593 epoch 77 - iter 87/291 - loss 0.77060526
2019-08-05 13:35:47,322 epoch 77 - iter 116/291 - loss 0.76792667
2019-08-0

### cased model

In [16]:
# Train the cased model; artifacts go to resources/taggers/cased-ner/.
train_ner(
    input_dir='../data/bio_cased/',
    output_dir='resources/taggers/cased-ner/',
)

2019-08-05 14:11:02,518 Reading data from ../data/bio_cased
2019-08-05 14:11:02,524 Train: ../data/bio_cased/train.tsv
2019-08-05 14:11:02,527 Dev: ../data/bio_cased/valid.tsv
2019-08-05 14:11:02,529 Test: ../data/bio_cased/test.tsv
9299
what is good for you
Corpus: 9299 train + 975 dev + 1043 test sentences
[b'<unk>', b'O', b'B-PER', b'B-DATE', b'I-DATE', b'B-TIME', b'I-TIME', b'B-LOC', b'I-LOC', b'B-ORG', b'I-ORG', b'I-PER', b'<START>', b'<STOP>']
2019-08-05 14:11:04,774 ----------------------------------------------------------------------------------------------------
2019-08-05 14:11:04,775 Evaluation method: MICRO_F1_SCORE
2019-08-05 14:11:05,020 ----------------------------------------------------------------------------------------------------
2019-08-05 14:11:05,493 epoch 1 - iter 0/291 - loss 19.21309090
2019-08-05 14:11:08,366 epoch 1 - iter 29/291 - loss 9.47932544
2019-08-05 14:11:11,302 epoch 1 - iter 58/291 - loss 8.19926015
2019-08-05 14:11:15,347 epoch 1 - iter 87/291 

2019-08-05 14:16:53,470 epoch 7 - iter 174/291 - loss 1.39982241
2019-08-05 14:16:56,369 epoch 7 - iter 203/291 - loss 1.38259710
2019-08-05 14:16:59,158 epoch 7 - iter 232/291 - loss 1.38805859
2019-08-05 14:17:01,907 epoch 7 - iter 261/291 - loss 1.39395225
2019-08-05 14:17:04,803 epoch 7 - iter 290/291 - loss 1.38436395
2019-08-05 14:17:05,130 ----------------------------------------------------------------------------------------------------
2019-08-05 14:17:05,130 EPOCH 7 done: loss 1.3844 - lr 0.1000 - bad epochs 0
2019-08-05 14:17:08,498 DEV : loss 0.8184205889701843 - score 0.8163
2019-08-05 14:17:13,368 TEST : loss 0.7835094928741455 - score 0.8396
2019-08-05 14:17:31,033 ----------------------------------------------------------------------------------------------------
2019-08-05 14:17:31,534 epoch 8 - iter 0/291 - loss 2.88907480
2019-08-05 14:17:34,328 epoch 8 - iter 29/291 - loss 1.31224082
2019-08-05 14:17:37,262 epoch 8 - iter 58/291 - loss 1.35468979
2019-08-05 14:17:4

2019-08-05 14:22:20,660 epoch 14 - iter 145/291 - loss 1.14505372
2019-08-05 14:22:23,441 epoch 14 - iter 174/291 - loss 1.14193144
2019-08-05 14:22:27,888 epoch 14 - iter 203/291 - loss 1.12569671
2019-08-05 14:22:30,776 epoch 14 - iter 232/291 - loss 1.13405819
2019-08-05 14:22:33,555 epoch 14 - iter 261/291 - loss 1.13617253
2019-08-05 14:22:36,278 epoch 14 - iter 290/291 - loss 1.14605968
2019-08-05 14:22:36,613 ----------------------------------------------------------------------------------------------------
2019-08-05 14:22:36,614 EPOCH 14 done: loss 1.1461 - lr 0.1000 - bad epochs 1
2019-08-05 14:22:39,926 DEV : loss 0.6762326955795288 - score 0.8529
2019-08-05 14:22:43,102 TEST : loss 0.6339519023895264 - score 0.8682
2019-08-05 14:23:00,963 ----------------------------------------------------------------------------------------------------
2019-08-05 14:23:01,425 epoch 15 - iter 0/291 - loss 0.28596130
2019-08-05 14:23:04,459 epoch 15 - iter 29/291 - loss 1.06502988
2019-08-

2019-08-05 14:27:31,086 epoch 21 - iter 116/291 - loss 0.97634348
2019-08-05 14:27:35,658 epoch 21 - iter 145/291 - loss 0.99460011
2019-08-05 14:27:38,591 epoch 21 - iter 174/291 - loss 0.98149467
2019-08-05 14:27:41,415 epoch 21 - iter 203/291 - loss 0.99220699
2019-08-05 14:27:44,282 epoch 21 - iter 232/291 - loss 0.98795199
2019-08-05 14:27:47,131 epoch 21 - iter 261/291 - loss 1.00456613
2019-08-05 14:27:49,981 epoch 21 - iter 290/291 - loss 1.00939764
2019-08-05 14:27:50,314 ----------------------------------------------------------------------------------------------------
2019-08-05 14:27:50,314 EPOCH 21 done: loss 1.0094 - lr 0.1000 - bad epochs 0
2019-08-05 14:27:53,746 DEV : loss 0.6197673082351685 - score 0.8658
2019-08-05 14:27:56,992 TEST : loss 0.5722768902778625 - score 0.877
2019-08-05 14:28:14,812 ----------------------------------------------------------------------------------------------------
2019-08-05 14:28:15,291 epoch 22 - iter 0/291 - loss 1.42341757
2019-08-

2019-08-05 14:32:23,344 epoch 28 - iter 58/291 - loss 0.91580488
2019-08-05 14:32:26,137 epoch 28 - iter 87/291 - loss 0.91938677
2019-08-05 14:32:28,978 epoch 28 - iter 116/291 - loss 0.93941872
2019-08-05 14:32:31,715 epoch 28 - iter 145/291 - loss 0.90073164
2019-08-05 14:32:34,621 epoch 28 - iter 174/291 - loss 0.91387984
2019-08-05 14:32:39,129 epoch 28 - iter 203/291 - loss 0.92751847
2019-08-05 14:32:41,974 epoch 28 - iter 232/291 - loss 0.93443004
2019-08-05 14:32:44,832 epoch 28 - iter 261/291 - loss 0.92993771
2019-08-05 14:32:47,556 epoch 28 - iter 290/291 - loss 0.92533102
2019-08-05 14:32:47,880 ----------------------------------------------------------------------------------------------------
2019-08-05 14:32:47,881 EPOCH 28 done: loss 0.9253 - lr 0.0500 - bad epochs 1
2019-08-05 14:32:51,237 DEV : loss 0.5586217641830444 - score 0.8755
2019-08-05 14:32:54,413 TEST : loss 0.5308376550674438 - score 0.8801
2019-08-05 14:33:12,157 ------------------------------------------

2019-08-05 14:37:15,231 epoch 35 - iter 29/291 - loss 0.78042497
2019-08-05 14:37:17,986 epoch 35 - iter 58/291 - loss 0.83002253
2019-08-05 14:37:22,500 epoch 35 - iter 87/291 - loss 0.86874995
2019-08-05 14:37:25,381 epoch 35 - iter 116/291 - loss 0.89296549
2019-08-05 14:37:28,326 epoch 35 - iter 145/291 - loss 0.89814283
2019-08-05 14:37:31,149 epoch 35 - iter 174/291 - loss 0.87711781
2019-08-05 14:37:33,990 epoch 35 - iter 203/291 - loss 0.88658138
2019-08-05 14:37:36,827 epoch 35 - iter 232/291 - loss 0.88999673
2019-08-05 14:37:39,517 epoch 35 - iter 261/291 - loss 0.87912166
2019-08-05 14:37:42,318 epoch 35 - iter 290/291 - loss 0.88717646
2019-08-05 14:37:42,639 ----------------------------------------------------------------------------------------------------
2019-08-05 14:37:42,640 EPOCH 35 done: loss 0.8872 - lr 0.0500 - bad epochs 2
2019-08-05 14:37:45,965 DEV : loss 0.5526865124702454 - score 0.8801
2019-08-05 14:37:49,327 TEST : loss 0.5179744362831116 - score 0.8846
2

2019-08-05 14:42:09,539 epoch 42 - iter 0/291 - loss 1.13410318
2019-08-05 14:42:12,375 epoch 42 - iter 29/291 - loss 0.86795329
2019-08-05 14:42:15,310 epoch 42 - iter 58/291 - loss 0.88948927
2019-08-05 14:42:18,292 epoch 42 - iter 87/291 - loss 0.84353766
2019-08-05 14:42:21,055 epoch 42 - iter 116/291 - loss 0.84554197
2019-08-05 14:42:23,880 epoch 42 - iter 145/291 - loss 0.85013108
2019-08-05 14:42:26,841 epoch 42 - iter 174/291 - loss 0.84911249
2019-08-05 14:42:29,717 epoch 42 - iter 203/291 - loss 0.84710115
2019-08-05 14:42:32,748 epoch 42 - iter 232/291 - loss 0.84149223
2019-08-05 14:42:35,533 epoch 42 - iter 261/291 - loss 0.84603094
2019-08-05 14:42:38,268 epoch 42 - iter 290/291 - loss 0.84785371
2019-08-05 14:42:38,587 ----------------------------------------------------------------------------------------------------
2019-08-05 14:42:38,588 EPOCH 42 done: loss 0.8479 - lr 0.0250 - bad epochs 1
2019-08-05 14:42:42,063 DEV : loss 0.521311342716217 - score 0.8784
2019-08-

2019-08-05 14:46:48,161 ----------------------------------------------------------------------------------------------------
2019-08-05 14:46:48,633 epoch 49 - iter 0/291 - loss 1.32801867
2019-08-05 14:46:51,311 epoch 49 - iter 29/291 - loss 0.73803388
2019-08-05 14:46:54,347 epoch 49 - iter 58/291 - loss 0.75739723
2019-08-05 14:46:57,090 epoch 49 - iter 87/291 - loss 0.74579454
2019-08-05 14:46:59,901 epoch 49 - iter 116/291 - loss 0.73401123
2019-08-05 14:47:02,688 epoch 49 - iter 145/291 - loss 0.75604614
2019-08-05 14:47:05,528 epoch 49 - iter 174/291 - loss 0.77412794
2019-08-05 14:47:08,392 epoch 49 - iter 203/291 - loss 0.77617710
2019-08-05 14:47:11,246 epoch 49 - iter 232/291 - loss 0.77281547
2019-08-05 14:47:14,047 epoch 49 - iter 261/291 - loss 0.79158677
2019-08-05 14:47:16,844 epoch 49 - iter 290/291 - loss 0.79852883
2019-08-05 14:47:17,168 ----------------------------------------------------------------------------------------------------
2019-08-05 14:47:17,169 EPOCH

2019-08-05 14:51:44,415 TEST : loss 0.5013455748558044 - score 0.8855
2019-08-05 14:51:44,418 ----------------------------------------------------------------------------------------------------
2019-08-05 14:51:44,860 epoch 56 - iter 0/291 - loss 0.52007651
2019-08-05 14:51:47,849 epoch 56 - iter 29/291 - loss 0.88509523
2019-08-05 14:51:50,668 epoch 56 - iter 58/291 - loss 0.81039937
2019-08-05 14:51:53,625 epoch 56 - iter 87/291 - loss 0.79038299
2019-08-05 14:51:56,719 epoch 56 - iter 116/291 - loss 0.79527392
2019-08-05 14:51:59,571 epoch 56 - iter 145/291 - loss 0.82317102
2019-08-05 14:52:02,410 epoch 56 - iter 174/291 - loss 0.81932568
2019-08-05 14:52:06,835 epoch 56 - iter 203/291 - loss 0.81289968
2019-08-05 14:52:09,443 epoch 56 - iter 232/291 - loss 0.81737862
2019-08-05 14:52:12,409 epoch 56 - iter 261/291 - loss 0.82152731
2019-08-05 14:52:15,063 epoch 56 - iter 290/291 - loss 0.83139624
2019-08-05 14:52:15,402 ------------------------------------------------------------

2019-08-05 14:56:15,012 EPOCH 62 done: loss 0.8222 - lr 0.0031 - bad epochs 0
2019-08-05 14:56:20,114 DEV : loss 0.503943681716919 - score 0.8831
2019-08-05 14:56:23,327 TEST : loss 0.49009132385253906 - score 0.8863
2019-08-05 14:56:23,330 ----------------------------------------------------------------------------------------------------
2019-08-05 14:56:23,798 epoch 63 - iter 0/291 - loss 0.92227614
2019-08-05 14:56:26,643 epoch 63 - iter 29/291 - loss 0.78953132
2019-08-05 14:56:29,749 epoch 63 - iter 58/291 - loss 0.78382208
2019-08-05 14:56:32,735 epoch 63 - iter 87/291 - loss 0.75841438
2019-08-05 14:56:35,443 epoch 63 - iter 116/291 - loss 0.76251086
2019-08-05 14:56:38,147 epoch 63 - iter 145/291 - loss 0.77667066
2019-08-05 14:56:41,062 epoch 63 - iter 174/291 - loss 0.78302119
2019-08-05 14:56:43,836 epoch 63 - iter 203/291 - loss 0.78345276
2019-08-05 14:56:46,769 epoch 63 - iter 232/291 - loss 0.78549390
2019-08-05 14:56:49,638 epoch 63 - iter 261/291 - loss 0.79194086
201

2019-08-05 15:00:56,607 EPOCH 69 done: loss 0.8221 - lr 0.0031 - bad epochs 3
2019-08-05 15:00:59,952 DEV : loss 0.5081146359443665 - score 0.8832
2019-08-05 15:01:03,125 TEST : loss 0.49080127477645874 - score 0.8849
Epoch    68: reducing learning rate of group 0 to 1.5625e-03.
2019-08-05 15:01:03,127 ----------------------------------------------------------------------------------------------------
2019-08-05 15:01:03,586 epoch 70 - iter 0/291 - loss 0.75532293
2019-08-05 15:01:06,389 epoch 70 - iter 29/291 - loss 0.80840824
2019-08-05 15:01:09,389 epoch 70 - iter 58/291 - loss 0.79907450
2019-08-05 15:01:12,031 epoch 70 - iter 87/291 - loss 0.78128427
2019-08-05 15:01:14,964 epoch 70 - iter 116/291 - loss 0.81535849
2019-08-05 15:01:17,626 epoch 70 - iter 145/291 - loss 0.81218299
2019-08-05 15:01:20,477 epoch 70 - iter 174/291 - loss 0.81368988
2019-08-05 15:01:23,563 epoch 70 - iter 203/291 - loss 0.82588663
2019-08-05 15:01:26,411 epoch 70 - iter 232/291 - loss 0.81716921
2019-0

2019-08-05 15:05:16,287 epoch 76 - iter 290/291 - loss 0.78310180
2019-08-05 15:05:16,669 ----------------------------------------------------------------------------------------------------
2019-08-05 15:05:16,670 EPOCH 76 done: loss 0.7831 - lr 0.0008 - bad epochs 2
2019-08-05 15:05:20,197 DEV : loss 0.50480717420578 - score 0.8831
2019-08-05 15:05:23,451 TEST : loss 0.4889009892940521 - score 0.8871
2019-08-05 15:05:23,453 ----------------------------------------------------------------------------------------------------
2019-08-05 15:05:23,937 epoch 77 - iter 0/291 - loss 0.50614524
2019-08-05 15:05:26,861 epoch 77 - iter 29/291 - loss 0.80630461
2019-08-05 15:05:29,766 epoch 77 - iter 58/291 - loss 0.75860569
2019-08-05 15:05:32,599 epoch 77 - iter 87/291 - loss 0.78756485
2019-08-05 15:05:35,399 epoch 77 - iter 116/291 - loss 0.78309023
2019-08-05 15:05:38,269 epoch 77 - iter 145/291 - loss 0.79104171
2019-08-05 15:05:41,101 epoch 77 - iter 174/291 - loss 0.79486935
2019-08-05 1

FileNotFoundError: [Errno 2] No such file or directory: 'resources/taggers/cased-nerloss.tsv'

In [18]:
# load the model you trained
# (`model` is reused by the prediction and evaluation cells further down)
model = SequenceTagger.load('resources/taggers/uncased-ner/final-model.pt')

# create example sentence
# NOTE(review): this input is cased although the uncased tagger is loaded --
# confirm whether the text should be lowercased first.
sentence = Sentence('I love Berlin')

# predict tags and print
model.predict(sentence)

# Tokens are printed inline, each tagged token followed by its <TAG>.
print(sentence.to_tagged_string())

2019-08-05 16:06:34,823 loading file resources/taggers/uncased-ner/final-model.pt
I love Berlin <B-LOC>


In [34]:
# Tag a single transcript line with the tagger loaded above.
# NOTE(review): the first whitespace-separated item looks like an utterance
# identifier rather than spoken text (presumably a corpus turn ID -- confirm);
# it is passed to the tagger as an ordinary token.
sentence = Sentence('MIMVMX_q002nxx0_022_130020_VM1_06 i can"t come on friday the twenty first the weekend is free ')

# predict tags and print
model.predict(sentence)

print(sentence.to_tagged_string())

MIMVMX_q002nxx0_022_130020_VM1_06 i can"t come on friday <B-DATE> the <I-DATE> twenty <I-DATE> first <I-DATE> the <I-DATE> weekend <I-DATE> is free


[Sentence: "well i would like very much not to have to wait until walked to do you have any time free between now and the end of this month" - 28 Tokens]

In [29]:
 # this is the folder in which train, test and dev files reside
# (the same uncased data folder that was passed to train_ner above)
data_folder = '../data/bio_uncased/'

# init a corpus using column format, data folder and the names of the train, dev and test files
# The corpus is rebuilt here because train_ner keeps its own corpus local;
# the evaluation cell below needs `corpus.test` at notebook level.
corpus: Corpus = ColumnCorpus(data_folder, columns,
                              train_file='train.tsv',
                              test_file='test.tsv',
                              dev_file='valid.tsv')
# Folder of the uncased tagger; used below as the prefix for the evaluation
# output file, so the trailing slash matters for the string concatenation there.
base_path = 'resources/taggers/uncased-ner/'

2019-08-05 16:34:11,764 Reading data from ../data/bio_uncased
2019-08-05 16:34:11,770 Train: ../data/bio_uncased/train.tsv
2019-08-05 16:34:11,773 Dev: ../data/bio_uncased/valid.tsv
2019-08-05 16:34:11,776 Test: ../data/bio_uncased/test.tsv


In [33]:
from flair.training_utils import Result

# Score the loaded tagger on the test split. `evaluate` returns a
# (result, loss) pair and is given an output file under `base_path`.
test_results, test_loss = model.evaluate(corpus.test, out_path=base_path + "test_.tsv")

# Re-bind with an annotation so the result type is explicit to readers/tools.
test_results: Result = test_results

# Summary line first, then the per-class breakdown.
print(test_results.log_line)
print(test_results.detailed_results)


0.8876	0.8924	0.89

MICRO_AVG: acc 0.8018 - f1-score 0.89
MACRO_AVG: acc 0.8658 - f1-score 0.9258200000000001
DATE       tp: 515 - fp: 51 - fn: 66 - tn: 515 - precision: 0.9099 - recall: 0.8864 - accuracy: 0.8149 - f1-score: 0.8980
LOC        tp: 54 - fp: 4 - fn: 6 - tn: 54 - precision: 0.9310 - recall: 0.9000 - accuracy: 0.8438 - f1-score: 0.9152
ORG        tp: 25 - fp: 0 - fn: 0 - tn: 25 - precision: 1.0000 - recall: 1.0000 - accuracy: 1.0000 - f1-score: 1.0000
PER        tp: 57 - fp: 4 - fn: 1 - tn: 57 - precision: 0.9344 - recall: 0.9828 - accuracy: 0.9194 - f1-score: 0.9580
TIME       tp: 344 - fp: 67 - fn: 47 - tn: 344 - precision: 0.8370 - recall: 0.8798 - accuracy: 0.7511 - f1-score: 0.8579


In [25]:
# NOTE(review): this cell originally read `rr[1]`, but no visible cell defines
# `rr`, so it raises NameError on a fresh kernel. The recorded output is a
# loss-like tensor, so `rr` presumably held the (result, loss) pair returned by
# `model.evaluate` -- confirm. The loss from the evaluation cell is shown instead.
test_loss

tensor(0.4846, device='cuda:0')