In [1]:
import os

# Expose only GPU 0 to CUDA; this is set before the flair imports below run.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from flair.data import Corpus
from flair.datasets import ColumnCorpus

# Label type that is read from the data files and later predicted.
tag_type = 'ner'

# Folder holding the train/dev/test column files.
data_folder = './data'

# Column index -> field name: column 0 is the token text and column 3 the NER tag.
# Columns 1 and 2 are both mapped to '_' (presumably unused fields -- TODO confirm).
columns = {0: 'text', 1: '_', 2: '_', 3: 'ner'}

# File name of each split inside data_folder.
split_files = dict(train_file='train.txt',
                   test_file='test.txt',
                   dev_file='valid.txt')

corpus: Corpus = ColumnCorpus(data_folder, columns, **split_files)
print(corpus)

# Tag dictionary built from the corpus for the chosen label type.
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary)

2020-04-09 03:50:07,713 Reading data from data
2020-04-09 03:50:07,714 Train: data/train.txt
2020-04-09 03:50:07,715 Dev: data/valid.txt
2020-04-09 03:50:07,716 Test: data/test.txt
Corpus: 14041 train + 3250 dev + 3453 test sentences
Dictionary with 12 tags: <unk>, O, B-ORG, B-MISC, B-PER, I-PER, B-LOC, I-ORG, I-MISC, I-LOC, <START>, <STOP>


In [2]:
from flair.embeddings import ELMoEmbeddings,BertEmbeddings,FlairEmbeddings,XLNetEmbeddings
from flair.models import SequenceTagger
from ensemble_tagger import EnsembleTagger
from typing import List

def _build_tagger(embeddings):
    """Return a SequenceTagger over ``embeddings`` using the settings shared by all ensemble members.

    Every member uses hidden_size=256, the corpus tag dictionary, the
    module-level ``tag_type`` and a CRF layer; only the embeddings differ.
    """
    return SequenceTagger(hidden_size=256,
                          embeddings=embeddings,
                          tag_dictionary=tag_dictionary,
                          tag_type=tag_type,
                          use_crf=True)


# One tagger per embedding family (constructed in the order ELMo, BERT, XLNet).
elmo_tagger = _build_tagger(ELMoEmbeddings('small'))
bert_tagger = _build_tagger(BertEmbeddings())
xlnet_tagger = _build_tagger(XLNetEmbeddings())
# A FlairEmbeddings('en-forward') member with the same settings is currently disabled:
# flair_tagger = _build_tagger(FlairEmbeddings('en-forward'))

# Members are handed to the ensemble in the order XLNet, ELMo, BERT.
ensemble_members = [xlnet_tagger, elmo_tagger, bert_tagger]
ensemble_tagger = EnsembleTagger(models=ensemble_members,
                                 tag_type=tag_type,
                                 mode='loss')
print(str(ensemble_tagger))

# Output directory for training. The trailing slash matters: a later cell builds the
# checkpoint path with plain string concatenation (model_path + 'best-model.pt').
model_path = "/hdd1/kurisu/cs6207/log/ensemble/"

Ensemble Tagger: [
SequenceTagger(
  (embeddings): XLNetEmbeddings(
    model=xlnet-large-cased
    (model): XLNetModel(
      (word_embedding): Embedding(32000, 1024)
      (layer): ModuleList(
        (0): XLNetLayer(
          (rel_attn): XLNetRelativeAttention(
            (layer_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (ff): XLNetFeedForward(
            (layer_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
            (layer_1): Linear(in_features=1024, out_features=4096, bias=True)
            (layer_2): Linear(in_features=4096, out_features=1024, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (1): XLNetLayer(
          (rel_attn): XLNetRelativeAttention(
            (layer_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, i

In [None]:
from flair.trainers import ModelTrainer

# Hyperparameters forwarded to ModelTrainer.train().
train_settings = dict(learning_rate=0.01,
                      mini_batch_size=32,
                      max_epochs=150)

# Train the ensemble on the corpus; outputs are written under model_path.
trainer: ModelTrainer = ModelTrainer(ensemble_tagger, corpus)
trainer.train(model_path, **train_settings)

2020-04-09 03:51:07,202 ----------------------------------------------------------------------------------------------------
2020-04-09 03:51:07,210 Model: "Ensemble Tagger: [
SequenceTagger(
  (embeddings): XLNetEmbeddings(
    model=xlnet-large-cased
    (model): XLNetModel(
      (word_embedding): Embedding(32000, 1024)
      (layer): ModuleList(
        (0): XLNetLayer(
          (rel_attn): XLNetRelativeAttention(
            (layer_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (ff): XLNetFeedForward(
            (layer_norm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
            (layer_1): Linear(in_features=1024, out_features=4096, bias=True)
            (layer_2): Linear(in_features=4096, out_features=1024, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (1): XLNetLayer(
         

2020-04-09 03:51:07,212 ----------------------------------------------------------------------------------------------------
2020-04-09 03:51:07,214 Corpus: "Corpus: 14041 train + 3250 dev + 3453 test sentences"
2020-04-09 03:51:07,215 ----------------------------------------------------------------------------------------------------
2020-04-09 03:51:07,216 Parameters:
2020-04-09 03:51:07,217  - learning_rate: "0.01"
2020-04-09 03:51:07,217  - mini_batch_size: "32"
2020-04-09 03:51:07,218  - patience: "3"
2020-04-09 03:51:07,219  - anneal_factor: "0.5"
2020-04-09 03:51:07,221  - max_epochs: "150"
2020-04-09 03:51:07,222  - shuffle: "True"
2020-04-09 03:51:07,223  - train_with_dev: "False"
2020-04-09 03:51:07,224  - batch_growth_annealing: "False"
2020-04-09 03:51:07,224 ----------------------------------------------------------------------------------------------------
2020-04-09 03:51:07,225 Model training base path: "/hdd1/kurisu/cs6207/log/ensemble"
2020-04-09 03:51:07,225 --------

2020-04-09 05:46:43,870 epoch 6 - iter 215/439 - loss 2.12181409 - samples/sec: 27.32
2020-04-09 05:48:32,141 epoch 6 - iter 258/439 - loss 2.09430246 - samples/sec: 26.01
2020-04-09 05:50:18,658 epoch 6 - iter 301/439 - loss 2.08973149 - samples/sec: 26.90
2020-04-09 05:52:06,076 epoch 6 - iter 344/439 - loss 2.10253926 - samples/sec: 26.32
2020-04-09 05:54:00,127 epoch 6 - iter 387/439 - loss 2.09163527 - samples/sec: 27.04
2020-04-09 05:55:55,042 epoch 6 - iter 430/439 - loss 2.08742856 - samples/sec: 26.16
2020-04-09 05:57:00,681 ----------------------------------------------------------------------------------------------------
2020-04-09 05:57:00,683 EPOCH 6 done: loss 2.0808 - lr 0.0100
2020-04-09 06:00:21,306 DEV : loss 1.4148730258731281 - score 0.8857
2020-04-09 06:00:21,344 BAD EPOCHS (no improvement): 0
2020-04-09 06:00:47,570 ----------------------------------------------------------------------------------------------------
2020-04-09 06:01:39,279 epoch 7 - iter 43/439 - 

2020-04-09 08:17:57,065 epoch 12 - iter 430/439 - loss 1.50854994 - samples/sec: 26.52
2020-04-09 08:19:03,896 ----------------------------------------------------------------------------------------------------
2020-04-09 08:19:03,898 EPOCH 12 done: loss 1.5085 - lr 0.0100
2020-04-09 08:22:22,881 DEV : loss 1.0733929993767364 - score 0.9107
2020-04-09 08:22:22,918 BAD EPOCHS (no improvement): 0
2020-04-09 08:22:49,622 ----------------------------------------------------------------------------------------------------
2020-04-09 08:23:41,823 epoch 13 - iter 43/439 - loss 1.53249447 - samples/sec: 26.36
2020-04-09 08:25:31,313 epoch 13 - iter 86/439 - loss 1.49757430 - samples/sec: 26.20
2020-04-09 08:27:21,354 epoch 13 - iter 129/439 - loss 1.47264957 - samples/sec: 26.09
2020-04-09 08:29:10,872 epoch 13 - iter 172/439 - loss 1.46099316 - samples/sec: 26.25
2020-04-09 08:30:59,358 epoch 13 - iter 215/439 - loss 1.46703105 - samples/sec: 27.09
2020-04-09 08:32:48,254 epoch 13 - iter 258

2020-04-09 10:35:03,126 ----------------------------------------------------------------------------------------------------
2020-04-09 10:35:55,250 epoch 19 - iter 43/439 - loss 1.13429280 - samples/sec: 26.40
2020-04-09 10:37:43,473 epoch 19 - iter 86/439 - loss 1.17742671 - samples/sec: 26.63
2020-04-09 10:39:32,484 epoch 19 - iter 129/439 - loss 1.18181824 - samples/sec: 26.86
2020-04-09 10:41:22,232 epoch 19 - iter 172/439 - loss 1.18643664 - samples/sec: 26.18
2020-04-09 10:43:10,707 epoch 19 - iter 215/439 - loss 1.19045662 - samples/sec: 26.52
2020-04-09 10:45:00,559 epoch 19 - iter 258/439 - loss 1.19545245 - samples/sec: 26.18
2020-04-09 10:46:49,165 epoch 19 - iter 301/439 - loss 1.19693297 - samples/sec: 26.49
2020-04-09 10:48:37,241 epoch 19 - iter 344/439 - loss 1.20093590 - samples/sec: 26.84
2020-04-09 10:50:27,088 epoch 19 - iter 387/439 - loss 1.20607905 - samples/sec: 26.16
2020-04-09 10:52:17,010 epoch 19 - iter 430/439 - loss 1.21338079 - samples/sec: 26.05
2020-04

2020-04-09 12:53:33,230 epoch 25 - iter 172/439 - loss 1.03953488 - samples/sec: 27.01
2020-04-09 12:55:21,909 epoch 25 - iter 215/439 - loss 1.04553555 - samples/sec: 26.52
2020-04-09 12:57:11,085 epoch 25 - iter 258/439 - loss 1.03644982 - samples/sec: 26.36
2020-04-09 12:59:00,186 epoch 25 - iter 301/439 - loss 1.04528149 - samples/sec: 26.42
2020-04-09 13:00:49,946 epoch 25 - iter 344/439 - loss 1.05690599 - samples/sec: 26.82
2020-04-09 13:02:38,818 epoch 25 - iter 387/439 - loss 1.05694051 - samples/sec: 26.36
2020-04-09 13:04:28,505 epoch 25 - iter 430/439 - loss 1.05031599 - samples/sec: 26.02
2020-04-09 13:05:47,354 ----------------------------------------------------------------------------------------------------
2020-04-09 13:05:47,356 EPOCH 25 done: loss 1.0523 - lr 0.0100
2020-04-09 13:09:07,316 DEV : loss 0.8442029889088636 - score 0.9338
2020-04-09 13:09:07,354 BAD EPOCHS (no improvement): 2
2020-04-09 13:09:07,370 -------------------------------------------------------

In [None]:
import copy  # imported here so the cell runs regardless of which imports other cells provide

# Load the best checkpoint from the training output directory.
test_ensemble_tagger = EnsembleTagger.load(model_path + 'best-model.pt')

# First test sentence, still carrying the labels read from the corpus file.
sentence = corpus.test[0]

# Show the gold annotation before any prediction is made.
for entity in sentence.get_spans('ner'):
    print(entity)

for token in sentence.tokens:
    print(str(token.get_tag("ner")))
    print(str(token.get_tags_proba_dist("ner")))

# predict() writes its tags onto the sentence it receives. Predict on a deep copy so
# the gold 'ner' labels stored in corpus.test are not overwritten; otherwise any later
# cell that reads reference tags from corpus.test would see predictions for this sentence.
predicted_sentence = copy.deepcopy(sentence)
test_ensemble_tagger.predict(predicted_sentence, all_tag_prob=True)

# Show the predicted tag for every token.
for token in predicted_sentence.tokens:
    print(token.get_tag("ner").value)

In [None]:
from conlleval import evaluate

# Flat, corpus-ordered list of the 'ner' tag value stored on every test token.
# This is the reference sequence for evaluation, so it is only meaningful while
# corpus.test still holds the labels read from file (i.e. before any in-place prediction).
real = [token.get_tag("ner").value
        for sentence in corpus.test
        for token in sentence.tokens]

def test(model, data):
    results = []
    for sentence in data:
        model.predict(sentence,all_tag_prob=True)
        for token in sentence.tokens:
            results.append(token.get_tag("ner").value)
    return results

def _predict_and_report(tagger):
    """Predict on the test split with ``tagger``, print the evaluate() result, return the predictions."""
    predicted = test(tagger, corpus.test)
    print(evaluate(real, predicted))
    return predicted


# Score the reloaded ensemble first, then the ELMo member on its own.
# NOTE(review): elmo_tagger is the object held in kernel memory, not a checkpoint loaded
# from disk -- confirm it carries trained weights before trusting its score.
ensemble_pred = _predict_and_report(test_ensemble_tagger)
elmo_pred = _predict_and_report(elmo_tagger)
# Scoring of the remaining members is disabled:
# bert_pred = _predict_and_report(bert_tagger)
# xlnet_pred = _predict_and_report(xlnet_tagger)

processed 46435 tokens with 6847 phrases; found: 6417 phrases; correct: 61.
accuracy:   2.41%; (non-O)
accuracy:   2.56%; precision:   0.95%; recall:   0.89%; FB1:   0.92
              LOC: precision:   2.59%; recall:   1.51%; FB1:   1.91  1895
             MISC: precision:   0.00%; recall:   0.00%; FB1:   0.00  367
              ORG: precision:   0.56%; recall:   8.89%; FB1:   1.05  2140
              PER: precision:   0.00%; recall:   0.00%; FB1:   0.00  2015
(0.9505999688327879, 0.8909011245801081, 0.9197828709288298)
