In [1]:
from typing import List

from flair.data import Corpus
from flair.datasets import UD_ENGLISH
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings

# 1. choose the tag type the model will learn to predict
tag_type = 'pos'

# 2. load the UD English corpus, downsampled to a fraction of 0.1
corpus: Corpus = UD_ENGLISH().downsample(0.1)
print(corpus)

# 3. build the dictionary of `tag_type` tags from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary)

2020-10-03 15:14:26,958 Reading data from C:\Users\a\.flair\datasets\ud_english
2020-10-03 15:14:26,969 Train: C:\Users\a\.flair\datasets\ud_english\en_ewt-ud-train.conllu
2020-10-03 15:14:26,971 Dev: C:\Users\a\.flair\datasets\ud_english\en_ewt-ud-dev.conllu
2020-10-03 15:14:26,973 Test: C:\Users\a\.flair\datasets\ud_english\en_ewt-ud-test.conllu
Corpus: 1254 train + 200 dev + 208 test sentences
Dictionary with 52 tags: <unk>, O, DT, NNP, IN, VBZ, JJ, ,, RB, MD, VB, VBG, NN, NNS, NNPS, ., VBD, VBN, CD, PRP, PRP$, TO, VBP, RBR, WRB, CC, JJR, HYPH, EX, -LRB-


In [6]:
# 4. initialize embeddings
# Each list entry is one token-level embedding. The list is wrapped in a single
# StackedEmbeddings object, which is what the sequence tagger receives.
embedding_types: List[TokenEmbeddings] = [

    WordEmbeddings('en-glove'),

    # Optional: character embeddings. CharacterEmbeddings is not imported at the
    # top of this notebook -- add it to the flair.embeddings import before
    # uncommenting the next line.
    # CharacterEmbeddings(),

    # Optional: flair embeddings. FlairEmbeddings is not imported at the top of
    # this notebook either -- add it to the flair.embeddings import before
    # uncommenting the next two lines.
    # FlairEmbeddings('news-forward'),
    # FlairEmbeddings('news-backward'),
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

2020-10-03 15:24:02,279 https://flair.informatik.hu-berlin.de/resources/embeddings/token/glove.gensim.vectors.npy not found in cache, downloading to C:\Users\a\AppData\Local\Temp\tmpkymfyqa_


100%|█████████████████████████| 160000128/160000128 [02:40<00:00, 996462.21B/s]

2020-10-03 15:26:44,840 copying C:\Users\a\AppData\Local\Temp\tmpkymfyqa_ to cache at C:\Users\a\.flair\embeddings\glove.gensim.vectors.npy





2020-10-03 15:26:45,084 removing temp file C:\Users\a\AppData\Local\Temp\tmpkymfyqa_
2020-10-03 15:26:47,123 https://flair.informatik.hu-berlin.de/resources/embeddings/token/glove.gensim not found in cache, downloading to C:\Users\a\AppData\Local\Temp\tmpoy9a74du


100%|██████████████████████████| 21494764/21494764 [00:17<00:00, 1212884.97B/s]

2020-10-03 15:27:06,738 copying C:\Users\a\AppData\Local\Temp\tmpoy9a74du to cache at C:\Users\a\.flair\embeddings\glove.gensim
2020-10-03 15:27:06,795 removing temp file C:\Users\a\AppData\Local\Temp\tmpoy9a74du





In [7]:
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# This cell relies on names created by the earlier cells: `embeddings`
# (the StackedEmbeddings object), `tag_dictionary`, `tag_type` and `corpus`.
# Run those cells first; `embeddings` is deliberately not rebuilt here.

# 5. initialize sequence tagger
tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=True)

# 6. initialize trainer
trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# 7. start training
# The first argument is the output location passed to the trainer.
# Reduce max_epochs for a quick trial run.
trainer.train('resources/taggers/example-pos',
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=150)

2020-10-03 15:27:57,451 ----------------------------------------------------------------------------------------------------
2020-10-03 15:27:57,454 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings('en-glove')
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (embedding2nn): Linear(in_features=100, out_features=100, bias=True)
  (rnn): LSTM(100, 256, batch_first=True, bidirectional=True)
  (linear): Linear(in_features=512, out_features=52, bias=True)
  (beta): 1.0
  (weights): None
  (weight_tensor) None
)"
2020-10-03 15:27:57,457 ----------------------------------------------------------------------------------------------------
2020-10-03 15:27:57,466 Corpus: "Corpus: 1254 train + 200 dev + 208 test sentences"
2020-10-03 15:27:57,469 ----------------------------------------------------------------------------------------------------
2020-10-03 15:27:57,483 Parameters:
2020-10-03 15:27:57,492  - learni

2020-10-03 15:30:46,499 epoch 5 - iter 20/40 - loss 27.54028978 - samples/sec: 43.34 - lr: 0.100000
2020-10-03 15:30:50,312 epoch 5 - iter 24/40 - loss 27.62365723 - samples/sec: 33.59 - lr: 0.100000
2020-10-03 15:30:54,425 epoch 5 - iter 28/40 - loss 27.59787703 - samples/sec: 31.13 - lr: 0.100000
2020-10-03 15:30:57,934 epoch 5 - iter 32/40 - loss 27.32355058 - samples/sec: 36.50 - lr: 0.100000
2020-10-03 15:31:01,585 epoch 5 - iter 36/40 - loss 27.16571013 - samples/sec: 35.08 - lr: 0.100000
2020-10-03 15:31:04,429 epoch 5 - iter 40/40 - loss 27.10799308 - samples/sec: 45.07 - lr: 0.100000
2020-10-03 15:31:04,431 ----------------------------------------------------------------------------------------------------
2020-10-03 15:31:04,433 EPOCH 5 done: loss 27.1080 - lr 0.1000000
2020-10-03 15:31:05,491 DEV : loss 15.066246032714844 - score 0.6348
2020-10-03 15:31:05,509 BAD EPOCHS (no improvement): 0
saving best model
2020-10-03 15:31:10,236 -------------------------------------------

2020-10-03 15:34:34,416 ----------------------------------------------------------------------------------------------------
2020-10-03 15:34:37,187 epoch 11 - iter 4/40 - loss 18.04439163 - samples/sec: 46.24 - lr: 0.100000
2020-10-03 15:34:40,698 epoch 11 - iter 8/40 - loss 19.08403063 - samples/sec: 36.49 - lr: 0.100000
2020-10-03 15:34:43,391 epoch 11 - iter 12/40 - loss 18.79509735 - samples/sec: 47.56 - lr: 0.100000
2020-10-03 15:34:46,739 epoch 11 - iter 16/40 - loss 19.03684533 - samples/sec: 38.25 - lr: 0.100000
2020-10-03 15:34:50,589 epoch 11 - iter 20/40 - loss 19.58128214 - samples/sec: 33.28 - lr: 0.100000
2020-10-03 15:34:53,470 epoch 11 - iter 24/40 - loss 19.10223238 - samples/sec: 44.47 - lr: 0.100000
2020-10-03 15:34:56,742 epoch 11 - iter 28/40 - loss 19.21657528 - samples/sec: 39.15 - lr: 0.100000
2020-10-03 15:35:00,001 epoch 11 - iter 32/40 - loss 19.14367366 - samples/sec: 39.29 - lr: 0.100000
2020-10-03 15:35:03,432 epoch 11 - iter 36/40 - loss 18.97628379 - sa

KeyboardInterrupt: 