In [21]:
from flair.data import Corpus
from flair.data_fetcher import NLPTaskDataFetcher
from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings, FlairEmbeddings, PooledFlairEmbeddings
from flair.trainers import ModelTrainer
from flair.models import SequenceTagger
from typing import List
import argparse
import os

# 1. get the corpus
column_format = {0: 'text', 1: 'ner'}

data_folder = r"C:\Users\augus\Desktop\137 Final Project\data\data"

corpus: Corpus = NLPTaskDataFetcher.load_column_corpus(data_folder, 
                                    train_file='FIN5.txt',
                                    test_file='FIN3.txt',
                                    column_format = column_format,)
                                    #tag_to_biloes="ner")
# 2. what tag do we want to predict?
tag_type = 'ner'

# 3. make the tag dictionary from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
print(tag_dictionary)

# 4. initialize embeddings
embedding_types = [

    WordEmbeddings('glove'),

    # comment in this line to use character embeddings
    # CharacterEmbeddings(),

    # comment in these lines to use flair embeddings
    FlairEmbeddings('news-forward'),
    FlairEmbeddings('news-backward'),
]

embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

# 5. initialize sequence tagger
from flair.models import SequenceTagger

tagger: SequenceTagger = SequenceTagger(hidden_size=256,
                                        embeddings=embeddings,
                                        tag_dictionary=tag_dictionary,
                                        tag_type=tag_type,
                                        use_crf=False)

# 6. initialize trainer
from flair.trainers import ModelTrainer

trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# 7. start training
trainer.train(r'./data/train/',
              learning_rate=0.1,
              mini_batch_size=32,
              max_epochs=70)

2021-05-12 23:42:09,386 Reading data from C:\Users\augus\Desktop\137 Final Project\data\data
2021-05-12 23:42:09,388 Train: C:\Users\augus\Desktop\137 Final Project\data\data\FIN5.txt
2021-05-12 23:42:09,388 Dev: None
2021-05-12 23:42:09,389 Test: C:\Users\augus\Desktop\137 Final Project\data\data\FIN3.txt




Dictionary with 45 tags: <unk>, O, -X-, CD, ,, NNP, VBG, NN, IN, CC, NNPS, ., :, WP$, VBZ, JJ, DT, NNS, WDT, MD, VB, TO, VBP, RB, LS, VBN, POS, JJR, PRP, $
2021-05-12 23:42:10,639 ----------------------------------------------------------------------------------------------------
2021-05-12 23:42:10,641 Model: "SequenceTagger(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings('glove')
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=300, bias=True)
      )
    )
    (list_embedding_2): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.05, inplace=False)
        (encoder): Embedding(300, 100)
        (rnn): LSTM(100, 2048)
        (decoder): Linear(in_features=2048, out_features=300, bias=True)
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  

{'test_score': 0.1323,
 'dev_score_history': [],
 'train_loss_history': [],
 'dev_loss_history': []}

In [14]:
directory = os.getcwd()

In [15]:
directory

'C:\\Users\\augus\\Desktop\\137 Final Project'