In [1]:
import pickle
from pathlib import Path

from flair.data import Dictionary
from flair.models import LanguageModel
from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus

In [2]:
# Create the output directories for the forward and backward models up front.
# Pure-Python replacement for `!mkdir -p`: parents=True creates missing parent
# directories and exist_ok=True makes re-running this cell a no-op, without
# depending on a POSIX `mkdir` being available to the kernel's shell.
for model_dir in ('./models/ru-large-forward', './models/ru-large-backward'):
    Path(model_dir).mkdir(parents=True, exist_ok=True)

In [3]:
# Run configuration: every value the cells below read lives in this one mapping.
# NOTE: 'dictionary_path' is a runtime path; its spelling has to match the file
# on disk exactly, so it is reproduced verbatim.
params = dict(
    # Locations of the model output directory and the character dictionary.
    model_path='./models/ru-large-forward',
    dictionary_path='./data/dictionaty.pckl',
    # Compared against 'forward' further down to derive the direction flag.
    direction='forward',
    # Language-model size.
    hidden_size=2048,
    n_layers=1,
    # Training-loop settings handed to trainer.train().
    seq_len=250,
    batch_size=100,
    max_epochs=10,
)

In [4]:
# Load the character dictionary from the path configured in `params`.
# NOTE(review): the '.pckl' extension suggests a pickle file - presumably it is
# unpickled on load, so only point this at files you trust (confirm).
dictionary_path = params['dictionary_path']
dictionary: Dictionary = Dictionary.load_from_file(dictionary_path)

In [5]:
# Build the training corpus from ./data/corpus using the loaded dictionary.
# The third positional argument is a boolean derived from the configured
# direction: True when params['direction'] is 'forward', False otherwise.
is_forward_lm = params['direction'] == 'forward'
corpus: TextCorpus = TextCorpus(
    './data/corpus', dictionary, is_forward_lm, character_level=True
)
    

2019-09-28 18:49:03,666 read text file with 35045 lines
2019-09-28 18:52:48,356 read text file with 37097 lines


In [6]:
# Instantiate the language model over the same dictionary as the corpus.
# The direction flag is recomputed here so this cell does not depend on a
# variable defined in another cell.
is_forward_lm = params['direction'] == 'forward'
language_model = LanguageModel(
    dictionary,
    is_forward_lm,
    hidden_size=params['hidden_size'],
    nlayers=params['n_layers'],
)

In [7]:
# Pair the freshly built model with the corpus; training itself starts in the
# next cell via trainer.train().
trainer: LanguageModelTrainer = LanguageModelTrainer(language_model, corpus)

In [None]:
# Start training. The model path is passed positionally as the first argument
# (presumably the directory flair writes its output to - confirm); the
# remaining settings are forwarded by keyword straight from `params`.
train_options = {
    'sequence_length': params['seq_len'],
    'mini_batch_size': params['batch_size'],
    'max_epochs': params['max_epochs'],
}
trainer.train(params['model_path'], **train_options)

2019-09-28 18:56:49,421 read text file with 8357 lines
2019-09-28 18:56:49,421 read text file with 8403 lines
2019-09-28 18:56:49,428 shuffled
2019-09-28 18:56:49,428 shuffled
2019-09-28 18:57:42,079 read text file with 8304 lines
2019-09-28 18:57:42,085 shuffled
2019-09-28 18:57:42,255 Sequence length is 250
2019-09-28 18:57:42,296 read text file with 8307 lines
2019-09-28 18:57:42,301 shuffled
2019-09-28 18:57:42,302 Split 1	 - (18:57:42)
2019-09-28 18:58:14,867 | split   1 / 80 |   100/  418 batches | ms/batch 325.54 | loss  4.24 | ppl    69.60
2019-09-28 18:58:35,724 read text file with 8311 lines
2019-09-28 18:58:35,730 shuffled
2019-09-28 18:58:47,544 | split   1 / 80 |   200/  418 batches | ms/batch 326.77 | loss  3.15 | ppl    23.41
2019-09-28 18:59:20,503 | split   1 / 80 |   300/  418 batches | ms/batch 329.58 | loss  2.70 | ppl    14.88
2019-09-28 18:59:53,654 | split   1 / 80 |   400/  418 batches | ms/batch 331.51 | loss  2.37 | ppl    10.66
