In [1]:
# Imports
from time import time
from os.path import join as join_path
import numpy as np
import pandas as pd

import multiprocessing
cores = multiprocessing.cpu_count()

from gensim.models import Word2Vec, Doc2Vec
from gensim.models.callbacks import CallbackAny2Vec
from gensim.models.doc2vec import TaggedDocument
import logging # Setting up the loggings to monitor gensim
logging.basicConfig(format="%(levelname)s - %(asctime)s: %(message)s", datefmt= '%H:%M:%S', level=logging.INFO)

import nltk
nltk.download('punkt')

from utils import clean_text
from tqdm.notebook import tqdm

from sklearn.metrics.pairwise import cosine_similarity

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Load and prepare data

In [2]:
# Constants
# Word2vec checkpoints and the per-epoch loss log
saved_models_prefix = 'model'
w2v_saved_models_dir = 'models-word2vec-new'
train_logs_path = 'train_logs.txt'  # file name only; EpochSaver joins it with its output directory

# CORD-19 input data
cord_data_dir = 'data'
cord_data_path = join_path(cord_data_dir, 'cord-19-data.csv')

In [3]:
# Read the full CORD-19 table, keep only the rows tagged as English,
# and pull out their body texts as a plain array.
cord_data = pd.read_csv(cord_data_path)
cord_data_eng = cord_data.loc[cord_data['language'].eq('en')]
eng_texts = cord_data_eng['body_text'].values

In [4]:
# Count the sentences of every English text up front; the total is reused
# later as the progress-bar length when building the vocabulary.
cord_num_sentences = sum(
    len(nltk.tokenize.sent_tokenize(text))
    for text in tqdm(eng_texts)
)
print(f'Total number of CORD-19 sentences: {cord_num_sentences}')

HBox(children=(IntProgress(value=0, max=35708), HTML(value='')))


Total number of CORD-19 sentences: 7097680


In [5]:
class CORDDataIteratorWord2Vec():
    '''Re-iterable stream of cleaned sentences drawn from a collection of texts.

    Every call to `__iter__` walks the texts again, so the same object can be
    consumed several times (each pass repeats the tokenizing and cleaning).
    '''

    def __init__(self, texts: np.ndarray):
        # Raw document texts; each one is split into sentences on iteration.
        self.texts = texts

    def __iter__(self):
        '''Yield `clean_text(sentence)` for every sentence of every text, in order.'''
        for document in self.texts:
            # Clean all sentences of one document first, then hand them out one by one.
            yield from [
                clean_text(raw_sentence)
                for raw_sentence in nltk.tokenize.sent_tokenize(document)
            ]

In [6]:
# Sentence stream over the English texts. Iterating it again re-runs the
# sentence tokenizer and clean_text on every text (nothing is cached).
cord_sentences = CORDDataIteratorWord2Vec(eng_texts)

## Learn word embeddings using Word2vec

In [7]:
class EpochSaver(CallbackAny2Vec):
    '''Callback that logs the per-epoch training loss and saves the model after each epoch.

    Args:
        output_dir: Directory that receives the loss log and the model checkpoints.
            It is created if it does not exist yet.
        prefix: File name prefix of the checkpoints (`{prefix}_epoch_{n}.model`).
        logs_filename: Name of the loss log file inside `output_dir`.
    '''

    def __init__(self, output_dir: str, prefix: str, logs_filename: str):
        from os import makedirs  # local import keeps this cell self-contained

        self.output_dir = output_dir
        self.prefix = prefix
        self.logs_filename = logs_filename
        self.epoch = 1
        # Cumulative loss reported at the end of the previous epoch (0 before the first one).
        self.loss_previous_step = 0.0

        # Fail early / prepare the target directory now rather than discovering
        # a missing directory only after the first (long) epoch has finished.
        if output_dir:
            makedirs(output_dir, exist_ok=True)

    def on_train_begin(self, model):
        '''Reset the loss baseline, since the model's running loss restarts with each `train()` call.'''
        self.loss_previous_step = 0.0

    def on_epoch_end(self, model):
        '''Append this epoch's loss to the log file and save a checkpoint of `model`.'''
        # The value reported by the model is cumulative over the current training
        # run, so the loss of this epoch alone is the increase since the last reading.
        cum_loss = model.get_latest_training_loss()
        loss = cum_loss - self.loss_previous_step
        # Keep the cumulative value (not the per-epoch delta) as the next baseline.
        self.loss_previous_step = cum_loss
        with open(join_path(self.output_dir, self.logs_filename), 'a+') as file:
            file.write(f'Epoch #{self.epoch}, loss: {loss}\n')

        output_path = join_path(self.output_dir, f'{self.prefix}_epoch_{self.epoch}.model')
        model.save(output_path)
        self.epoch += 1

In [8]:
# Setup initial model
w2v_model = Word2Vec(
    min_count=20,
    window=2,
    size=300,
    negative=5,
    # Leave one core free, but never go below one worker: `cores - 1` is 0 on a
    # single-core machine, which would leave no worker threads to do the training.
    workers=max(1, cores - 1),
    compute_loss=True,
    # Logs the loss and saves a checkpoint at the end of every epoch.
    callbacks=[EpochSaver(w2v_saved_models_dir, saved_models_prefix, train_logs_path)]
)

In [9]:
# Build vocabulary
t = time()
w2v_model.build_vocab(
    tqdm(cord_sentences, total=cord_num_sentences),
    # Report progress about every 1% of the corpus. Clamp to at least 1 so a
    # small corpus (< 100 sentences) does not produce an interval of 0.
    progress_per=max(1, cord_num_sentences // 100),
)
print(f'Time to build vocab: {round((time() - t) / 60, 2)} mins')

HBox(children=(IntProgress(value=0, max=7097680), HTML(value='')))

INFO - 14:13:43: collecting all words and their counts
INFO - 14:13:45: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 14:14:05: PROGRESS: at sentence #70976, processed 1013256 words, keeping 51891 word types
INFO - 14:14:25: PROGRESS: at sentence #141952, processed 2029040 words, keeping 84651 word types
INFO - 14:14:44: PROGRESS: at sentence #212928, processed 3054106 words, keeping 115992 word types
INFO - 14:15:03: PROGRESS: at sentence #283904, processed 4068911 words, keeping 142369 word types
INFO - 14:15:23: PROGRESS: at sentence #354880, processed 5105447 words, keeping 164500 word types
INFO - 14:15:43: PROGRESS: at sentence #425856, processed 6124854 words, keeping 185561 word types
INFO - 14:16:02: PROGRESS: at sentence #496832, processed 7156844 words, keeping 206875 word types
INFO - 14:16:21: PROGRESS: at sentence #567808, processed 8149173 words, keeping 233078 word types
INFO - 14:16:41: PROGRESS: at sentence #638784, processed 9174245 words, 

INFO - 14:39:55: PROGRESS: at sentence #5749056, processed 78309142 words, keeping 1293451 word types
INFO - 14:40:14: PROGRESS: at sentence #5820032, processed 79280301 words, keeping 1302643 word types
INFO - 14:40:33: PROGRESS: at sentence #5891008, processed 80236804 words, keeping 1311421 word types
INFO - 14:40:53: PROGRESS: at sentence #5961984, processed 81191278 words, keeping 1320204 word types
INFO - 14:41:12: PROGRESS: at sentence #6032960, processed 82148730 words, keeping 1328579 word types
INFO - 14:41:31: PROGRESS: at sentence #6103936, processed 83102084 words, keeping 1339290 word types
INFO - 14:41:50: PROGRESS: at sentence #6174912, processed 84058376 words, keeping 1348821 word types
INFO - 14:42:09: PROGRESS: at sentence #6245888, processed 85011177 words, keeping 1357663 word types
INFO - 14:42:28: PROGRESS: at sentence #6316864, processed 85969150 words, keeping 1367554 word types
INFO - 14:42:48: PROGRESS: at sentence #6387840, processed 86933176 words, keeping




INFO - 14:46:10: effective_min_count=20 retains 93856 unique words (6% of original 1463257, drops 1369401)
INFO - 14:46:10: effective_min_count=20 leaves 93320129 word corpus (96% of original 96497880, drops 3177751)
INFO - 14:46:10: deleting the raw counts dictionary of 1463257 items
INFO - 14:46:10: sample=0.001 downsamples 17 most-common words
INFO - 14:46:10: downsampling leaves estimated 90805423 word corpus (97.3% of prior 93320129)
INFO - 14:46:10: estimated required memory for 93856 words and 300 dimensions: 272182400 bytes
INFO - 14:46:10: resetting layer weights


Time to build vocab: 32.77 mins


In [None]:
# Train model
t = time()
# `train()` has its own `compute_loss` argument (default False) that replaces the
# flag given to the constructor, so it has to be passed again here; without it
# the loss read by the epoch callback is never accumulated.
w2v_model.train(
    cord_sentences,
    total_examples=w2v_model.corpus_count,
    epochs=15,
    report_delay=30,
    compute_loss=True,
)
print(f'Time to train the model: {round((time() - t) / 60, 2)} mins')

INFO - 14:46:29: training model with 15 workers on 93856 vocabulary and 300 features, using sg=0 hs=0 sample=0.001 negative=5 window=2
INFO - 14:46:30: EPOCH 1 - PROGRESS: at 0.05% examples, 41887 words/s, in_qsize 0, out_qsize 0
INFO - 14:47:00: EPOCH 1 - PROGRESS: at 1.45% examples, 44325 words/s, in_qsize 0, out_qsize 0
INFO - 14:47:30: EPOCH 1 - PROGRESS: at 2.96% examples, 46173 words/s, in_qsize 0, out_qsize 0
INFO - 14:48:00: EPOCH 1 - PROGRESS: at 4.47% examples, 46879 words/s, in_qsize 0, out_qsize 0
INFO - 14:48:31: EPOCH 1 - PROGRESS: at 5.97% examples, 47092 words/s, in_qsize 0, out_qsize 0
INFO - 14:49:01: EPOCH 1 - PROGRESS: at 7.50% examples, 47276 words/s, in_qsize 0, out_qsize 0
INFO - 14:49:31: EPOCH 1 - PROGRESS: at 8.98% examples, 47214 words/s, in_qsize 0, out_qsize 0
INFO - 14:50:01: EPOCH 1 - PROGRESS: at 10.48% examples, 47370 words/s, in_qsize 0, out_qsize 0
INFO - 14:50:35: EPOCH 1 - PROGRESS: at 11.71% examples, 45709 words/s, in_qsize 29, out_qsize 0
INFO - 

INFO - 15:19:28: EPOCH 2 - PROGRESS: at 4.65% examples, 48666 words/s, in_qsize 0, out_qsize 0
INFO - 15:19:58: EPOCH 2 - PROGRESS: at 6.20% examples, 48795 words/s, in_qsize 0, out_qsize 0
INFO - 15:20:29: EPOCH 2 - PROGRESS: at 7.74% examples, 48496 words/s, in_qsize 0, out_qsize 0
INFO - 15:20:59: EPOCH 2 - PROGRESS: at 9.33% examples, 48930 words/s, in_qsize 0, out_qsize 0
INFO - 15:21:29: EPOCH 2 - PROGRESS: at 10.91% examples, 49140 words/s, in_qsize 0, out_qsize 0
INFO - 15:21:59: EPOCH 2 - PROGRESS: at 12.50% examples, 49165 words/s, in_qsize 0, out_qsize 0
INFO - 15:22:29: EPOCH 2 - PROGRESS: at 14.07% examples, 49256 words/s, in_qsize 0, out_qsize 0
INFO - 15:22:59: EPOCH 2 - PROGRESS: at 15.63% examples, 49289 words/s, in_qsize 0, out_qsize 0
INFO - 15:23:29: EPOCH 2 - PROGRESS: at 17.22% examples, 49426 words/s, in_qsize 0, out_qsize 0
INFO - 15:23:59: EPOCH 2 - PROGRESS: at 18.82% examples, 49500 words/s, in_qsize 0, out_qsize 0
INFO - 15:24:29: EPOCH 2 - PROGRESS: at 20.3

INFO - 15:53:25: EPOCH 3 - PROGRESS: at 15.59% examples, 49358 words/s, in_qsize 0, out_qsize 0
INFO - 15:53:55: EPOCH 3 - PROGRESS: at 17.18% examples, 49487 words/s, in_qsize 0, out_qsize 0
INFO - 15:54:25: EPOCH 3 - PROGRESS: at 18.73% examples, 49431 words/s, in_qsize 0, out_qsize 0
INFO - 15:54:55: EPOCH 3 - PROGRESS: at 20.29% examples, 49487 words/s, in_qsize 0, out_qsize 0
INFO - 15:55:25: EPOCH 3 - PROGRESS: at 21.82% examples, 49494 words/s, in_qsize 0, out_qsize 0
INFO - 15:55:55: EPOCH 3 - PROGRESS: at 23.35% examples, 49391 words/s, in_qsize 0, out_qsize 0
INFO - 15:56:26: EPOCH 3 - PROGRESS: at 24.86% examples, 49298 words/s, in_qsize 0, out_qsize 0
INFO - 15:56:56: EPOCH 3 - PROGRESS: at 26.40% examples, 49283 words/s, in_qsize 0, out_qsize 0
INFO - 15:57:26: EPOCH 3 - PROGRESS: at 27.92% examples, 49270 words/s, in_qsize 0, out_qsize 0
INFO - 15:57:56: EPOCH 3 - PROGRESS: at 29.48% examples, 49215 words/s, in_qsize 0, out_qsize 0
INFO - 15:58:26: EPOCH 3 - PROGRESS: at 

INFO - 16:27:22: EPOCH 4 - PROGRESS: at 21.56% examples, 48520 words/s, in_qsize 0, out_qsize 0
INFO - 16:27:52: EPOCH 4 - PROGRESS: at 23.14% examples, 48616 words/s, in_qsize 0, out_qsize 0
INFO - 16:28:22: EPOCH 4 - PROGRESS: at 24.70% examples, 48645 words/s, in_qsize 0, out_qsize 0
INFO - 16:28:53: EPOCH 4 - PROGRESS: at 26.28% examples, 48730 words/s, in_qsize 0, out_qsize 0
INFO - 16:29:23: EPOCH 4 - PROGRESS: at 27.86% examples, 48869 words/s, in_qsize 0, out_qsize 0
INFO - 16:29:53: EPOCH 4 - PROGRESS: at 29.45% examples, 48893 words/s, in_qsize 0, out_qsize 0
INFO - 16:30:23: EPOCH 4 - PROGRESS: at 31.09% examples, 48830 words/s, in_qsize 0, out_qsize 0
INFO - 16:30:53: EPOCH 4 - PROGRESS: at 32.70% examples, 48860 words/s, in_qsize 0, out_qsize 0
INFO - 16:31:23: EPOCH 4 - PROGRESS: at 34.27% examples, 48909 words/s, in_qsize 0, out_qsize 0
INFO - 16:31:53: EPOCH 4 - PROGRESS: at 35.91% examples, 48982 words/s, in_qsize 0, out_qsize 0
INFO - 16:32:23: EPOCH 4 - PROGRESS: at 

INFO - 17:01:30: EPOCH 5 - PROGRESS: at 27.74% examples, 48770 words/s, in_qsize 0, out_qsize 0
INFO - 17:02:00: EPOCH 5 - PROGRESS: at 29.30% examples, 48800 words/s, in_qsize 0, out_qsize 0
INFO - 17:02:30: EPOCH 5 - PROGRESS: at 30.92% examples, 48695 words/s, in_qsize 0, out_qsize 0
INFO - 17:03:00: EPOCH 5 - PROGRESS: at 32.52% examples, 48677 words/s, in_qsize 0, out_qsize 0
INFO - 17:03:30: EPOCH 5 - PROGRESS: at 34.05% examples, 48684 words/s, in_qsize 0, out_qsize 0
INFO - 17:04:00: EPOCH 5 - PROGRESS: at 35.62% examples, 48700 words/s, in_qsize 0, out_qsize 0
INFO - 17:04:30: EPOCH 5 - PROGRESS: at 37.19% examples, 48733 words/s, in_qsize 0, out_qsize 0
INFO - 17:05:01: EPOCH 5 - PROGRESS: at 38.74% examples, 48750 words/s, in_qsize 0, out_qsize 0
INFO - 17:05:31: EPOCH 5 - PROGRESS: at 40.31% examples, 48774 words/s, in_qsize 0, out_qsize 0
INFO - 17:06:01: EPOCH 5 - PROGRESS: at 41.93% examples, 48629 words/s, in_qsize 0, out_qsize 0
INFO - 17:06:31: EPOCH 5 - PROGRESS: at 

INFO - 17:35:18: EPOCH 6 - PROGRESS: at 36.10% examples, 49461 words/s, in_qsize 0, out_qsize 0
INFO - 17:35:49: EPOCH 6 - PROGRESS: at 37.72% examples, 49511 words/s, in_qsize 0, out_qsize 0
INFO - 17:36:19: EPOCH 6 - PROGRESS: at 39.32% examples, 49571 words/s, in_qsize 0, out_qsize 0
INFO - 17:36:50: EPOCH 6 - PROGRESS: at 40.94% examples, 49478 words/s, in_qsize 0, out_qsize 0
INFO - 17:37:20: EPOCH 6 - PROGRESS: at 42.60% examples, 49429 words/s, in_qsize 0, out_qsize 0
INFO - 17:37:50: EPOCH 6 - PROGRESS: at 44.27% examples, 49310 words/s, in_qsize 0, out_qsize 0
INFO - 17:38:20: EPOCH 6 - PROGRESS: at 45.93% examples, 49217 words/s, in_qsize 0, out_qsize 0
INFO - 17:38:50: EPOCH 6 - PROGRESS: at 47.58% examples, 49154 words/s, in_qsize 0, out_qsize 0
INFO - 17:39:20: EPOCH 6 - PROGRESS: at 49.23% examples, 49136 words/s, in_qsize 0, out_qsize 0
INFO - 17:39:50: EPOCH 6 - PROGRESS: at 50.89% examples, 49131 words/s, in_qsize 0, out_qsize 0
INFO - 17:40:21: EPOCH 6 - PROGRESS: at 

INFO - 18:08:59: EPOCH 7 - PROGRESS: at 42.93% examples, 48037 words/s, in_qsize 0, out_qsize 0
INFO - 18:09:29: EPOCH 7 - PROGRESS: at 44.49% examples, 47845 words/s, in_qsize 0, out_qsize 0
INFO - 18:09:59: EPOCH 7 - PROGRESS: at 46.07% examples, 47734 words/s, in_qsize 0, out_qsize 0
INFO - 18:10:29: EPOCH 7 - PROGRESS: at 47.60% examples, 47622 words/s, in_qsize 0, out_qsize 0
INFO - 18:10:59: EPOCH 7 - PROGRESS: at 49.16% examples, 47569 words/s, in_qsize 0, out_qsize 0
INFO - 18:11:29: EPOCH 7 - PROGRESS: at 50.72% examples, 47520 words/s, in_qsize 0, out_qsize 0
INFO - 18:11:59: EPOCH 7 - PROGRESS: at 52.32% examples, 47440 words/s, in_qsize 0, out_qsize 0
INFO - 18:12:30: EPOCH 7 - PROGRESS: at 53.86% examples, 47383 words/s, in_qsize 0, out_qsize 0
INFO - 18:13:00: EPOCH 7 - PROGRESS: at 55.40% examples, 47361 words/s, in_qsize 0, out_qsize 0
INFO - 18:13:30: EPOCH 7 - PROGRESS: at 56.95% examples, 47326 words/s, in_qsize 0, out_qsize 0
INFO - 18:14:00: EPOCH 7 - PROGRESS: at 

INFO - 18:43:03: EPOCH 8 - PROGRESS: at 46.23% examples, 47943 words/s, in_qsize 0, out_qsize 0
INFO - 18:43:34: EPOCH 8 - PROGRESS: at 47.84% examples, 47893 words/s, in_qsize 0, out_qsize 0
INFO - 18:44:04: EPOCH 8 - PROGRESS: at 49.44% examples, 47890 words/s, in_qsize 0, out_qsize 0
INFO - 18:44:34: EPOCH 8 - PROGRESS: at 50.98% examples, 47803 words/s, in_qsize 0, out_qsize 0
INFO - 18:45:04: EPOCH 8 - PROGRESS: at 52.64% examples, 47786 words/s, in_qsize 0, out_qsize 0
INFO - 18:45:34: EPOCH 8 - PROGRESS: at 54.30% examples, 47822 words/s, in_qsize 0, out_qsize 0
INFO - 18:46:04: EPOCH 8 - PROGRESS: at 55.97% examples, 47863 words/s, in_qsize 0, out_qsize 0
INFO - 18:46:34: EPOCH 8 - PROGRESS: at 57.61% examples, 47881 words/s, in_qsize 0, out_qsize 0
INFO - 18:47:04: EPOCH 8 - PROGRESS: at 59.38% examples, 47949 words/s, in_qsize 0, out_qsize 0
INFO - 18:47:35: EPOCH 8 - PROGRESS: at 61.14% examples, 47938 words/s, in_qsize 0, out_qsize 0
INFO - 18:48:05: EPOCH 8 - PROGRESS: at 

INFO - 19:17:12: EPOCH 9 - PROGRESS: at 56.13% examples, 49311 words/s, in_qsize 0, out_qsize 0
INFO - 19:17:43: EPOCH 9 - PROGRESS: at 57.80% examples, 49279 words/s, in_qsize 0, out_qsize 0
INFO - 19:18:13: EPOCH 9 - PROGRESS: at 59.57% examples, 49300 words/s, in_qsize 0, out_qsize 0
INFO - 19:18:43: EPOCH 9 - PROGRESS: at 61.35% examples, 49277 words/s, in_qsize 0, out_qsize 0
INFO - 19:19:13: EPOCH 9 - PROGRESS: at 63.10% examples, 49234 words/s, in_qsize 0, out_qsize 0
INFO - 19:19:43: EPOCH 9 - PROGRESS: at 64.71% examples, 49188 words/s, in_qsize 0, out_qsize 0
INFO - 19:20:14: EPOCH 9 - PROGRESS: at 66.41% examples, 49211 words/s, in_qsize 0, out_qsize 0
INFO - 19:20:44: EPOCH 9 - PROGRESS: at 68.02% examples, 49183 words/s, in_qsize 0, out_qsize 0
INFO - 19:21:14: EPOCH 9 - PROGRESS: at 69.76% examples, 49080 words/s, in_qsize 0, out_qsize 0
INFO - 19:21:44: EPOCH 9 - PROGRESS: at 71.51% examples, 49044 words/s, in_qsize 0, out_qsize 0
INFO - 19:22:14: EPOCH 9 - PROGRESS: at 

INFO - 19:50:48: EPOCH 10 - PROGRESS: at 64.57% examples, 49042 words/s, in_qsize 0, out_qsize 0
INFO - 19:51:18: EPOCH 10 - PROGRESS: at 66.31% examples, 49074 words/s, in_qsize 0, out_qsize 0
INFO - 19:51:49: EPOCH 10 - PROGRESS: at 68.05% examples, 49123 words/s, in_qsize 0, out_qsize 0
INFO - 19:52:19: EPOCH 10 - PROGRESS: at 69.78% examples, 49025 words/s, in_qsize 0, out_qsize 0
INFO - 19:52:49: EPOCH 10 - PROGRESS: at 71.42% examples, 48911 words/s, in_qsize 0, out_qsize 0
INFO - 19:53:19: EPOCH 10 - PROGRESS: at 73.01% examples, 48878 words/s, in_qsize 0, out_qsize 0
INFO - 19:53:50: EPOCH 10 - PROGRESS: at 74.68% examples, 48900 words/s, in_qsize 0, out_qsize 0
INFO - 19:54:20: EPOCH 10 - PROGRESS: at 76.34% examples, 48927 words/s, in_qsize 0, out_qsize 0
INFO - 19:54:50: EPOCH 10 - PROGRESS: at 78.01% examples, 48946 words/s, in_qsize 0, out_qsize 0
INFO - 19:55:20: EPOCH 10 - PROGRESS: at 79.68% examples, 48963 words/s, in_qsize 0, out_qsize 0
INFO - 19:55:50: EPOCH 10 - PR

INFO - 20:24:15: EPOCH 11 - PROGRESS: at 72.66% examples, 48669 words/s, in_qsize 0, out_qsize 0
INFO - 20:24:45: EPOCH 11 - PROGRESS: at 74.30% examples, 48679 words/s, in_qsize 0, out_qsize 0
INFO - 20:25:16: EPOCH 11 - PROGRESS: at 75.98% examples, 48722 words/s, in_qsize 0, out_qsize 0
INFO - 20:25:46: EPOCH 11 - PROGRESS: at 77.62% examples, 48719 words/s, in_qsize 0, out_qsize 0
INFO - 20:26:16: EPOCH 11 - PROGRESS: at 79.21% examples, 48692 words/s, in_qsize 0, out_qsize 0
INFO - 20:26:46: EPOCH 11 - PROGRESS: at 80.82% examples, 48680 words/s, in_qsize 0, out_qsize 0
INFO - 20:27:16: EPOCH 11 - PROGRESS: at 82.40% examples, 48665 words/s, in_qsize 0, out_qsize 0
INFO - 20:27:46: EPOCH 11 - PROGRESS: at 84.05% examples, 48685 words/s, in_qsize 0, out_qsize 0
INFO - 20:28:16: EPOCH 11 - PROGRESS: at 85.71% examples, 48702 words/s, in_qsize 0, out_qsize 0
INFO - 20:28:46: EPOCH 11 - PROGRESS: at 87.38% examples, 48723 words/s, in_qsize 0, out_qsize 0
INFO - 20:29:16: EPOCH 11 - PR

INFO - 20:57:47: EPOCH 12 - PROGRESS: at 82.04% examples, 49376 words/s, in_qsize 0, out_qsize 0
INFO - 20:58:17: EPOCH 12 - PROGRESS: at 83.70% examples, 49382 words/s, in_qsize 0, out_qsize 0
INFO - 20:58:48: EPOCH 12 - PROGRESS: at 85.25% examples, 49297 words/s, in_qsize 0, out_qsize 0
INFO - 20:59:18: EPOCH 12 - PROGRESS: at 87.00% examples, 49357 words/s, in_qsize 0, out_qsize 0
INFO - 20:59:48: EPOCH 12 - PROGRESS: at 88.64% examples, 49348 words/s, in_qsize 0, out_qsize 0
INFO - 21:00:18: EPOCH 12 - PROGRESS: at 90.28% examples, 49353 words/s, in_qsize 0, out_qsize 0
INFO - 21:00:48: EPOCH 12 - PROGRESS: at 91.89% examples, 49346 words/s, in_qsize 0, out_qsize 0
INFO - 21:01:18: EPOCH 12 - PROGRESS: at 93.42% examples, 49292 words/s, in_qsize 0, out_qsize 0
INFO - 21:01:48: EPOCH 12 - PROGRESS: at 95.02% examples, 49260 words/s, in_qsize 0, out_qsize 0
INFO - 21:02:18: EPOCH 12 - PROGRESS: at 96.63% examples, 49245 words/s, in_qsize 0, out_qsize 0
INFO - 21:02:48: EPOCH 12 - PR

INFO - 21:31:00: EPOCH 13 - PROGRESS: at 87.98% examples, 48214 words/s, in_qsize 0, out_qsize 0
INFO - 21:31:30: EPOCH 13 - PROGRESS: at 89.61% examples, 48236 words/s, in_qsize 0, out_qsize 0
INFO - 21:32:00: EPOCH 13 - PROGRESS: at 91.29% examples, 48274 words/s, in_qsize 0, out_qsize 0
INFO - 21:32:31: EPOCH 13 - PROGRESS: at 92.93% examples, 48296 words/s, in_qsize 0, out_qsize 0
INFO - 21:33:01: EPOCH 13 - PROGRESS: at 94.50% examples, 48273 words/s, in_qsize 0, out_qsize 0
INFO - 21:33:31: EPOCH 13 - PROGRESS: at 96.22% examples, 48318 words/s, in_qsize 0, out_qsize 0
INFO - 21:34:01: EPOCH 13 - PROGRESS: at 97.85% examples, 48322 words/s, in_qsize 0, out_qsize 0
INFO - 21:34:31: EPOCH 13 - PROGRESS: at 99.49% examples, 48332 words/s, in_qsize 0, out_qsize 0
INFO - 21:34:41: worker thread finished; awaiting finish of 14 more threads
INFO - 21:34:41: worker thread finished; awaiting finish of 13 more threads
INFO - 21:34:41: worker thread finished; awaiting finish of 12 more thre

INFO - 22:04:20: EPOCH 14 - PROGRESS: at 97.15% examples, 49604 words/s, in_qsize 0, out_qsize 0
INFO - 22:04:50: EPOCH 14 - PROGRESS: at 98.80% examples, 49594 words/s, in_qsize 0, out_qsize 0
INFO - 22:05:13: worker thread finished; awaiting finish of 14 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 13 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 12 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 11 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 10 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 9 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 8 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 7 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 6 more threads
INFO - 22:05:13: worker thread finished; awaiting finish of 5 more threads
INFO - 22:05:13: worker thread finished; awaiting f