In [1]:
import pandas as pd
import numpy as np
import string
import re
import jieba
import logging
from gensim.corpora import WikiCorpus
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
from _utils import u_constant
from _utils.nlp.ZHConverter import langconv
# Surface gensim's INFO-level progress messages, each prefixed with a
# timestamp and the log level.
logging.basicConfig(level=logging.INFO, format="%(asctime)s : %(levelname)s : %(message)s")

# Every file used by this notebook lives below the chapter directory.
# Both directory strings keep their trailing "/" because file names are
# appended to them by plain string concatenation.
root_path = u_constant.PATH_ROOT + "for learn/Python/NLP_in_Action/chapter-7/"
path = "".join((root_path, "word2vec/"))

# Raw wiki dump (input) and the segmented plain-text corpus derived from it.
ZHWIKI_INPUT_PATH = "".join((root_path, "data/zhwiki-latest-pages-articles.xml.bz2"))
ZHWIKI_OUTPUT_PATH = "".join((root_path, "data/zhwiki.txt"))

# Where the trained word2vec model is saved.
MODEL_PATH = "".join((path, "zhwiki.word2vec"))



In [6]:
def wiki_processing(input_path, output_path):
    """
    Turn a raw wiki dump into a word-segmented plain-text corpus.

    Each item yielded inside an article by ``WikiCorpus.get_texts()`` is
    converted from Traditional to Simplified Chinese, segmented with jieba
    and written to ``output_path`` as one line of space-separated words.
    A progress message is printed every 200 articles.

    :param input_path: path of the raw wiki dump file
    :param output_path: path of the UTF-8 text file to write (opened in
        "w" mode, so an existing file is overwritten)
    :return: None
    """
    # lemmatize=False improves efficiency.
    wiki = WikiCorpus(fname=input_path, lemmatize=False, dictionary={})
    # The run takes hours; the context manager guarantees the output file is
    # flushed and closed even if parsing or segmentation raises midway.
    with open(output_path, "w", encoding="utf-8") as f:
        for i, texts in enumerate(wiki.get_texts()):  # total: 328470
            # NOTE(review): every item of an article becomes its own output
            # line, so a line-based reader sees many very short "sentences".
            # Confirm this granularity is intended for training.
            for sentence in texts:
                sentence = langconv.cht_to_chs(sentence)  # Traditional -> Simplified Chinese
                seg_list = jieba.lcut(sentence)
                f.write(" ".join(seg_list) + "\n")
            if (i + 1) % 200 == 0:
                print("Saved %s articles" % (str(i + 1)))

In [3]:
def train(corpus_path, model_output_path):
    """
    Train a word2vec model on a segmented corpus file and save it.

    The corpus is streamed through ``LineSentence``; no file handle is
    opened by this function itself.

    :param corpus_path: path of the corpus to train on
    :param model_output_path: path the trained model is saved to
    :return: None
    """
    # sg=0:      do not use skip-gram
    # size:      dimensionality of the word vectors
    # window:    maximum distance between the current word and a context
    #            word, i.e. up to `window` words on each side
    # min_count: vocabulary trimming threshold
    # workers:   number of threads used for training
    # NOTE(review): `size` is the gensim 3.x keyword; newer gensim releases
    # presumably expect `vector_size` - confirm before upgrading.
    sentences = LineSentence(corpus_path)
    model = Word2Vec(sentences=sentences, sg=0, size=192, window=5, min_count=5, workers=8)
    model.save(model_output_path)

In [7]:
# Build the segmented corpus from the raw dump (took almost 3 hours).
wiki_processing(input_path=ZHWIKI_INPUT_PATH, output_path=ZHWIKI_OUTPUT_PATH)

Saved 200 articles
Saved 400 articles
Saved 600 articles
Saved 800 articles
Saved 1000 articles
Saved 1200 articles
Saved 1400 articles
Saved 1600 articles
Saved 1800 articles
Saved 2000 articles
Saved 2200 articles
Saved 2400 articles
Saved 2600 articles
Saved 2800 articles
Saved 3000 articles
Saved 3200 articles
Saved 3400 articles
Saved 3600 articles
Saved 3800 articles
Saved 4000 articles
Saved 4200 articles
Saved 4400 articles
Saved 4600 articles
Saved 4800 articles
Saved 5000 articles
Saved 5200 articles
Saved 5400 articles
Saved 5600 articles
Saved 5800 articles
Saved 6000 articles
Saved 6200 articles
Saved 6400 articles
Saved 6600 articles
Saved 6800 articles
Saved 7000 articles
Saved 7200 articles
Saved 7400 articles
Saved 7600 articles
Saved 7800 articles
Saved 8000 articles
Saved 8200 articles
Saved 8400 articles
Saved 8600 articles
Saved 8800 articles
Saved 9000 articles
Saved 9200 articles
Saved 9400 articles
Saved 9600 articles
Saved 9800 articles
Saved 10000 articles
Sav

Saved 78800 articles
Saved 79000 articles
Saved 79200 articles
Saved 79400 articles
Saved 79600 articles
Saved 79800 articles
Saved 80000 articles
Saved 80200 articles
Saved 80400 articles
Saved 80600 articles
Saved 80800 articles
Saved 81000 articles
Saved 81200 articles
Saved 81400 articles
Saved 81600 articles
Saved 81800 articles
Saved 82000 articles
Saved 82200 articles
Saved 82400 articles
Saved 82600 articles
Saved 82800 articles
Saved 83000 articles
Saved 83200 articles
Saved 83400 articles
Saved 83600 articles
Saved 83800 articles
Saved 84000 articles
Saved 84200 articles
Saved 84400 articles
Saved 84600 articles
Saved 84800 articles
Saved 85000 articles
Saved 85200 articles
Saved 85400 articles
Saved 85600 articles
Saved 85800 articles
Saved 86000 articles
Saved 86200 articles
Saved 86400 articles
Saved 86600 articles
Saved 86800 articles
Saved 87000 articles
Saved 87200 articles
Saved 87400 articles
Saved 87600 articles
Saved 87800 articles
Saved 88000 articles
Saved 88200 a

Saved 154400 articles
Saved 154600 articles
Saved 154800 articles
Saved 155000 articles
Saved 155200 articles
Saved 155400 articles
Saved 155600 articles
Saved 155800 articles
Saved 156000 articles
Saved 156200 articles
Saved 156400 articles
Saved 156600 articles
Saved 156800 articles
Saved 157000 articles
Saved 157200 articles
Saved 157400 articles
Saved 157600 articles
Saved 157800 articles
Saved 158000 articles
Saved 158200 articles
Saved 158400 articles
Saved 158600 articles
Saved 158800 articles
Saved 159000 articles
Saved 159200 articles
Saved 159400 articles
Saved 159600 articles
Saved 159800 articles
Saved 160000 articles
Saved 160200 articles
Saved 160400 articles
Saved 160600 articles
Saved 160800 articles
Saved 161000 articles
Saved 161200 articles
Saved 161400 articles
Saved 161600 articles
Saved 161800 articles
Saved 162000 articles
Saved 162200 articles
Saved 162400 articles
Saved 162600 articles
Saved 162800 articles
Saved 163000 articles
Saved 163200 articles
Saved 1634

Saved 229000 articles
Saved 229200 articles
Saved 229400 articles
Saved 229600 articles
Saved 229800 articles
Saved 230000 articles
Saved 230200 articles
Saved 230400 articles
Saved 230600 articles
Saved 230800 articles
Saved 231000 articles
Saved 231200 articles
Saved 231400 articles
Saved 231600 articles
Saved 231800 articles
Saved 232000 articles
Saved 232200 articles
Saved 232400 articles
Saved 232600 articles
Saved 232800 articles
Saved 233000 articles
Saved 233200 articles
Saved 233400 articles
Saved 233600 articles
Saved 233800 articles
Saved 234000 articles
Saved 234200 articles
Saved 234400 articles
Saved 234600 articles
Saved 234800 articles
Saved 235000 articles
Saved 235200 articles
Saved 235400 articles
Saved 235600 articles
Saved 235800 articles
Saved 236000 articles
Saved 236200 articles
Saved 236400 articles
Saved 236600 articles
Saved 236800 articles
Saved 237000 articles
Saved 237200 articles
Saved 237400 articles
Saved 237600 articles
Saved 237800 articles
Saved 2380

Saved 303600 articles
Saved 303800 articles
Saved 304000 articles
Saved 304200 articles
Saved 304400 articles
Saved 304600 articles
Saved 304800 articles
Saved 305000 articles
Saved 305200 articles
Saved 305400 articles
Saved 305600 articles
Saved 305800 articles
Saved 306000 articles
Saved 306200 articles
Saved 306400 articles
Saved 306600 articles
Saved 306800 articles
Saved 307000 articles
Saved 307200 articles
Saved 307400 articles
Saved 307600 articles
Saved 307800 articles
Saved 308000 articles
Saved 308200 articles
Saved 308400 articles
Saved 308600 articles
Saved 308800 articles
Saved 309000 articles
Saved 309200 articles
Saved 309400 articles
Saved 309600 articles
Saved 309800 articles
Saved 310000 articles
Saved 310200 articles
Saved 310400 articles
Saved 310600 articles
Saved 310800 articles
Saved 311000 articles
Saved 311200 articles
Saved 311400 articles
Saved 311600 articles
Saved 311800 articles
Saved 312000 articles
Saved 312200 articles
Saved 312400 articles
Saved 3126

2018-12-17 13:44:25,250 : INFO : finished iterating over Wikipedia corpus of 328470 documents with 74953593 positions (total 3214417 articles, 88824819 positions before pruning articles shorter than 50 words)


In [8]:
# Train word2vec on the segmented corpus and save the model (took almost 50 min).
train(corpus_path=ZHWIKI_OUTPUT_PATH, model_output_path=MODEL_PATH)

2018-12-17 13:47:50,092 : INFO : collecting all words and their counts
2018-12-17 13:47:50,093 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2018-12-17 13:47:50,139 : INFO : PROGRESS: at sentence #10000, processed 27013 words, keeping 6894 word types
2018-12-17 13:47:50,185 : INFO : PROGRESS: at sentence #20000, processed 58426 words, keeping 12806 word types
2018-12-17 13:47:50,219 : INFO : PROGRESS: at sentence #30000, processed 89452 words, keeping 19497 word types
2018-12-17 13:47:50,263 : INFO : PROGRESS: at sentence #40000, processed 119790 words, keeping 25380 word types
2018-12-17 13:47:50,306 : INFO : PROGRESS: at sentence #50000, processed 152144 words, keeping 30522 word types
2018-12-17 13:47:50,343 : INFO : PROGRESS: at sentence #60000, processed 181725 words, keeping 34306 word types
2018-12-17 13:47:50,387 : INFO : PROGRESS: at sentence #70000, processed 209071 words, keeping 38270 word types
2018-12-17 13:47:50,425 : INFO : PROGRESS: at sent

2018-12-17 13:47:53,503 : INFO : PROGRESS: at sentence #720000, processed 2103156 words, keeping 185911 word types
2018-12-17 13:47:53,548 : INFO : PROGRESS: at sentence #730000, processed 2133681 words, keeping 187937 word types
2018-12-17 13:47:53,598 : INFO : PROGRESS: at sentence #740000, processed 2162044 words, keeping 189408 word types
2018-12-17 13:47:53,645 : INFO : PROGRESS: at sentence #750000, processed 2190553 words, keeping 191282 word types
2018-12-17 13:47:53,697 : INFO : PROGRESS: at sentence #760000, processed 2219388 words, keeping 193014 word types
2018-12-17 13:47:53,748 : INFO : PROGRESS: at sentence #770000, processed 2249581 words, keeping 194601 word types
2018-12-17 13:47:53,796 : INFO : PROGRESS: at sentence #780000, processed 2278223 words, keeping 195952 word types
2018-12-17 13:47:53,843 : INFO : PROGRESS: at sentence #790000, processed 2306954 words, keeping 197446 word types
2018-12-17 13:47:53,891 : INFO : PROGRESS: at sentence #800000, processed 233503

2018-12-17 13:47:56,844 : INFO : PROGRESS: at sentence #1430000, processed 4200403 words, keeping 293468 word types
2018-12-17 13:47:56,888 : INFO : PROGRESS: at sentence #1440000, processed 4230647 words, keeping 295085 word types
2018-12-17 13:47:56,931 : INFO : PROGRESS: at sentence #1450000, processed 4258958 words, keeping 297493 word types
2018-12-17 13:47:56,992 : INFO : PROGRESS: at sentence #1460000, processed 4287143 words, keeping 299010 word types
2018-12-17 13:47:57,033 : INFO : PROGRESS: at sentence #1470000, processed 4316597 words, keeping 300420 word types
2018-12-17 13:47:57,077 : INFO : PROGRESS: at sentence #1480000, processed 4344084 words, keeping 301669 word types
2018-12-17 13:47:57,116 : INFO : PROGRESS: at sentence #1490000, processed 4376642 words, keeping 302914 word types
2018-12-17 13:47:57,162 : INFO : PROGRESS: at sentence #1500000, processed 4406725 words, keeping 304039 word types
2018-12-17 13:47:57,207 : INFO : PROGRESS: at sentence #1510000, process

2018-12-17 13:48:00,554 : INFO : PROGRESS: at sentence #2140000, processed 6246420 words, keeping 386395 word types
2018-12-17 13:48:00,609 : INFO : PROGRESS: at sentence #2150000, processed 6278951 words, keeping 387564 word types
2018-12-17 13:48:00,665 : INFO : PROGRESS: at sentence #2160000, processed 6309480 words, keeping 388583 word types
2018-12-17 13:48:00,722 : INFO : PROGRESS: at sentence #2170000, processed 6337373 words, keeping 389616 word types
2018-12-17 13:48:00,780 : INFO : PROGRESS: at sentence #2180000, processed 6371773 words, keeping 390347 word types
2018-12-17 13:48:00,832 : INFO : PROGRESS: at sentence #2190000, processed 6396069 words, keeping 391643 word types
2018-12-17 13:48:00,892 : INFO : PROGRESS: at sentence #2200000, processed 6428141 words, keeping 392616 word types
2018-12-17 13:48:00,943 : INFO : PROGRESS: at sentence #2210000, processed 6455556 words, keeping 394133 word types
2018-12-17 13:48:01,003 : INFO : PROGRESS: at sentence #2220000, process

2018-12-17 13:48:04,888 : INFO : PROGRESS: at sentence #2850000, processed 8313038 words, keeping 463168 word types
2018-12-17 13:48:04,951 : INFO : PROGRESS: at sentence #2860000, processed 8342085 words, keeping 464359 word types
2018-12-17 13:48:05,027 : INFO : PROGRESS: at sentence #2870000, processed 8373338 words, keeping 465598 word types
2018-12-17 13:48:05,091 : INFO : PROGRESS: at sentence #2880000, processed 8401409 words, keeping 466838 word types
2018-12-17 13:48:05,155 : INFO : PROGRESS: at sentence #2890000, processed 8429520 words, keeping 467594 word types
2018-12-17 13:48:05,224 : INFO : PROGRESS: at sentence #2900000, processed 8458515 words, keeping 468589 word types
2018-12-17 13:48:05,290 : INFO : PROGRESS: at sentence #2910000, processed 8486871 words, keeping 469713 word types
2018-12-17 13:48:05,358 : INFO : PROGRESS: at sentence #2920000, processed 8514194 words, keeping 470649 word types
2018-12-17 13:48:05,422 : INFO : PROGRESS: at sentence #2930000, process

2018-12-17 13:48:09,036 : INFO : PROGRESS: at sentence #3560000, processed 10380693 words, keeping 532538 word types
2018-12-17 13:48:09,077 : INFO : PROGRESS: at sentence #3570000, processed 10411436 words, keeping 533451 word types
2018-12-17 13:48:09,118 : INFO : PROGRESS: at sentence #3580000, processed 10441716 words, keeping 535104 word types
2018-12-17 13:48:09,154 : INFO : PROGRESS: at sentence #3590000, processed 10465822 words, keeping 535900 word types
2018-12-17 13:48:09,196 : INFO : PROGRESS: at sentence #3600000, processed 10494511 words, keeping 536863 word types
2018-12-17 13:48:09,236 : INFO : PROGRESS: at sentence #3610000, processed 10525365 words, keeping 537554 word types
2018-12-17 13:48:09,276 : INFO : PROGRESS: at sentence #3620000, processed 10554591 words, keeping 538650 word types
2018-12-17 13:48:09,317 : INFO : PROGRESS: at sentence #3630000, processed 10581712 words, keeping 539393 word types
2018-12-17 13:48:09,354 : INFO : PROGRESS: at sentence #3640000,

2018-12-17 13:48:12,005 : INFO : PROGRESS: at sentence #4270000, processed 12455531 words, keeping 593318 word types
2018-12-17 13:48:12,048 : INFO : PROGRESS: at sentence #4280000, processed 12486241 words, keeping 594111 word types
2018-12-17 13:48:12,087 : INFO : PROGRESS: at sentence #4290000, processed 12515625 words, keeping 594995 word types
2018-12-17 13:48:12,129 : INFO : PROGRESS: at sentence #4300000, processed 12546815 words, keeping 595997 word types
2018-12-17 13:48:12,170 : INFO : PROGRESS: at sentence #4310000, processed 12573955 words, keeping 597067 word types
2018-12-17 13:48:12,210 : INFO : PROGRESS: at sentence #4320000, processed 12600460 words, keeping 597557 word types
2018-12-17 13:48:12,255 : INFO : PROGRESS: at sentence #4330000, processed 12629735 words, keeping 598203 word types
2018-12-17 13:48:12,293 : INFO : PROGRESS: at sentence #4340000, processed 12659175 words, keeping 599189 word types
2018-12-17 13:48:12,345 : INFO : PROGRESS: at sentence #4350000,

2018-12-17 13:48:14,959 : INFO : PROGRESS: at sentence #4980000, processed 14525591 words, keeping 651541 word types
2018-12-17 13:48:15,017 : INFO : PROGRESS: at sentence #4990000, processed 14555084 words, keeping 652410 word types
2018-12-17 13:48:15,062 : INFO : PROGRESS: at sentence #5000000, processed 14584652 words, keeping 653010 word types
2018-12-17 13:48:15,106 : INFO : PROGRESS: at sentence #5010000, processed 14614689 words, keeping 653843 word types
2018-12-17 13:48:15,147 : INFO : PROGRESS: at sentence #5020000, processed 14643558 words, keeping 654683 word types
2018-12-17 13:48:15,189 : INFO : PROGRESS: at sentence #5030000, processed 14674172 words, keeping 655605 word types
2018-12-17 13:48:15,236 : INFO : PROGRESS: at sentence #5040000, processed 14703730 words, keeping 656385 word types
2018-12-17 13:48:15,281 : INFO : PROGRESS: at sentence #5050000, processed 14733635 words, keeping 657391 word types
2018-12-17 13:48:15,329 : INFO : PROGRESS: at sentence #5060000,

2018-12-17 13:48:17,951 : INFO : PROGRESS: at sentence #5690000, processed 16564259 words, keeping 712272 word types
2018-12-17 13:48:17,986 : INFO : PROGRESS: at sentence #5700000, processed 16591208 words, keeping 713395 word types
2018-12-17 13:48:18,026 : INFO : PROGRESS: at sentence #5710000, processed 16619901 words, keeping 714149 word types
2018-12-17 13:48:18,071 : INFO : PROGRESS: at sentence #5720000, processed 16648847 words, keeping 715073 word types
2018-12-17 13:48:18,108 : INFO : PROGRESS: at sentence #5730000, processed 16676116 words, keeping 716108 word types
2018-12-17 13:48:18,148 : INFO : PROGRESS: at sentence #5740000, processed 16705281 words, keeping 716927 word types
2018-12-17 13:48:18,194 : INFO : PROGRESS: at sentence #5750000, processed 16733719 words, keeping 717584 word types
2018-12-17 13:48:18,232 : INFO : PROGRESS: at sentence #5760000, processed 16762259 words, keeping 718443 word types
2018-12-17 13:48:18,277 : INFO : PROGRESS: at sentence #5770000,

2018-12-17 13:48:20,731 : INFO : PROGRESS: at sentence #6400000, processed 18518187 words, keeping 771470 word types
2018-12-17 13:48:20,769 : INFO : PROGRESS: at sentence #6410000, processed 18540896 words, keeping 772468 word types
2018-12-17 13:48:20,808 : INFO : PROGRESS: at sentence #6420000, processed 18565920 words, keeping 773251 word types
2018-12-17 13:48:20,844 : INFO : PROGRESS: at sentence #6430000, processed 18594921 words, keeping 774244 word types
2018-12-17 13:48:20,882 : INFO : PROGRESS: at sentence #6440000, processed 18621447 words, keeping 774993 word types
2018-12-17 13:48:20,915 : INFO : PROGRESS: at sentence #6450000, processed 18650200 words, keeping 775834 word types
2018-12-17 13:48:20,960 : INFO : PROGRESS: at sentence #6460000, processed 18675441 words, keeping 776565 word types
2018-12-17 13:48:21,012 : INFO : PROGRESS: at sentence #6470000, processed 18704513 words, keeping 777483 word types
2018-12-17 13:48:21,052 : INFO : PROGRESS: at sentence #6480000,

2018-12-17 13:48:24,353 : INFO : PROGRESS: at sentence #7110000, processed 20549646 words, keeping 828112 word types
2018-12-17 13:48:24,416 : INFO : PROGRESS: at sentence #7120000, processed 20577749 words, keeping 829088 word types
2018-12-17 13:48:24,485 : INFO : PROGRESS: at sentence #7130000, processed 20607809 words, keeping 829651 word types
2018-12-17 13:48:24,559 : INFO : PROGRESS: at sentence #7140000, processed 20638164 words, keeping 830393 word types
2018-12-17 13:48:24,627 : INFO : PROGRESS: at sentence #7150000, processed 20668589 words, keeping 831071 word types
2018-12-17 13:48:24,692 : INFO : PROGRESS: at sentence #7160000, processed 20697213 words, keeping 831835 word types
2018-12-17 13:48:24,754 : INFO : PROGRESS: at sentence #7170000, processed 20726053 words, keeping 832644 word types
2018-12-17 13:48:24,813 : INFO : PROGRESS: at sentence #7180000, processed 20749931 words, keeping 833330 word types
2018-12-17 13:48:24,876 : INFO : PROGRESS: at sentence #7190000,

2018-12-17 13:48:30,583 : INFO : PROGRESS: at sentence #7820000, processed 22546892 words, keeping 878854 word types
2018-12-17 13:48:30,696 : INFO : PROGRESS: at sentence #7830000, processed 22581673 words, keeping 879736 word types
2018-12-17 13:48:30,799 : INFO : PROGRESS: at sentence #7840000, processed 22610903 words, keeping 880242 word types
2018-12-17 13:48:30,916 : INFO : PROGRESS: at sentence #7850000, processed 22641338 words, keeping 880922 word types
2018-12-17 13:48:31,028 : INFO : PROGRESS: at sentence #7860000, processed 22669704 words, keeping 881621 word types
2018-12-17 13:48:31,130 : INFO : PROGRESS: at sentence #7870000, processed 22698295 words, keeping 882264 word types
2018-12-17 13:48:31,241 : INFO : PROGRESS: at sentence #7880000, processed 22726052 words, keeping 882855 word types
2018-12-17 13:48:31,359 : INFO : PROGRESS: at sentence #7890000, processed 22755949 words, keeping 883885 word types
2018-12-17 13:48:31,462 : INFO : PROGRESS: at sentence #7900000,

2018-12-17 13:48:34,145 : INFO : PROGRESS: at sentence #8530000, processed 24540178 words, keeping 929474 word types
2018-12-17 13:48:34,186 : INFO : PROGRESS: at sentence #8540000, processed 24561327 words, keeping 930447 word types
2018-12-17 13:48:34,238 : INFO : PROGRESS: at sentence #8550000, processed 24588059 words, keeping 930967 word types
2018-12-17 13:48:34,277 : INFO : PROGRESS: at sentence #8560000, processed 24616897 words, keeping 931642 word types
2018-12-17 13:48:34,329 : INFO : PROGRESS: at sentence #8570000, processed 24648756 words, keeping 932182 word types
2018-12-17 13:48:34,378 : INFO : PROGRESS: at sentence #8580000, processed 24676644 words, keeping 932890 word types
2018-12-17 13:48:34,430 : INFO : PROGRESS: at sentence #8590000, processed 24706817 words, keeping 933438 word types
2018-12-17 13:48:34,477 : INFO : PROGRESS: at sentence #8600000, processed 24733310 words, keeping 934200 word types
2018-12-17 13:48:34,518 : INFO : PROGRESS: at sentence #8610000,

2018-12-17 13:48:37,212 : INFO : PROGRESS: at sentence #9240000, processed 26477912 words, keeping 971974 word types
2018-12-17 13:48:37,258 : INFO : PROGRESS: at sentence #9250000, processed 26506916 words, keeping 972761 word types
2018-12-17 13:48:37,301 : INFO : PROGRESS: at sentence #9260000, processed 26532952 words, keeping 973717 word types
2018-12-17 13:48:37,339 : INFO : PROGRESS: at sentence #9270000, processed 26556728 words, keeping 974158 word types
2018-12-17 13:48:37,380 : INFO : PROGRESS: at sentence #9280000, processed 26580170 words, keeping 974486 word types
2018-12-17 13:48:37,420 : INFO : PROGRESS: at sentence #9290000, processed 26607253 words, keeping 974917 word types
2018-12-17 13:48:37,461 : INFO : PROGRESS: at sentence #9300000, processed 26635010 words, keeping 975255 word types
2018-12-17 13:48:37,505 : INFO : PROGRESS: at sentence #9310000, processed 26661858 words, keeping 975945 word types
2018-12-17 13:48:37,551 : INFO : PROGRESS: at sentence #9320000,

2018-12-17 13:48:40,184 : INFO : PROGRESS: at sentence #9940000, processed 28406042 words, keeping 1013232 word types
2018-12-17 13:48:40,227 : INFO : PROGRESS: at sentence #9950000, processed 28431999 words, keeping 1013929 word types
2018-12-17 13:48:40,264 : INFO : PROGRESS: at sentence #9960000, processed 28455796 words, keeping 1014465 word types
2018-12-17 13:48:40,309 : INFO : PROGRESS: at sentence #9970000, processed 28485430 words, keeping 1015036 word types
2018-12-17 13:48:40,349 : INFO : PROGRESS: at sentence #9980000, processed 28512304 words, keeping 1015317 word types
2018-12-17 13:48:40,400 : INFO : PROGRESS: at sentence #9990000, processed 28543747 words, keeping 1015797 word types
2018-12-17 13:48:40,438 : INFO : PROGRESS: at sentence #10000000, processed 28572303 words, keeping 1016302 word types
2018-12-17 13:48:40,481 : INFO : PROGRESS: at sentence #10010000, processed 28601391 words, keeping 1016856 word types
2018-12-17 13:48:40,517 : INFO : PROGRESS: at sentence

2018-12-17 13:48:42,830 : INFO : PROGRESS: at sentence #10630000, processed 30317600 words, keeping 1051797 word types
2018-12-17 13:48:42,864 : INFO : PROGRESS: at sentence #10640000, processed 30342819 words, keeping 1052328 word types
2018-12-17 13:48:42,897 : INFO : PROGRESS: at sentence #10650000, processed 30366610 words, keeping 1053381 word types
2018-12-17 13:48:42,938 : INFO : PROGRESS: at sentence #10660000, processed 30393913 words, keeping 1054151 word types
2018-12-17 13:48:42,972 : INFO : PROGRESS: at sentence #10670000, processed 30421440 words, keeping 1054679 word types
2018-12-17 13:48:43,031 : INFO : PROGRESS: at sentence #10680000, processed 30449124 words, keeping 1055428 word types
2018-12-17 13:48:43,075 : INFO : PROGRESS: at sentence #10690000, processed 30479710 words, keeping 1055807 word types
2018-12-17 13:48:43,122 : INFO : PROGRESS: at sentence #10700000, processed 30509341 words, keeping 1056319 word types
2018-12-17 13:48:43,172 : INFO : PROGRESS: at se

2018-12-17 13:48:45,790 : INFO : PROGRESS: at sentence #11320000, processed 32221979 words, keeping 1092776 word types
2018-12-17 13:48:45,828 : INFO : PROGRESS: at sentence #11330000, processed 32245286 words, keeping 1093327 word types
2018-12-17 13:48:45,867 : INFO : PROGRESS: at sentence #11340000, processed 32273818 words, keeping 1093957 word types
2018-12-17 13:48:45,907 : INFO : PROGRESS: at sentence #11350000, processed 32308211 words, keeping 1094480 word types
2018-12-17 13:48:45,952 : INFO : PROGRESS: at sentence #11360000, processed 32336121 words, keeping 1095157 word types
2018-12-17 13:48:45,993 : INFO : PROGRESS: at sentence #11370000, processed 32362932 words, keeping 1095636 word types
2018-12-17 13:48:46,040 : INFO : PROGRESS: at sentence #11380000, processed 32391010 words, keeping 1096293 word types
2018-12-17 13:48:46,081 : INFO : PROGRESS: at sentence #11390000, processed 32418259 words, keeping 1096934 word types
2018-12-17 13:48:46,133 : INFO : PROGRESS: at se

2018-12-17 13:48:49,506 : INFO : PROGRESS: at sentence #12010000, processed 34100589 words, keeping 1135043 word types
2018-12-17 13:48:49,559 : INFO : PROGRESS: at sentence #12020000, processed 34127645 words, keeping 1135758 word types
2018-12-17 13:48:49,615 : INFO : PROGRESS: at sentence #12030000, processed 34154355 words, keeping 1136500 word types
2018-12-17 13:48:49,668 : INFO : PROGRESS: at sentence #12040000, processed 34182935 words, keeping 1137004 word types
2018-12-17 13:48:49,723 : INFO : PROGRESS: at sentence #12050000, processed 34211246 words, keeping 1137587 word types
2018-12-17 13:48:49,784 : INFO : PROGRESS: at sentence #12060000, processed 34239888 words, keeping 1138259 word types
2018-12-17 13:48:49,841 : INFO : PROGRESS: at sentence #12070000, processed 34267477 words, keeping 1138852 word types
2018-12-17 13:48:49,894 : INFO : PROGRESS: at sentence #12080000, processed 34296585 words, keeping 1139436 word types
2018-12-17 13:48:49,955 : INFO : PROGRESS: at se

2018-12-17 13:48:54,006 : INFO : PROGRESS: at sentence #12700000, processed 35953526 words, keeping 1179289 word types
2018-12-17 13:48:54,082 : INFO : PROGRESS: at sentence #12710000, processed 35982568 words, keeping 1180001 word types
2018-12-17 13:48:54,154 : INFO : PROGRESS: at sentence #12720000, processed 36007357 words, keeping 1180434 word types
2018-12-17 13:48:54,232 : INFO : PROGRESS: at sentence #12730000, processed 36038964 words, keeping 1180993 word types
2018-12-17 13:48:54,296 : INFO : PROGRESS: at sentence #12740000, processed 36058600 words, keeping 1182171 word types
2018-12-17 13:48:54,369 : INFO : PROGRESS: at sentence #12750000, processed 36087764 words, keeping 1182741 word types
2018-12-17 13:48:54,442 : INFO : PROGRESS: at sentence #12760000, processed 36123319 words, keeping 1183081 word types
2018-12-17 13:48:54,501 : INFO : PROGRESS: at sentence #12770000, processed 36146908 words, keeping 1183574 word types
2018-12-17 13:48:54,567 : INFO : PROGRESS: at se

2018-12-17 13:48:57,735 : INFO : PROGRESS: at sentence #13390000, processed 37869792 words, keeping 1217560 word types
2018-12-17 13:48:57,778 : INFO : PROGRESS: at sentence #13400000, processed 37897257 words, keeping 1218133 word types
2018-12-17 13:48:57,816 : INFO : PROGRESS: at sentence #13410000, processed 37924135 words, keeping 1218647 word types
2018-12-17 13:48:57,857 : INFO : PROGRESS: at sentence #13420000, processed 37951474 words, keeping 1219259 word types
2018-12-17 13:48:57,898 : INFO : PROGRESS: at sentence #13430000, processed 37980103 words, keeping 1220129 word types
2018-12-17 13:48:57,939 : INFO : PROGRESS: at sentence #13440000, processed 38005626 words, keeping 1220971 word types
2018-12-17 13:48:57,979 : INFO : PROGRESS: at sentence #13450000, processed 38037129 words, keeping 1221516 word types
2018-12-17 13:48:58,021 : INFO : PROGRESS: at sentence #13460000, processed 38062589 words, keeping 1222042 word types
2018-12-17 13:48:58,058 : INFO : PROGRESS: at se

2018-12-17 13:49:00,691 : INFO : PROGRESS: at sentence #14080000, processed 39778943 words, keeping 1255267 word types
2018-12-17 13:49:00,731 : INFO : PROGRESS: at sentence #14090000, processed 39803028 words, keeping 1255830 word types
2018-12-17 13:49:00,770 : INFO : PROGRESS: at sentence #14100000, processed 39828064 words, keeping 1256177 word types
2018-12-17 13:49:00,815 : INFO : PROGRESS: at sentence #14110000, processed 39855071 words, keeping 1256630 word types
2018-12-17 13:49:00,857 : INFO : PROGRESS: at sentence #14120000, processed 39883545 words, keeping 1257317 word types
2018-12-17 13:49:00,900 : INFO : PROGRESS: at sentence #14130000, processed 39917571 words, keeping 1257985 word types
2018-12-17 13:49:00,940 : INFO : PROGRESS: at sentence #14140000, processed 39945293 words, keeping 1258534 word types
2018-12-17 13:49:00,994 : INFO : PROGRESS: at sentence #14150000, processed 39974728 words, keeping 1258968 word types
2018-12-17 13:49:01,034 : INFO : PROGRESS: at se

2018-12-17 13:49:03,590 : INFO : PROGRESS: at sentence #14770000, processed 41616162 words, keeping 1290576 word types
2018-12-17 13:49:03,629 : INFO : PROGRESS: at sentence #14780000, processed 41642878 words, keeping 1291139 word types
2018-12-17 13:49:03,679 : INFO : PROGRESS: at sentence #14790000, processed 41673576 words, keeping 1291647 word types
2018-12-17 13:49:03,721 : INFO : PROGRESS: at sentence #14800000, processed 41701702 words, keeping 1292281 word types
2018-12-17 13:49:03,763 : INFO : PROGRESS: at sentence #14810000, processed 41728926 words, keeping 1292753 word types
2018-12-17 13:49:03,805 : INFO : PROGRESS: at sentence #14820000, processed 41755650 words, keeping 1293205 word types
2018-12-17 13:49:03,844 : INFO : PROGRESS: at sentence #14830000, processed 41780512 words, keeping 1293621 word types
2018-12-17 13:49:03,889 : INFO : PROGRESS: at sentence #14840000, processed 41807380 words, keeping 1294093 word types
2018-12-17 13:49:03,930 : INFO : PROGRESS: at se

2018-12-17 13:49:06,531 : INFO : PROGRESS: at sentence #15460000, processed 43492967 words, keeping 1326678 word types
2018-12-17 13:49:06,570 : INFO : PROGRESS: at sentence #15470000, processed 43521616 words, keeping 1327181 word types
2018-12-17 13:49:06,607 : INFO : PROGRESS: at sentence #15480000, processed 43549067 words, keeping 1327998 word types
2018-12-17 13:49:06,651 : INFO : PROGRESS: at sentence #15490000, processed 43578918 words, keeping 1328463 word types
2018-12-17 13:49:06,702 : INFO : PROGRESS: at sentence #15500000, processed 43605925 words, keeping 1329089 word types
2018-12-17 13:49:06,742 : INFO : PROGRESS: at sentence #15510000, processed 43633786 words, keeping 1329708 word types
2018-12-17 13:49:06,781 : INFO : PROGRESS: at sentence #15520000, processed 43658421 words, keeping 1330142 word types
2018-12-17 13:49:06,819 : INFO : PROGRESS: at sentence #15530000, processed 43683242 words, keeping 1330691 word types
2018-12-17 13:49:06,851 : INFO : PROGRESS: at se

2018-12-17 13:49:09,440 : INFO : PROGRESS: at sentence #16150000, processed 45303623 words, keeping 1368646 word types
2018-12-17 13:49:09,479 : INFO : PROGRESS: at sentence #16160000, processed 45330356 words, keeping 1369136 word types
2018-12-17 13:49:09,525 : INFO : PROGRESS: at sentence #16170000, processed 45361755 words, keeping 1369660 word types
2018-12-17 13:49:09,562 : INFO : PROGRESS: at sentence #16180000, processed 45388687 words, keeping 1370381 word types
2018-12-17 13:49:09,605 : INFO : PROGRESS: at sentence #16190000, processed 45416800 words, keeping 1370961 word types
2018-12-17 13:49:09,645 : INFO : PROGRESS: at sentence #16200000, processed 45444431 words, keeping 1372317 word types
2018-12-17 13:49:09,691 : INFO : PROGRESS: at sentence #16210000, processed 45472176 words, keeping 1372900 word types
2018-12-17 13:49:09,739 : INFO : PROGRESS: at sentence #16220000, processed 45500689 words, keeping 1373410 word types
2018-12-17 13:49:09,784 : INFO : PROGRESS: at se

2018-12-17 13:49:13,178 : INFO : PROGRESS: at sentence #16840000, processed 47155782 words, keeping 1404549 word types
2018-12-17 13:49:13,236 : INFO : PROGRESS: at sentence #16850000, processed 47183716 words, keeping 1405498 word types
2018-12-17 13:49:13,294 : INFO : PROGRESS: at sentence #16860000, processed 47214172 words, keeping 1406015 word types
2018-12-17 13:49:13,351 : INFO : PROGRESS: at sentence #16870000, processed 47239729 words, keeping 1406425 word types
2018-12-17 13:49:13,402 : INFO : PROGRESS: at sentence #16880000, processed 47264539 words, keeping 1406995 word types
2018-12-17 13:49:13,452 : INFO : PROGRESS: at sentence #16890000, processed 47289537 words, keeping 1407385 word types
2018-12-17 13:49:13,506 : INFO : PROGRESS: at sentence #16900000, processed 47318003 words, keeping 1408020 word types
2018-12-17 13:49:13,558 : INFO : PROGRESS: at sentence #16910000, processed 47344523 words, keeping 1408449 word types
2018-12-17 13:49:13,609 : INFO : PROGRESS: at se

2018-12-17 13:49:17,423 : INFO : PROGRESS: at sentence #17530000, processed 48959846 words, keeping 1440373 word types
2018-12-17 13:49:17,491 : INFO : PROGRESS: at sentence #17540000, processed 48989705 words, keeping 1440871 word types
2018-12-17 13:49:17,556 : INFO : PROGRESS: at sentence #17550000, processed 49018206 words, keeping 1441379 word types
2018-12-17 13:49:17,620 : INFO : PROGRESS: at sentence #17560000, processed 49042157 words, keeping 1442082 word types
2018-12-17 13:49:17,690 : INFO : PROGRESS: at sentence #17570000, processed 49070435 words, keeping 1442407 word types
2018-12-17 13:49:17,757 : INFO : PROGRESS: at sentence #17580000, processed 49096710 words, keeping 1442996 word types
2018-12-17 13:49:17,816 : INFO : PROGRESS: at sentence #17590000, processed 49121814 words, keeping 1443378 word types
2018-12-17 13:49:17,888 : INFO : PROGRESS: at sentence #17600000, processed 49150104 words, keeping 1444075 word types
2018-12-17 13:49:17,954 : INFO : PROGRESS: at se

2018-12-17 13:49:21,434 : INFO : PROGRESS: at sentence #18220000, processed 50785982 words, keeping 1470446 word types
2018-12-17 13:49:21,474 : INFO : PROGRESS: at sentence #18230000, processed 50808835 words, keeping 1471227 word types
2018-12-17 13:49:21,513 : INFO : PROGRESS: at sentence #18240000, processed 50830101 words, keeping 1471840 word types
2018-12-17 13:49:21,560 : INFO : PROGRESS: at sentence #18250000, processed 50860793 words, keeping 1472381 word types
2018-12-17 13:49:21,601 : INFO : PROGRESS: at sentence #18260000, processed 50887626 words, keeping 1472853 word types
2018-12-17 13:49:21,645 : INFO : PROGRESS: at sentence #18270000, processed 50912372 words, keeping 1473286 word types
2018-12-17 13:49:21,686 : INFO : PROGRESS: at sentence #18280000, processed 50937591 words, keeping 1473894 word types
2018-12-17 13:49:21,735 : INFO : PROGRESS: at sentence #18290000, processed 50964783 words, keeping 1474562 word types
2018-12-17 13:49:21,778 : INFO : PROGRESS: at se

2018-12-17 13:49:24,385 : INFO : PROGRESS: at sentence #18910000, processed 52643911 words, keeping 1504490 word types
2018-12-17 13:49:24,422 : INFO : PROGRESS: at sentence #18920000, processed 52670467 words, keeping 1504980 word types
2018-12-17 13:49:24,462 : INFO : PROGRESS: at sentence #18930000, processed 52696513 words, keeping 1505292 word types
2018-12-17 13:49:24,503 : INFO : PROGRESS: at sentence #18940000, processed 52723236 words, keeping 1505826 word types
2018-12-17 13:49:24,546 : INFO : PROGRESS: at sentence #18950000, processed 52750389 words, keeping 1506342 word types
2018-12-17 13:49:24,582 : INFO : PROGRESS: at sentence #18960000, processed 52777139 words, keeping 1506741 word types
2018-12-17 13:49:24,646 : INFO : PROGRESS: at sentence #18970000, processed 52806128 words, keeping 1507145 word types
2018-12-17 13:49:24,683 : INFO : PROGRESS: at sentence #18980000, processed 52825459 words, keeping 1507981 word types
2018-12-17 13:49:24,729 : INFO : PROGRESS: at se

2018-12-17 13:49:27,476 : INFO : PROGRESS: at sentence #19600000, processed 54518709 words, keeping 1536968 word types
2018-12-17 13:49:27,516 : INFO : PROGRESS: at sentence #19610000, processed 54546463 words, keeping 1537376 word types
2018-12-17 13:49:27,561 : INFO : PROGRESS: at sentence #19620000, processed 54571773 words, keeping 1537849 word types
2018-12-17 13:49:27,610 : INFO : PROGRESS: at sentence #19630000, processed 54602203 words, keeping 1538443 word types
2018-12-17 13:49:27,648 : INFO : PROGRESS: at sentence #19640000, processed 54624752 words, keeping 1538956 word types
2018-12-17 13:49:27,690 : INFO : PROGRESS: at sentence #19650000, processed 54653959 words, keeping 1539252 word types
2018-12-17 13:49:27,729 : INFO : PROGRESS: at sentence #19660000, processed 54681525 words, keeping 1539731 word types
2018-12-17 13:49:27,771 : INFO : PROGRESS: at sentence #19670000, processed 54708344 words, keeping 1540199 word types
2018-12-17 13:49:27,812 : INFO : PROGRESS: at se

2018-12-17 13:49:30,419 : INFO : PROGRESS: at sentence #20290000, processed 56371779 words, keeping 1571944 word types
2018-12-17 13:49:30,460 : INFO : PROGRESS: at sentence #20300000, processed 56399301 words, keeping 1572618 word types
2018-12-17 13:49:30,496 : INFO : PROGRESS: at sentence #20310000, processed 56425776 words, keeping 1573094 word types
2018-12-17 13:49:30,542 : INFO : PROGRESS: at sentence #20320000, processed 56454122 words, keeping 1573489 word types
2018-12-17 13:49:30,583 : INFO : PROGRESS: at sentence #20330000, processed 56478411 words, keeping 1574216 word types
2018-12-17 13:49:30,628 : INFO : PROGRESS: at sentence #20340000, processed 56509614 words, keeping 1574738 word types
2018-12-17 13:49:30,665 : INFO : PROGRESS: at sentence #20350000, processed 56533205 words, keeping 1575441 word types
2018-12-17 13:49:30,707 : INFO : PROGRESS: at sentence #20360000, processed 56563218 words, keeping 1575818 word types
2018-12-17 13:49:30,749 : INFO : PROGRESS: at se

2018-12-17 13:49:33,354 : INFO : PROGRESS: at sentence #20980000, processed 58232864 words, keeping 1607388 word types
2018-12-17 13:49:33,413 : INFO : PROGRESS: at sentence #20990000, processed 58261436 words, keeping 1607822 word types
2018-12-17 13:49:33,453 : INFO : PROGRESS: at sentence #21000000, processed 58285397 words, keeping 1608250 word types
2018-12-17 13:49:33,492 : INFO : PROGRESS: at sentence #21010000, processed 58314204 words, keeping 1608733 word types
2018-12-17 13:49:33,535 : INFO : PROGRESS: at sentence #21020000, processed 58339631 words, keeping 1609400 word types
2018-12-17 13:49:33,572 : INFO : PROGRESS: at sentence #21030000, processed 58361442 words, keeping 1609831 word types
2018-12-17 13:49:33,611 : INFO : PROGRESS: at sentence #21040000, processed 58386963 words, keeping 1610370 word types
2018-12-17 13:49:33,654 : INFO : PROGRESS: at sentence #21050000, processed 58414330 words, keeping 1611010 word types
2018-12-17 13:49:33,695 : INFO : PROGRESS: at se

2018-12-17 13:49:36,628 : INFO : PROGRESS: at sentence #21670000, processed 60043241 words, keeping 1641235 word types
2018-12-17 13:49:36,676 : INFO : PROGRESS: at sentence #21680000, processed 60068416 words, keeping 1641919 word types
2018-12-17 13:49:36,720 : INFO : PROGRESS: at sentence #21690000, processed 60091905 words, keeping 1642193 word types
2018-12-17 13:49:36,772 : INFO : PROGRESS: at sentence #21700000, processed 60120002 words, keeping 1642671 word types
2018-12-17 13:49:36,823 : INFO : PROGRESS: at sentence #21710000, processed 60145422 words, keeping 1643336 word types
2018-12-17 13:49:36,871 : INFO : PROGRESS: at sentence #21720000, processed 60169000 words, keeping 1643940 word types
2018-12-17 13:49:36,924 : INFO : PROGRESS: at sentence #21730000, processed 60194635 words, keeping 1644361 word types
2018-12-17 13:49:36,971 : INFO : PROGRESS: at sentence #21740000, processed 60222146 words, keeping 1644778 word types
2018-12-17 13:49:37,021 : INFO : PROGRESS: at se

2018-12-17 13:49:40,526 : INFO : PROGRESS: at sentence #22360000, processed 61870916 words, keeping 1678222 word types
2018-12-17 13:49:40,586 : INFO : PROGRESS: at sentence #22370000, processed 61893802 words, keeping 1678871 word types
2018-12-17 13:49:40,649 : INFO : PROGRESS: at sentence #22380000, processed 61920007 words, keeping 1679288 word types
2018-12-17 13:49:40,717 : INFO : PROGRESS: at sentence #22390000, processed 61945909 words, keeping 1679674 word types
2018-12-17 13:49:40,790 : INFO : PROGRESS: at sentence #22400000, processed 61973400 words, keeping 1680268 word types
2018-12-17 13:49:40,860 : INFO : PROGRESS: at sentence #22410000, processed 62001192 words, keeping 1680811 word types
2018-12-17 13:49:40,934 : INFO : PROGRESS: at sentence #22420000, processed 62030913 words, keeping 1681167 word types
2018-12-17 13:49:41,011 : INFO : PROGRESS: at sentence #22430000, processed 62057354 words, keeping 1681575 word types
2018-12-17 13:49:41,078 : INFO : PROGRESS: at se

2018-12-17 13:49:44,567 : INFO : PROGRESS: at sentence #23050000, processed 63748481 words, keeping 1710067 word types
2018-12-17 13:49:44,606 : INFO : PROGRESS: at sentence #23060000, processed 63775778 words, keeping 1710444 word types
2018-12-17 13:49:44,647 : INFO : PROGRESS: at sentence #23070000, processed 63802269 words, keeping 1710749 word types
2018-12-17 13:49:44,687 : INFO : PROGRESS: at sentence #23080000, processed 63827953 words, keeping 1711090 word types
2018-12-17 13:49:44,726 : INFO : PROGRESS: at sentence #23090000, processed 63854008 words, keeping 1711426 word types
2018-12-17 13:49:44,767 : INFO : PROGRESS: at sentence #23100000, processed 63880183 words, keeping 1711776 word types
2018-12-17 13:49:44,810 : INFO : PROGRESS: at sentence #23110000, processed 63907044 words, keeping 1712196 word types
2018-12-17 13:49:44,846 : INFO : PROGRESS: at sentence #23120000, processed 63931357 words, keeping 1712784 word types
2018-12-17 13:49:44,883 : INFO : PROGRESS: at se

2018-12-17 13:49:47,623 : INFO : PROGRESS: at sentence #23740000, processed 65587109 words, keeping 1741331 word types
2018-12-17 13:49:47,670 : INFO : PROGRESS: at sentence #23750000, processed 65609697 words, keeping 1742007 word types
2018-12-17 13:49:47,720 : INFO : PROGRESS: at sentence #23760000, processed 65637087 words, keeping 1742437 word types
2018-12-17 13:49:47,769 : INFO : PROGRESS: at sentence #23770000, processed 65662475 words, keeping 1742814 word types
2018-12-17 13:49:47,829 : INFO : PROGRESS: at sentence #23780000, processed 65689525 words, keeping 1743171 word types
2018-12-17 13:49:47,881 : INFO : PROGRESS: at sentence #23790000, processed 65715727 words, keeping 1743503 word types
2018-12-17 13:49:47,942 : INFO : PROGRESS: at sentence #23800000, processed 65746614 words, keeping 1743943 word types
2018-12-17 13:49:47,990 : INFO : PROGRESS: at sentence #23810000, processed 65775156 words, keeping 1744346 word types
2018-12-17 13:49:48,045 : INFO : PROGRESS: at se

2018-12-17 13:49:50,693 : INFO : PROGRESS: at sentence #24430000, processed 67390764 words, keeping 1773530 word types
2018-12-17 13:49:50,738 : INFO : PROGRESS: at sentence #24440000, processed 67416548 words, keeping 1774003 word types
2018-12-17 13:49:50,777 : INFO : PROGRESS: at sentence #24450000, processed 67442633 words, keeping 1774343 word types
2018-12-17 13:49:50,842 : INFO : PROGRESS: at sentence #24460000, processed 67469477 words, keeping 1774638 word types
2018-12-17 13:49:50,884 : INFO : PROGRESS: at sentence #24470000, processed 67495417 words, keeping 1775025 word types
2018-12-17 13:49:50,921 : INFO : PROGRESS: at sentence #24480000, processed 67518181 words, keeping 1775564 word types
2018-12-17 13:49:50,968 : INFO : PROGRESS: at sentence #24490000, processed 67547022 words, keeping 1775998 word types
2018-12-17 13:49:51,023 : INFO : PROGRESS: at sentence #24500000, processed 67573913 words, keeping 1776355 word types
2018-12-17 13:49:51,068 : INFO : PROGRESS: at se

2018-12-17 13:49:53,600 : INFO : PROGRESS: at sentence #25120000, processed 69240204 words, keeping 1805195 word types
2018-12-17 13:49:53,636 : INFO : PROGRESS: at sentence #25130000, processed 69263793 words, keeping 1805810 word types
2018-12-17 13:49:53,680 : INFO : PROGRESS: at sentence #25140000, processed 69292548 words, keeping 1806296 word types
2018-12-17 13:49:53,715 : INFO : PROGRESS: at sentence #25150000, processed 69319263 words, keeping 1806798 word types
2018-12-17 13:49:53,750 : INFO : PROGRESS: at sentence #25160000, processed 69346565 words, keeping 1807164 word types
2018-12-17 13:49:53,791 : INFO : PROGRESS: at sentence #25170000, processed 69373111 words, keeping 1807698 word types
2018-12-17 13:49:53,835 : INFO : PROGRESS: at sentence #25180000, processed 69400569 words, keeping 1808144 word types
2018-12-17 13:49:53,876 : INFO : PROGRESS: at sentence #25190000, processed 69427257 words, keeping 1808659 word types
2018-12-17 13:49:53,916 : INFO : PROGRESS: at se

2018-12-17 13:49:56,435 : INFO : PROGRESS: at sentence #25810000, processed 71008419 words, keeping 1832000 word types
2018-12-17 13:49:56,480 : INFO : PROGRESS: at sentence #25820000, processed 71031110 words, keeping 1832356 word types
2018-12-17 13:49:56,523 : INFO : PROGRESS: at sentence #25830000, processed 71055790 words, keeping 1832762 word types
2018-12-17 13:49:56,583 : INFO : PROGRESS: at sentence #25840000, processed 71082995 words, keeping 1833145 word types
2018-12-17 13:49:56,628 : INFO : PROGRESS: at sentence #25850000, processed 71108824 words, keeping 1833571 word types
2018-12-17 13:49:56,670 : INFO : PROGRESS: at sentence #25860000, processed 71133245 words, keeping 1833921 word types
2018-12-17 13:49:56,718 : INFO : PROGRESS: at sentence #25870000, processed 71160947 words, keeping 1834324 word types
2018-12-17 13:49:56,760 : INFO : PROGRESS: at sentence #25880000, processed 71190319 words, keeping 1834668 word types
2018-12-17 13:49:56,802 : INFO : PROGRESS: at se

2018-12-17 13:49:59,447 : INFO : PROGRESS: at sentence #26500000, processed 72836437 words, keeping 1863865 word types
2018-12-17 13:49:59,499 : INFO : PROGRESS: at sentence #26510000, processed 72865392 words, keeping 1864303 word types
2018-12-17 13:49:59,540 : INFO : PROGRESS: at sentence #26520000, processed 72886645 words, keeping 1864892 word types
2018-12-17 13:49:59,578 : INFO : PROGRESS: at sentence #26530000, processed 72901457 words, keeping 1865662 word types
2018-12-17 13:49:59,615 : INFO : PROGRESS: at sentence #26540000, processed 72917629 words, keeping 1866331 word types
2018-12-17 13:49:59,663 : INFO : PROGRESS: at sentence #26550000, processed 72942208 words, keeping 1866898 word types
2018-12-17 13:49:59,708 : INFO : PROGRESS: at sentence #26560000, processed 72972261 words, keeping 1867464 word types
2018-12-17 13:49:59,757 : INFO : PROGRESS: at sentence #26570000, processed 72999284 words, keeping 1867839 word types
2018-12-17 13:49:59,801 : INFO : PROGRESS: at se

2018-12-17 13:50:02,532 : INFO : PROGRESS: at sentence #27190000, processed 74574045 words, keeping 1893315 word types
2018-12-17 13:50:02,582 : INFO : PROGRESS: at sentence #27200000, processed 74601766 words, keeping 1893675 word types
2018-12-17 13:50:02,627 : INFO : PROGRESS: at sentence #27210000, processed 74628313 words, keeping 1893943 word types
2018-12-17 13:50:02,678 : INFO : PROGRESS: at sentence #27220000, processed 74656358 words, keeping 1894227 word types
2018-12-17 13:50:02,726 : INFO : PROGRESS: at sentence #27230000, processed 74684734 words, keeping 1894620 word types
2018-12-17 13:50:02,774 : INFO : PROGRESS: at sentence #27240000, processed 74714377 words, keeping 1895026 word types
2018-12-17 13:50:02,823 : INFO : PROGRESS: at sentence #27250000, processed 74742731 words, keeping 1895380 word types
2018-12-17 13:50:02,864 : INFO : PROGRESS: at sentence #27260000, processed 74766557 words, keeping 1895818 word types
2018-12-17 13:50:02,909 : INFO : PROGRESS: at se

2018-12-17 13:50:05,909 : INFO : PROGRESS: at sentence #27880000, processed 76393334 words, keeping 1920775 word types
2018-12-17 13:50:05,964 : INFO : PROGRESS: at sentence #27890000, processed 76419066 words, keeping 1921239 word types
2018-12-17 13:50:06,008 : INFO : PROGRESS: at sentence #27900000, processed 76442185 words, keeping 1921571 word types
2018-12-17 13:50:06,060 : INFO : PROGRESS: at sentence #27910000, processed 76465704 words, keeping 1921909 word types
2018-12-17 13:50:06,112 : INFO : PROGRESS: at sentence #27920000, processed 76494798 words, keeping 1922162 word types
2018-12-17 13:50:06,161 : INFO : PROGRESS: at sentence #27930000, processed 76519542 words, keeping 1922539 word types
2018-12-17 13:50:06,211 : INFO : PROGRESS: at sentence #27940000, processed 76545991 words, keeping 1922917 word types
2018-12-17 13:50:06,263 : INFO : PROGRESS: at sentence #27950000, processed 76571202 words, keeping 1923271 word types
2018-12-17 13:50:06,309 : INFO : PROGRESS: at se

2018-12-17 13:50:09,176 : INFO : PROGRESS: at sentence #28570000, processed 78089615 words, keeping 1948347 word types
2018-12-17 13:50:09,219 : INFO : PROGRESS: at sentence #28580000, processed 78117036 words, keeping 1948805 word types
2018-12-17 13:50:09,264 : INFO : PROGRESS: at sentence #28590000, processed 78142411 words, keeping 1949076 word types
2018-12-17 13:50:09,303 : INFO : PROGRESS: at sentence #28600000, processed 78166752 words, keeping 1949357 word types
2018-12-17 13:50:09,340 : INFO : PROGRESS: at sentence #28610000, processed 78194351 words, keeping 1949755 word types
2018-12-17 13:50:09,383 : INFO : PROGRESS: at sentence #28620000, processed 78221392 words, keeping 1949994 word types
2018-12-17 13:50:09,423 : INFO : PROGRESS: at sentence #28630000, processed 78246458 words, keeping 1950357 word types
2018-12-17 13:50:09,467 : INFO : PROGRESS: at sentence #28640000, processed 78271743 words, keeping 1950704 word types
2018-12-17 13:50:09,512 : INFO : PROGRESS: at se

2018-12-17 13:50:12,228 : INFO : PROGRESS: at sentence #29260000, processed 79847637 words, keeping 1973216 word types
2018-12-17 13:50:12,273 : INFO : PROGRESS: at sentence #29270000, processed 79871908 words, keeping 1973687 word types
2018-12-17 13:50:12,307 : INFO : PROGRESS: at sentence #29280000, processed 79893751 words, keeping 1974007 word types
2018-12-17 13:50:12,345 : INFO : PROGRESS: at sentence #29290000, processed 79917172 words, keeping 1974214 word types
2018-12-17 13:50:12,403 : INFO : PROGRESS: at sentence #29300000, processed 79948663 words, keeping 1974503 word types
2018-12-17 13:50:12,445 : INFO : PROGRESS: at sentence #29310000, processed 79975738 words, keeping 1974933 word types
2018-12-17 13:50:12,482 : INFO : PROGRESS: at sentence #29320000, processed 79998719 words, keeping 1975244 word types
2018-12-17 13:50:12,519 : INFO : PROGRESS: at sentence #29330000, processed 80024248 words, keeping 1976240 word types
2018-12-17 13:50:12,556 : INFO : PROGRESS: at se

2018-12-17 13:50:15,337 : INFO : PROGRESS: at sentence #29950000, processed 81650276 words, keeping 2000541 word types
2018-12-17 13:50:15,381 : INFO : PROGRESS: at sentence #29960000, processed 81676219 words, keeping 2000851 word types
2018-12-17 13:50:15,423 : INFO : PROGRESS: at sentence #29970000, processed 81699467 words, keeping 2001265 word types
2018-12-17 13:50:15,486 : INFO : PROGRESS: at sentence #29980000, processed 81727219 words, keeping 2001608 word types
2018-12-17 13:50:15,537 : INFO : PROGRESS: at sentence #29990000, processed 81752633 words, keeping 2001990 word types
2018-12-17 13:50:15,578 : INFO : PROGRESS: at sentence #30000000, processed 81776758 words, keeping 2002309 word types
2018-12-17 13:50:15,618 : INFO : PROGRESS: at sentence #30010000, processed 81801928 words, keeping 2002640 word types
2018-12-17 13:50:15,661 : INFO : PROGRESS: at sentence #30020000, processed 81827809 words, keeping 2003276 word types
2018-12-17 13:50:15,701 : INFO : PROGRESS: at se

2018-12-17 13:50:18,329 : INFO : PROGRESS: at sentence #30640000, processed 83468849 words, keeping 2029307 word types
2018-12-17 13:50:18,378 : INFO : PROGRESS: at sentence #30650000, processed 83495077 words, keeping 2029661 word types
2018-12-17 13:50:18,419 : INFO : PROGRESS: at sentence #30660000, processed 83522920 words, keeping 2030084 word types
2018-12-17 13:50:18,453 : INFO : PROGRESS: at sentence #30670000, processed 83546061 words, keeping 2030439 word types
2018-12-17 13:50:18,494 : INFO : PROGRESS: at sentence #30680000, processed 83569726 words, keeping 2030788 word types
2018-12-17 13:50:18,532 : INFO : PROGRESS: at sentence #30690000, processed 83596769 words, keeping 2031196 word types
2018-12-17 13:50:18,569 : INFO : PROGRESS: at sentence #30700000, processed 83621263 words, keeping 2031477 word types
2018-12-17 13:50:18,610 : INFO : PROGRESS: at sentence #30710000, processed 83650125 words, keeping 2031933 word types
2018-12-17 13:50:18,649 : INFO : PROGRESS: at se

2018-12-17 13:50:21,234 : INFO : PROGRESS: at sentence #31330000, processed 85262139 words, keeping 2057178 word types
2018-12-17 13:50:21,283 : INFO : PROGRESS: at sentence #31340000, processed 85287009 words, keeping 2057426 word types
2018-12-17 13:50:21,344 : INFO : PROGRESS: at sentence #31350000, processed 85311547 words, keeping 2057680 word types
2018-12-17 13:50:21,392 : INFO : PROGRESS: at sentence #31360000, processed 85337886 words, keeping 2058054 word types
2018-12-17 13:50:21,437 : INFO : PROGRESS: at sentence #31370000, processed 85364167 words, keeping 2058421 word types
2018-12-17 13:50:21,478 : INFO : PROGRESS: at sentence #31380000, processed 85385079 words, keeping 2058626 word types
2018-12-17 13:50:21,513 : INFO : PROGRESS: at sentence #31390000, processed 85413283 words, keeping 2058982 word types
2018-12-17 13:50:21,550 : INFO : PROGRESS: at sentence #31400000, processed 85443667 words, keeping 2059263 word types
2018-12-17 13:50:21,594 : INFO : PROGRESS: at se

2018-12-17 13:50:24,257 : INFO : PROGRESS: at sentence #32020000, processed 87044297 words, keeping 2085043 word types
2018-12-17 13:50:24,307 : INFO : PROGRESS: at sentence #32030000, processed 87072469 words, keeping 2085536 word types
2018-12-17 13:50:24,348 : INFO : PROGRESS: at sentence #32040000, processed 87092319 words, keeping 2085771 word types
2018-12-17 13:50:24,392 : INFO : PROGRESS: at sentence #32050000, processed 87114702 words, keeping 2086101 word types
2018-12-17 13:50:24,438 : INFO : PROGRESS: at sentence #32060000, processed 87142169 words, keeping 2086526 word types
2018-12-17 13:50:24,481 : INFO : PROGRESS: at sentence #32070000, processed 87173815 words, keeping 2087126 word types
2018-12-17 13:50:24,524 : INFO : PROGRESS: at sentence #32080000, processed 87202108 words, keeping 2087659 word types
2018-12-17 13:50:24,569 : INFO : PROGRESS: at sentence #32090000, processed 87223144 words, keeping 2089128 word types
2018-12-17 13:50:24,619 : INFO : PROGRESS: at se

2018-12-17 13:50:27,449 : INFO : PROGRESS: at sentence #32710000, processed 88852351 words, keeping 2115344 word types
2018-12-17 13:50:27,488 : INFO : PROGRESS: at sentence #32720000, processed 88871568 words, keeping 2115677 word types
2018-12-17 13:50:27,526 : INFO : PROGRESS: at sentence #32730000, processed 88891962 words, keeping 2115770 word types
2018-12-17 13:50:27,567 : INFO : PROGRESS: at sentence #32740000, processed 88915672 words, keeping 2116035 word types
2018-12-17 13:50:27,622 : INFO : PROGRESS: at sentence #32750000, processed 88945461 words, keeping 2116422 word types
2018-12-17 13:50:27,669 : INFO : PROGRESS: at sentence #32760000, processed 88968216 words, keeping 2116813 word types
2018-12-17 13:50:27,716 : INFO : PROGRESS: at sentence #32770000, processed 88993552 words, keeping 2117127 word types
2018-12-17 13:50:27,767 : INFO : PROGRESS: at sentence #32780000, processed 89022439 words, keeping 2117380 word types
2018-12-17 13:50:27,820 : INFO : PROGRESS: at se

2018-12-17 13:50:30,985 : INFO : PROGRESS: at sentence #33400000, processed 90650458 words, keeping 2139991 word types
2018-12-17 13:50:31,045 : INFO : PROGRESS: at sentence #33410000, processed 90678242 words, keeping 2140591 word types
2018-12-17 13:50:31,097 : INFO : PROGRESS: at sentence #33420000, processed 90702686 words, keeping 2140897 word types
2018-12-17 13:50:31,160 : INFO : PROGRESS: at sentence #33430000, processed 90740249 words, keeping 2141311 word types
2018-12-17 13:50:31,215 : INFO : PROGRESS: at sentence #33440000, processed 90767559 words, keeping 2141680 word types
2018-12-17 13:50:31,264 : INFO : PROGRESS: at sentence #33450000, processed 90792200 words, keeping 2141956 word types
2018-12-17 13:50:31,317 : INFO : PROGRESS: at sentence #33460000, processed 90819781 words, keeping 2142251 word types
2018-12-17 13:50:31,367 : INFO : PROGRESS: at sentence #33470000, processed 90847367 words, keeping 2142476 word types
2018-12-17 13:50:31,416 : INFO : PROGRESS: at se

2018-12-17 13:50:34,167 : INFO : PROGRESS: at sentence #34090000, processed 92463921 words, keeping 2169426 word types
2018-12-17 13:50:34,211 : INFO : PROGRESS: at sentence #34100000, processed 92488222 words, keeping 2169658 word types
2018-12-17 13:50:34,257 : INFO : PROGRESS: at sentence #34110000, processed 92513433 words, keeping 2170032 word types
2018-12-17 13:50:34,298 : INFO : PROGRESS: at sentence #34120000, processed 92535568 words, keeping 2170703 word types
2018-12-17 13:50:34,337 : INFO : PROGRESS: at sentence #34130000, processed 92560883 words, keeping 2171156 word types
2018-12-17 13:50:34,380 : INFO : PROGRESS: at sentence #34140000, processed 92588817 words, keeping 2171665 word types
2018-12-17 13:50:34,418 : INFO : PROGRESS: at sentence #34150000, processed 92613981 words, keeping 2172070 word types
2018-12-17 13:50:34,459 : INFO : PROGRESS: at sentence #34160000, processed 92641798 words, keeping 2172537 word types
2018-12-17 13:50:34,501 : INFO : PROGRESS: at se

2018-12-17 13:50:37,056 : INFO : PROGRESS: at sentence #34780000, processed 94219036 words, keeping 2196046 word types
2018-12-17 13:50:37,094 : INFO : PROGRESS: at sentence #34790000, processed 94244260 words, keeping 2196375 word types
2018-12-17 13:50:37,131 : INFO : PROGRESS: at sentence #34800000, processed 94270604 words, keeping 2196678 word types
2018-12-17 13:50:37,174 : INFO : PROGRESS: at sentence #34810000, processed 94294367 words, keeping 2196996 word types
2018-12-17 13:50:37,208 : INFO : PROGRESS: at sentence #34820000, processed 94315043 words, keeping 2197192 word types
2018-12-17 13:50:37,243 : INFO : PROGRESS: at sentence #34830000, processed 94334149 words, keeping 2197473 word types
2018-12-17 13:50:37,283 : INFO : PROGRESS: at sentence #34840000, processed 94357996 words, keeping 2197725 word types
2018-12-17 13:50:37,324 : INFO : PROGRESS: at sentence #34850000, processed 94385636 words, keeping 2198060 word types
2018-12-17 13:50:37,368 : INFO : PROGRESS: at se

2018-12-17 13:50:39,836 : INFO : PROGRESS: at sentence #35470000, processed 95928769 words, keeping 2219352 word types
2018-12-17 13:50:39,876 : INFO : PROGRESS: at sentence #35480000, processed 95955727 words, keeping 2219793 word types
2018-12-17 13:50:39,911 : INFO : PROGRESS: at sentence #35490000, processed 95982031 words, keeping 2220207 word types
2018-12-17 13:50:39,953 : INFO : PROGRESS: at sentence #35500000, processed 96002822 words, keeping 2220436 word types
2018-12-17 13:50:39,993 : INFO : PROGRESS: at sentence #35510000, processed 96028715 words, keeping 2220892 word types
2018-12-17 13:50:40,029 : INFO : PROGRESS: at sentence #35520000, processed 96052846 words, keeping 2221436 word types
2018-12-17 13:50:40,069 : INFO : PROGRESS: at sentence #35530000, processed 96077532 words, keeping 2221879 word types
2018-12-17 13:50:40,111 : INFO : PROGRESS: at sentence #35540000, processed 96103259 words, keeping 2222268 word types
2018-12-17 13:50:40,147 : INFO : PROGRESS: at se

2018-12-17 13:50:42,653 : INFO : PROGRESS: at sentence #36160000, processed 97703375 words, keeping 2246224 word types
2018-12-17 13:50:42,695 : INFO : PROGRESS: at sentence #36170000, processed 97730213 words, keeping 2246518 word types
2018-12-17 13:50:42,735 : INFO : PROGRESS: at sentence #36180000, processed 97754082 words, keeping 2246791 word types
2018-12-17 13:50:42,775 : INFO : PROGRESS: at sentence #36190000, processed 97781916 words, keeping 2247204 word types
2018-12-17 13:50:42,809 : INFO : PROGRESS: at sentence #36200000, processed 97803235 words, keeping 2247398 word types
2018-12-17 13:50:42,852 : INFO : PROGRESS: at sentence #36210000, processed 97831132 words, keeping 2247724 word types
2018-12-17 13:50:42,892 : INFO : PROGRESS: at sentence #36220000, processed 97856346 words, keeping 2248126 word types
2018-12-17 13:50:42,934 : INFO : PROGRESS: at sentence #36230000, processed 97880268 words, keeping 2248442 word types
2018-12-17 13:50:42,971 : INFO : PROGRESS: at se

2018-12-17 13:50:45,495 : INFO : PROGRESS: at sentence #36850000, processed 99449575 words, keeping 2272516 word types
2018-12-17 13:50:45,542 : INFO : PROGRESS: at sentence #36860000, processed 99474819 words, keeping 2272832 word types
2018-12-17 13:50:45,587 : INFO : PROGRESS: at sentence #36870000, processed 99501198 words, keeping 2273134 word types
2018-12-17 13:50:45,633 : INFO : PROGRESS: at sentence #36880000, processed 99524372 words, keeping 2273646 word types
2018-12-17 13:50:45,682 : INFO : PROGRESS: at sentence #36890000, processed 99548349 words, keeping 2274044 word types
2018-12-17 13:50:45,721 : INFO : PROGRESS: at sentence #36900000, processed 99574460 words, keeping 2274335 word types
2018-12-17 13:50:45,755 : INFO : PROGRESS: at sentence #36910000, processed 99602407 words, keeping 2274681 word types
2018-12-17 13:50:45,794 : INFO : PROGRESS: at sentence #36920000, processed 99629184 words, keeping 2275034 word types
2018-12-17 13:50:45,830 : INFO : PROGRESS: at se

2018-12-17 13:50:48,513 : INFO : PROGRESS: at sentence #37540000, processed 101162904 words, keeping 2297993 word types
2018-12-17 13:50:48,553 : INFO : PROGRESS: at sentence #37550000, processed 101184276 words, keeping 2298156 word types
2018-12-17 13:50:48,598 : INFO : PROGRESS: at sentence #37560000, processed 101207971 words, keeping 2298470 word types
2018-12-17 13:50:48,645 : INFO : PROGRESS: at sentence #37570000, processed 101234109 words, keeping 2298879 word types
2018-12-17 13:50:48,691 : INFO : PROGRESS: at sentence #37580000, processed 101257967 words, keeping 2299125 word types
2018-12-17 13:50:48,737 : INFO : PROGRESS: at sentence #37590000, processed 101285145 words, keeping 2299486 word types
2018-12-17 13:50:48,785 : INFO : PROGRESS: at sentence #37600000, processed 101311098 words, keeping 2299723 word types
2018-12-17 13:50:48,834 : INFO : PROGRESS: at sentence #37610000, processed 101337365 words, keeping 2299949 word types
2018-12-17 13:50:48,880 : INFO : PROGRES

2018-12-17 13:50:51,720 : INFO : PROGRESS: at sentence #38230000, processed 102989391 words, keeping 2322488 word types
2018-12-17 13:50:51,769 : INFO : PROGRESS: at sentence #38240000, processed 103020412 words, keeping 2323003 word types
2018-12-17 13:50:51,821 : INFO : PROGRESS: at sentence #38250000, processed 103042958 words, keeping 2323406 word types
2018-12-17 13:50:51,867 : INFO : PROGRESS: at sentence #38260000, processed 103065848 words, keeping 2323757 word types
2018-12-17 13:50:51,908 : INFO : PROGRESS: at sentence #38270000, processed 103083304 words, keeping 2324082 word types
2018-12-17 13:50:51,947 : INFO : PROGRESS: at sentence #38280000, processed 103095884 words, keeping 2324400 word types
2018-12-17 13:50:51,996 : INFO : PROGRESS: at sentence #38290000, processed 103121173 words, keeping 2324818 word types
2018-12-17 13:50:52,047 : INFO : PROGRESS: at sentence #38300000, processed 103149109 words, keeping 2325089 word types
2018-12-17 13:50:52,100 : INFO : PROGRES

2018-12-17 13:50:55,176 : INFO : PROGRESS: at sentence #38920000, processed 104811230 words, keeping 2348385 word types
2018-12-17 13:50:55,225 : INFO : PROGRESS: at sentence #38930000, processed 104837969 words, keeping 2348806 word types
2018-12-17 13:50:55,275 : INFO : PROGRESS: at sentence #38940000, processed 104863348 words, keeping 2349190 word types
2018-12-17 13:50:55,323 : INFO : PROGRESS: at sentence #38950000, processed 104888398 words, keeping 2349474 word types
2018-12-17 13:50:55,373 : INFO : PROGRESS: at sentence #38960000, processed 104915177 words, keeping 2349833 word types
2018-12-17 13:50:55,420 : INFO : PROGRESS: at sentence #38970000, processed 104940630 words, keeping 2350203 word types
2018-12-17 13:50:55,473 : INFO : PROGRESS: at sentence #38980000, processed 104969976 words, keeping 2350481 word types
2018-12-17 13:50:55,520 : INFO : PROGRESS: at sentence #38990000, processed 104995320 words, keeping 2350802 word types
2018-12-17 13:50:55,562 : INFO : PROGRES

2018-12-17 13:50:58,289 : INFO : PROGRESS: at sentence #39610000, processed 106599020 words, keeping 2373687 word types
2018-12-17 13:50:58,331 : INFO : PROGRESS: at sentence #39620000, processed 106626736 words, keeping 2373794 word types
2018-12-17 13:50:58,376 : INFO : PROGRESS: at sentence #39630000, processed 106656124 words, keeping 2374043 word types
2018-12-17 13:50:58,420 : INFO : PROGRESS: at sentence #39640000, processed 106682285 words, keeping 2374344 word types
2018-12-17 13:50:58,458 : INFO : PROGRESS: at sentence #39650000, processed 106702879 words, keeping 2374668 word types
2018-12-17 13:50:58,489 : INFO : PROGRESS: at sentence #39660000, processed 106724717 words, keeping 2374950 word types
2018-12-17 13:50:58,526 : INFO : PROGRESS: at sentence #39670000, processed 106747368 words, keeping 2375530 word types
2018-12-17 13:50:58,568 : INFO : PROGRESS: at sentence #39680000, processed 106768228 words, keeping 2376737 word types
2018-12-17 13:50:58,605 : INFO : PROGRES

2018-12-17 13:51:01,184 : INFO : PROGRESS: at sentence #40300000, processed 108381630 words, keeping 2402700 word types
2018-12-17 13:51:01,231 : INFO : PROGRESS: at sentence #40310000, processed 108407357 words, keeping 2403014 word types
2018-12-17 13:51:01,281 : INFO : PROGRESS: at sentence #40320000, processed 108440061 words, keeping 2403418 word types
2018-12-17 13:51:01,321 : INFO : PROGRESS: at sentence #40330000, processed 108466590 words, keeping 2403767 word types
2018-12-17 13:51:01,361 : INFO : PROGRESS: at sentence #40340000, processed 108491436 words, keeping 2404066 word types
2018-12-17 13:51:01,402 : INFO : PROGRESS: at sentence #40350000, processed 108517849 words, keeping 2404360 word types
2018-12-17 13:51:01,441 : INFO : PROGRESS: at sentence #40360000, processed 108542169 words, keeping 2404837 word types
2018-12-17 13:51:01,480 : INFO : PROGRESS: at sentence #40370000, processed 108568744 words, keeping 2405171 word types
2018-12-17 13:51:01,520 : INFO : PROGRES

2018-12-17 13:51:04,020 : INFO : PROGRESS: at sentence #40990000, processed 110167016 words, keeping 2430900 word types
2018-12-17 13:51:04,055 : INFO : PROGRESS: at sentence #41000000, processed 110189393 words, keeping 2431168 word types
2018-12-17 13:51:04,097 : INFO : PROGRESS: at sentence #41010000, processed 110212886 words, keeping 2431404 word types
2018-12-17 13:51:04,142 : INFO : PROGRESS: at sentence #41020000, processed 110237196 words, keeping 2431636 word types
2018-12-17 13:51:04,184 : INFO : PROGRESS: at sentence #41030000, processed 110262179 words, keeping 2431845 word types
2018-12-17 13:51:04,228 : INFO : PROGRESS: at sentence #41040000, processed 110290343 words, keeping 2432365 word types
2018-12-17 13:51:04,268 : INFO : PROGRESS: at sentence #41050000, processed 110311823 words, keeping 2432729 word types
2018-12-17 13:51:04,312 : INFO : PROGRESS: at sentence #41060000, processed 110335305 words, keeping 2433056 word types
2018-12-17 13:51:04,355 : INFO : PROGRES

2018-12-17 13:51:06,827 : INFO : PROGRESS: at sentence #41680000, processed 111808134 words, keeping 2455010 word types
2018-12-17 13:51:06,866 : INFO : PROGRESS: at sentence #41690000, processed 111833326 words, keeping 2455438 word types
2018-12-17 13:51:06,905 : INFO : PROGRESS: at sentence #41700000, processed 111857687 words, keeping 2455629 word types
2018-12-17 13:51:06,940 : INFO : PROGRESS: at sentence #41710000, processed 111883478 words, keeping 2455710 word types
2018-12-17 13:51:06,988 : INFO : PROGRESS: at sentence #41720000, processed 111906919 words, keeping 2455738 word types
2018-12-17 13:51:07,027 : INFO : PROGRESS: at sentence #41730000, processed 111930932 words, keeping 2455844 word types
2018-12-17 13:51:07,059 : INFO : PROGRESS: at sentence #41740000, processed 111955521 words, keeping 2455916 word types
2018-12-17 13:51:07,114 : INFO : PROGRESS: at sentence #41750000, processed 111978531 words, keeping 2455979 word types
2018-12-17 13:51:07,146 : INFO : PROGRES

2018-12-17 13:51:09,404 : INFO : PROGRESS: at sentence #42370000, processed 113471236 words, keeping 2466554 word types
2018-12-17 13:51:09,445 : INFO : PROGRESS: at sentence #42380000, processed 113496877 words, keeping 2466744 word types
2018-12-17 13:51:09,486 : INFO : PROGRESS: at sentence #42390000, processed 113522634 words, keeping 2467105 word types
2018-12-17 13:51:09,525 : INFO : PROGRESS: at sentence #42400000, processed 113548942 words, keeping 2467592 word types
2018-12-17 13:51:09,571 : INFO : PROGRESS: at sentence #42410000, processed 113577546 words, keeping 2468064 word types
2018-12-17 13:51:09,615 : INFO : PROGRESS: at sentence #42420000, processed 113602819 words, keeping 2468356 word types
2018-12-17 13:51:09,654 : INFO : PROGRESS: at sentence #42430000, processed 113628350 words, keeping 2468763 word types
2018-12-17 13:51:09,697 : INFO : PROGRESS: at sentence #42440000, processed 113653831 words, keeping 2469138 word types
2018-12-17 13:51:09,737 : INFO : PROGRES

2018-12-17 13:51:12,352 : INFO : PROGRESS: at sentence #43060000, processed 115227722 words, keeping 2490870 word types
2018-12-17 13:51:12,397 : INFO : PROGRESS: at sentence #43070000, processed 115255865 words, keeping 2491160 word types
2018-12-17 13:51:12,438 : INFO : PROGRESS: at sentence #43080000, processed 115279965 words, keeping 2491580 word types
2018-12-17 13:51:12,479 : INFO : PROGRESS: at sentence #43090000, processed 115306588 words, keeping 2491907 word types
2018-12-17 13:51:12,523 : INFO : PROGRESS: at sentence #43100000, processed 115330737 words, keeping 2492251 word types
2018-12-17 13:51:12,571 : INFO : PROGRESS: at sentence #43110000, processed 115356193 words, keeping 2492528 word types
2018-12-17 13:51:12,618 : INFO : PROGRESS: at sentence #43120000, processed 115383302 words, keeping 2492868 word types
2018-12-17 13:51:12,668 : INFO : PROGRESS: at sentence #43130000, processed 115411469 words, keeping 2493267 word types
2018-12-17 13:51:12,711 : INFO : PROGRES

2018-12-17 13:51:18,861 : INFO : PROGRESS: at sentence #44430000, processed 118682487 words, keeping 2537082 word types
2018-12-17 13:51:18,913 : INFO : PROGRESS: at sentence #44440000, processed 118705937 words, keeping 2537225 word types
2018-12-17 13:51:18,962 : INFO : PROGRESS: at sentence #44450000, processed 118731801 words, keeping 2537501 word types
2018-12-17 13:51:19,008 : INFO : PROGRESS: at sentence #44460000, processed 118758470 words, keeping 2537975 word types
2018-12-17 13:51:19,057 : INFO : PROGRESS: at sentence #44470000, processed 118783106 words, keeping 2538303 word types
2018-12-17 13:51:19,101 : INFO : PROGRESS: at sentence #44480000, processed 118802802 words, keeping 2538620 word types
2018-12-17 13:51:19,149 : INFO : PROGRESS: at sentence #44490000, processed 118825478 words, keeping 2539060 word types
2018-12-17 13:51:19,197 : INFO : PROGRESS: at sentence #44500000, processed 118848844 words, keeping 2539492 word types
2018-12-17 13:51:19,244 : INFO : PROGRES

2018-12-17 13:51:21,975 : INFO : PROGRESS: at sentence #45120000, processed 120358057 words, keeping 2561071 word types
2018-12-17 13:51:22,014 : INFO : PROGRESS: at sentence #45130000, processed 120384440 words, keeping 2561368 word types
2018-12-17 13:51:22,052 : INFO : PROGRESS: at sentence #45140000, processed 120408307 words, keeping 2561805 word types
2018-12-17 13:51:22,094 : INFO : PROGRESS: at sentence #45150000, processed 120434538 words, keeping 2562166 word types
2018-12-17 13:51:22,133 : INFO : PROGRESS: at sentence #45160000, processed 120462423 words, keeping 2562589 word types
2018-12-17 13:51:22,171 : INFO : PROGRESS: at sentence #45170000, processed 120482817 words, keeping 2562960 word types
2018-12-17 13:51:22,217 : INFO : PROGRESS: at sentence #45180000, processed 120507122 words, keeping 2563297 word types
2018-12-17 13:51:22,257 : INFO : PROGRESS: at sentence #45190000, processed 120529882 words, keeping 2563578 word types
2018-12-17 13:51:22,309 : INFO : PROGRES

2018-12-17 13:51:24,839 : INFO : PROGRESS: at sentence #45810000, processed 122062432 words, keeping 2587757 word types
2018-12-17 13:51:24,878 : INFO : PROGRESS: at sentence #45820000, processed 122085441 words, keeping 2588103 word types
2018-12-17 13:51:24,918 : INFO : PROGRESS: at sentence #45830000, processed 122110971 words, keeping 2588385 word types
2018-12-17 13:51:24,949 : INFO : PROGRESS: at sentence #45840000, processed 122125105 words, keeping 2588621 word types
2018-12-17 13:51:25,000 : INFO : PROGRESS: at sentence #45850000, processed 122147248 words, keeping 2588986 word types
2018-12-17 13:51:25,038 : INFO : PROGRESS: at sentence #45860000, processed 122167205 words, keeping 2589239 word types
2018-12-17 13:51:25,079 : INFO : PROGRESS: at sentence #45870000, processed 122193183 words, keeping 2589687 word types
2018-12-17 13:51:25,119 : INFO : PROGRESS: at sentence #45880000, processed 122217769 words, keeping 2590158 word types
2018-12-17 13:51:25,158 : INFO : PROGRES

2018-12-17 13:51:27,645 : INFO : PROGRESS: at sentence #46500000, processed 123743986 words, keeping 2610892 word types
2018-12-17 13:51:27,683 : INFO : PROGRESS: at sentence #46510000, processed 123771067 words, keeping 2611244 word types
2018-12-17 13:51:27,728 : INFO : PROGRESS: at sentence #46520000, processed 123798166 words, keeping 2611542 word types
2018-12-17 13:51:27,768 : INFO : PROGRESS: at sentence #46530000, processed 123821271 words, keeping 2612004 word types
2018-12-17 13:51:27,803 : INFO : PROGRESS: at sentence #46540000, processed 123846305 words, keeping 2612216 word types
2018-12-17 13:51:27,843 : INFO : PROGRESS: at sentence #46550000, processed 123872395 words, keeping 2612679 word types
2018-12-17 13:51:27,881 : INFO : PROGRESS: at sentence #46560000, processed 123898182 words, keeping 2612958 word types
2018-12-17 13:51:27,921 : INFO : PROGRESS: at sentence #46570000, processed 123923303 words, keeping 2613220 word types
2018-12-17 13:51:27,966 : INFO : PROGRES

2018-12-17 13:51:30,405 : INFO : PROGRESS: at sentence #47190000, processed 125401648 words, keeping 2634026 word types
2018-12-17 13:51:30,438 : INFO : PROGRESS: at sentence #47200000, processed 125426639 words, keeping 2634711 word types
2018-12-17 13:51:30,476 : INFO : PROGRESS: at sentence #47210000, processed 125450044 words, keeping 2635624 word types
2018-12-17 13:51:30,514 : INFO : PROGRESS: at sentence #47220000, processed 125472057 words, keeping 2635969 word types
2018-12-17 13:51:30,555 : INFO : PROGRESS: at sentence #47230000, processed 125498085 words, keeping 2636395 word types
2018-12-17 13:51:30,598 : INFO : PROGRESS: at sentence #47240000, processed 125522525 words, keeping 2636822 word types
2018-12-17 13:51:30,640 : INFO : PROGRESS: at sentence #47250000, processed 125548337 words, keeping 2637113 word types
2018-12-17 13:51:30,680 : INFO : PROGRESS: at sentence #47260000, processed 125573860 words, keeping 2637367 word types
2018-12-17 13:51:30,717 : INFO : PROGRES

2018-12-17 13:51:33,227 : INFO : PROGRESS: at sentence #47880000, processed 127091584 words, keeping 2659532 word types
2018-12-17 13:51:33,271 : INFO : PROGRESS: at sentence #47890000, processed 127118836 words, keeping 2659791 word types
2018-12-17 13:51:33,312 : INFO : PROGRESS: at sentence #47900000, processed 127142976 words, keeping 2660140 word types
2018-12-17 13:51:33,347 : INFO : PROGRESS: at sentence #47910000, processed 127169012 words, keeping 2660499 word types
2018-12-17 13:51:33,389 : INFO : PROGRESS: at sentence #47920000, processed 127191040 words, keeping 2661102 word types
2018-12-17 13:51:33,427 : INFO : PROGRESS: at sentence #47930000, processed 127215357 words, keeping 2661409 word types
2018-12-17 13:51:33,469 : INFO : PROGRESS: at sentence #47940000, processed 127240834 words, keeping 2661758 word types
2018-12-17 13:51:33,508 : INFO : PROGRESS: at sentence #47950000, processed 127265448 words, keeping 2662156 word types
2018-12-17 13:51:33,546 : INFO : PROGRES

2018-12-17 13:51:36,179 : INFO : PROGRESS: at sentence #48570000, processed 128848757 words, keeping 2685551 word types
2018-12-17 13:51:36,221 : INFO : PROGRESS: at sentence #48580000, processed 128870917 words, keeping 2685882 word types
2018-12-17 13:51:36,273 : INFO : PROGRESS: at sentence #48590000, processed 128897054 words, keeping 2686278 word types
2018-12-17 13:51:36,318 : INFO : PROGRESS: at sentence #48600000, processed 128923303 words, keeping 2686546 word types
2018-12-17 13:51:36,367 : INFO : PROGRESS: at sentence #48610000, processed 128951676 words, keeping 2687040 word types
2018-12-17 13:51:36,414 : INFO : PROGRESS: at sentence #48620000, processed 128981502 words, keeping 2687315 word types
2018-12-17 13:51:36,458 : INFO : PROGRESS: at sentence #48630000, processed 129003857 words, keeping 2688003 word types
2018-12-17 13:51:36,501 : INFO : PROGRESS: at sentence #48640000, processed 129031477 words, keeping 2688402 word types
2018-12-17 13:51:36,545 : INFO : PROGRES

2018-12-17 13:51:39,307 : INFO : PROGRESS: at sentence #49260000, processed 130591121 words, keeping 2708899 word types
2018-12-17 13:51:39,353 : INFO : PROGRESS: at sentence #49270000, processed 130618535 words, keeping 2709150 word types
2018-12-17 13:51:39,395 : INFO : PROGRESS: at sentence #49280000, processed 130643405 words, keeping 2709455 word types
2018-12-17 13:51:39,443 : INFO : PROGRESS: at sentence #49290000, processed 130670893 words, keeping 2709751 word types
2018-12-17 13:51:39,487 : INFO : PROGRESS: at sentence #49300000, processed 130692676 words, keeping 2710031 word types
2018-12-17 13:51:39,531 : INFO : PROGRESS: at sentence #49310000, processed 130718070 words, keeping 2710353 word types
2018-12-17 13:51:39,574 : INFO : PROGRESS: at sentence #49320000, processed 130743972 words, keeping 2710599 word types
2018-12-17 13:51:39,617 : INFO : PROGRESS: at sentence #49330000, processed 130767741 words, keeping 2711548 word types
2018-12-17 13:51:39,664 : INFO : PROGRES

2018-12-17 13:51:42,718 : INFO : PROGRESS: at sentence #49950000, processed 132335775 words, keeping 2730476 word types
2018-12-17 13:51:42,769 : INFO : PROGRESS: at sentence #49960000, processed 132359540 words, keeping 2730653 word types
2018-12-17 13:51:42,815 : INFO : PROGRESS: at sentence #49970000, processed 132390098 words, keeping 2730898 word types
2018-12-17 13:51:42,867 : INFO : PROGRESS: at sentence #49980000, processed 132420585 words, keeping 2731251 word types
2018-12-17 13:51:42,913 : INFO : PROGRESS: at sentence #49990000, processed 132445738 words, keeping 2731562 word types
2018-12-17 13:51:42,965 : INFO : PROGRESS: at sentence #50000000, processed 132470365 words, keeping 2731792 word types
2018-12-17 13:51:43,015 : INFO : PROGRESS: at sentence #50010000, processed 132494240 words, keeping 2732332 word types
2018-12-17 13:51:43,060 : INFO : PROGRESS: at sentence #50020000, processed 132520120 words, keeping 2732809 word types
2018-12-17 13:51:43,105 : INFO : PROGRES

2018-12-17 13:51:45,909 : INFO : PROGRESS: at sentence #50640000, processed 134048318 words, keeping 2751268 word types
2018-12-17 13:51:45,948 : INFO : PROGRESS: at sentence #50650000, processed 134074588 words, keeping 2751608 word types
2018-12-17 13:51:45,991 : INFO : PROGRESS: at sentence #50660000, processed 134097428 words, keeping 2751995 word types
2018-12-17 13:51:46,034 : INFO : PROGRESS: at sentence #50670000, processed 134122336 words, keeping 2752289 word types
2018-12-17 13:51:46,075 : INFO : PROGRESS: at sentence #50680000, processed 134147864 words, keeping 2752606 word types
2018-12-17 13:51:46,116 : INFO : PROGRESS: at sentence #50690000, processed 134173723 words, keeping 2752859 word types
2018-12-17 13:51:46,153 : INFO : PROGRESS: at sentence #50700000, processed 134196202 words, keeping 2753264 word types
2018-12-17 13:51:46,194 : INFO : PROGRESS: at sentence #50710000, processed 134219857 words, keeping 2753543 word types
2018-12-17 13:51:46,240 : INFO : PROGRES

2018-12-17 13:51:48,427 : INFO : PROGRESS: at sentence #51330000, processed 135591375 words, keeping 2761149 word types
2018-12-17 13:51:48,464 : INFO : PROGRESS: at sentence #51340000, processed 135615226 words, keeping 2761737 word types
2018-12-17 13:51:48,508 : INFO : PROGRESS: at sentence #51350000, processed 135640369 words, keeping 2762162 word types
2018-12-17 13:51:48,550 : INFO : PROGRESS: at sentence #51360000, processed 135664626 words, keeping 2762420 word types
2018-12-17 13:51:48,587 : INFO : PROGRESS: at sentence #51370000, processed 135688360 words, keeping 2762645 word types
2018-12-17 13:51:48,632 : INFO : PROGRESS: at sentence #51380000, processed 135715774 words, keeping 2762937 word types
2018-12-17 13:51:48,674 : INFO : PROGRESS: at sentence #51390000, processed 135743325 words, keeping 2763183 word types
2018-12-17 13:51:48,716 : INFO : PROGRESS: at sentence #51400000, processed 135764349 words, keeping 2763462 word types
2018-12-17 13:51:48,755 : INFO : PROGRES

2018-12-17 13:51:51,330 : INFO : PROGRESS: at sentence #52020000, processed 137334241 words, keeping 2782168 word types
2018-12-17 13:51:51,368 : INFO : PROGRESS: at sentence #52030000, processed 137355851 words, keeping 2782383 word types
2018-12-17 13:51:51,405 : INFO : PROGRESS: at sentence #52040000, processed 137380780 words, keeping 2782559 word types
2018-12-17 13:51:51,442 : INFO : PROGRESS: at sentence #52050000, processed 137405447 words, keeping 2782812 word types
2018-12-17 13:51:51,484 : INFO : PROGRESS: at sentence #52060000, processed 137430715 words, keeping 2783072 word types
2018-12-17 13:51:51,525 : INFO : PROGRESS: at sentence #52070000, processed 137458761 words, keeping 2783387 word types
2018-12-17 13:51:51,557 : INFO : PROGRESS: at sentence #52080000, processed 137478506 words, keeping 2783563 word types
2018-12-17 13:51:51,601 : INFO : PROGRESS: at sentence #52090000, processed 137503734 words, keeping 2783873 word types
2018-12-17 13:51:51,641 : INFO : PROGRES

2018-12-17 13:51:54,377 : INFO : PROGRESS: at sentence #52710000, processed 139106345 words, keeping 2804404 word types
2018-12-17 13:51:54,419 : INFO : PROGRESS: at sentence #52720000, processed 139129141 words, keeping 2804625 word types
2018-12-17 13:51:54,463 : INFO : PROGRESS: at sentence #52730000, processed 139156698 words, keeping 2804834 word types
2018-12-17 13:51:54,507 : INFO : PROGRESS: at sentence #52740000, processed 139187401 words, keeping 2805095 word types
2018-12-17 13:51:54,542 : INFO : PROGRESS: at sentence #52750000, processed 139207678 words, keeping 2805508 word types
2018-12-17 13:51:54,583 : INFO : PROGRESS: at sentence #52760000, processed 139229434 words, keeping 2806095 word types
2018-12-17 13:51:54,623 : INFO : PROGRESS: at sentence #52770000, processed 139256408 words, keeping 2806340 word types
2018-12-17 13:51:54,666 : INFO : PROGRESS: at sentence #52780000, processed 139284255 words, keeping 2806639 word types
2018-12-17 13:51:54,703 : INFO : PROGRES

2018-12-17 13:51:57,274 : INFO : PROGRESS: at sentence #53400000, processed 140882718 words, keeping 2826758 word types
2018-12-17 13:51:57,316 : INFO : PROGRESS: at sentence #53410000, processed 140908417 words, keeping 2827126 word types
2018-12-17 13:51:57,352 : INFO : PROGRESS: at sentence #53420000, processed 140933510 words, keeping 2827487 word types
2018-12-17 13:51:57,394 : INFO : PROGRESS: at sentence #53430000, processed 140960909 words, keeping 2827778 word types
2018-12-17 13:51:57,432 : INFO : PROGRESS: at sentence #53440000, processed 140985798 words, keeping 2828093 word types
2018-12-17 13:51:57,470 : INFO : PROGRESS: at sentence #53450000, processed 141012977 words, keeping 2828374 word types
2018-12-17 13:51:57,511 : INFO : PROGRESS: at sentence #53460000, processed 141039005 words, keeping 2828664 word types
2018-12-17 13:51:57,548 : INFO : PROGRESS: at sentence #53470000, processed 141064094 words, keeping 2828838 word types
2018-12-17 13:51:57,584 : INFO : PROGRES

2018-12-17 13:52:00,075 : INFO : PROGRESS: at sentence #54090000, processed 142627762 words, keeping 2848990 word types
2018-12-17 13:52:00,117 : INFO : PROGRESS: at sentence #54100000, processed 142655319 words, keeping 2849309 word types
2018-12-17 13:52:00,160 : INFO : PROGRESS: at sentence #54110000, processed 142680507 words, keeping 2849697 word types
2018-12-17 13:52:00,202 : INFO : PROGRESS: at sentence #54120000, processed 142704544 words, keeping 2850089 word types
2018-12-17 13:52:00,242 : INFO : PROGRESS: at sentence #54130000, processed 142729549 words, keeping 2850364 word types
2018-12-17 13:52:00,284 : INFO : PROGRESS: at sentence #54140000, processed 142756982 words, keeping 2850634 word types
2018-12-17 13:52:00,320 : INFO : PROGRESS: at sentence #54150000, processed 142783800 words, keeping 2850928 word types
2018-12-17 13:52:00,368 : INFO : PROGRESS: at sentence #54160000, processed 142811816 words, keeping 2851124 word types
2018-12-17 13:52:00,411 : INFO : PROGRES

2018-12-17 13:52:02,892 : INFO : PROGRESS: at sentence #54780000, processed 144378354 words, keeping 2869123 word types
2018-12-17 13:52:02,936 : INFO : PROGRESS: at sentence #54790000, processed 144409291 words, keeping 2869507 word types
2018-12-17 13:52:02,969 : INFO : PROGRESS: at sentence #54800000, processed 144429132 words, keeping 2869747 word types
2018-12-17 13:52:03,026 : INFO : PROGRESS: at sentence #54810000, processed 144454738 words, keeping 2869941 word types
2018-12-17 13:52:03,063 : INFO : PROGRESS: at sentence #54820000, processed 144478804 words, keeping 2870188 word types
2018-12-17 13:52:03,099 : INFO : PROGRESS: at sentence #54830000, processed 144502166 words, keeping 2870535 word types
2018-12-17 13:52:03,145 : INFO : PROGRESS: at sentence #54840000, processed 144528228 words, keeping 2870707 word types
2018-12-17 13:52:03,187 : INFO : PROGRESS: at sentence #54850000, processed 144555191 words, keeping 2871117 word types
2018-12-17 13:52:03,228 : INFO : PROGRES

2018-12-17 13:52:05,734 : INFO : PROGRESS: at sentence #55470000, processed 146119168 words, keeping 2887970 word types
2018-12-17 13:52:05,780 : INFO : PROGRESS: at sentence #55480000, processed 146147203 words, keeping 2888275 word types
2018-12-17 13:52:05,820 : INFO : PROGRESS: at sentence #55490000, processed 146171572 words, keeping 2888556 word types
2018-12-17 13:52:05,862 : INFO : PROGRESS: at sentence #55500000, processed 146199873 words, keeping 2888915 word types
2018-12-17 13:52:05,900 : INFO : PROGRESS: at sentence #55510000, processed 146224896 words, keeping 2889169 word types
2018-12-17 13:52:05,940 : INFO : PROGRESS: at sentence #55520000, processed 146249763 words, keeping 2889391 word types
2018-12-17 13:52:05,982 : INFO : PROGRESS: at sentence #55530000, processed 146275243 words, keeping 2889591 word types
2018-12-17 13:52:06,023 : INFO : PROGRESS: at sentence #55540000, processed 146300885 words, keeping 2889902 word types
2018-12-17 13:52:06,064 : INFO : PROGRES

2018-12-17 13:52:08,653 : INFO : PROGRESS: at sentence #56160000, processed 147871502 words, keeping 2907029 word types
2018-12-17 13:52:08,689 : INFO : PROGRESS: at sentence #56170000, processed 147896778 words, keeping 2907224 word types
2018-12-17 13:52:08,733 : INFO : PROGRESS: at sentence #56180000, processed 147922480 words, keeping 2907487 word types
2018-12-17 13:52:08,772 : INFO : PROGRESS: at sentence #56190000, processed 147950085 words, keeping 2907659 word types
2018-12-17 13:52:08,812 : INFO : PROGRESS: at sentence #56200000, processed 147979038 words, keeping 2907823 word types
2018-12-17 13:52:08,857 : INFO : PROGRESS: at sentence #56210000, processed 148007200 words, keeping 2908103 word types
2018-12-17 13:52:08,899 : INFO : PROGRESS: at sentence #56220000, processed 148032985 words, keeping 2908288 word types
2018-12-17 13:52:08,941 : INFO : PROGRESS: at sentence #56230000, processed 148059999 words, keeping 2908490 word types
2018-12-17 13:52:08,995 : INFO : PROGRES

2018-12-17 13:52:11,776 : INFO : PROGRESS: at sentence #56850000, processed 149628243 words, keeping 2924403 word types
2018-12-17 13:52:11,825 : INFO : PROGRESS: at sentence #56860000, processed 149654533 words, keeping 2924640 word types
2018-12-17 13:52:11,871 : INFO : PROGRESS: at sentence #56870000, processed 149680434 words, keeping 2925082 word types
2018-12-17 13:52:11,924 : INFO : PROGRESS: at sentence #56880000, processed 149709238 words, keeping 2925322 word types
2018-12-17 13:52:11,969 : INFO : PROGRESS: at sentence #56890000, processed 149736748 words, keeping 2925538 word types
2018-12-17 13:52:12,014 : INFO : PROGRESS: at sentence #56900000, processed 149764122 words, keeping 2925823 word types
2018-12-17 13:52:12,066 : INFO : PROGRESS: at sentence #56910000, processed 149790451 words, keeping 2926117 word types
2018-12-17 13:52:12,117 : INFO : PROGRESS: at sentence #56920000, processed 149817031 words, keeping 2926376 word types
2018-12-17 13:52:12,164 : INFO : PROGRES

2018-12-17 13:52:15,035 : INFO : PROGRESS: at sentence #57540000, processed 151387982 words, keeping 2944511 word types
2018-12-17 13:52:15,083 : INFO : PROGRESS: at sentence #57550000, processed 151413063 words, keeping 2944755 word types
2018-12-17 13:52:15,131 : INFO : PROGRESS: at sentence #57560000, processed 151436347 words, keeping 2944969 word types
2018-12-17 13:52:15,178 : INFO : PROGRESS: at sentence #57570000, processed 151461637 words, keeping 2945195 word types
2018-12-17 13:52:15,227 : INFO : PROGRESS: at sentence #57580000, processed 151485019 words, keeping 2945551 word types
2018-12-17 13:52:15,271 : INFO : PROGRESS: at sentence #57590000, processed 151509688 words, keeping 2945913 word types
2018-12-17 13:52:15,316 : INFO : PROGRESS: at sentence #57600000, processed 151535784 words, keeping 2946324 word types
2018-12-17 13:52:15,370 : INFO : PROGRESS: at sentence #57610000, processed 151563776 words, keeping 2946665 word types
2018-12-17 13:52:15,420 : INFO : PROGRES

2018-12-17 13:52:18,534 : INFO : PROGRESS: at sentence #58230000, processed 153144610 words, keeping 2965242 word types
2018-12-17 13:52:18,576 : INFO : PROGRESS: at sentence #58240000, processed 153162694 words, keeping 2965548 word types
2018-12-17 13:52:18,630 : INFO : PROGRESS: at sentence #58250000, processed 153188364 words, keeping 2965907 word types
2018-12-17 13:52:18,677 : INFO : PROGRESS: at sentence #58260000, processed 153213172 words, keeping 2966221 word types
2018-12-17 13:52:18,728 : INFO : PROGRESS: at sentence #58270000, processed 153239214 words, keeping 2966485 word types
2018-12-17 13:52:18,770 : INFO : PROGRESS: at sentence #58280000, processed 153262954 words, keeping 2966705 word types
2018-12-17 13:52:18,825 : INFO : PROGRESS: at sentence #58290000, processed 153291096 words, keeping 2967027 word types
2018-12-17 13:52:18,873 : INFO : PROGRESS: at sentence #58300000, processed 153315555 words, keeping 2967348 word types
2018-12-17 13:52:18,924 : INFO : PROGRES

2018-12-17 13:52:21,564 : INFO : PROGRESS: at sentence #58920000, processed 154933057 words, keeping 2985234 word types
2018-12-17 13:52:21,601 : INFO : PROGRESS: at sentence #58930000, processed 154955670 words, keeping 2985463 word types
2018-12-17 13:52:21,638 : INFO : PROGRESS: at sentence #58940000, processed 154979916 words, keeping 2985660 word types
2018-12-17 13:52:21,678 : INFO : PROGRESS: at sentence #58950000, processed 155004770 words, keeping 2985954 word types
2018-12-17 13:52:21,722 : INFO : PROGRESS: at sentence #58960000, processed 155027768 words, keeping 2986175 word types
2018-12-17 13:52:21,763 : INFO : PROGRESS: at sentence #58970000, processed 155052599 words, keeping 2986512 word types
2018-12-17 13:52:21,804 : INFO : PROGRESS: at sentence #58980000, processed 155077052 words, keeping 2986680 word types
2018-12-17 13:52:21,847 : INFO : PROGRESS: at sentence #58990000, processed 155107215 words, keeping 2986895 word types
2018-12-17 13:52:21,884 : INFO : PROGRES

2018-12-17 13:52:24,311 : INFO : PROGRESS: at sentence #59610000, processed 156589196 words, keeping 3002476 word types
2018-12-17 13:52:24,353 : INFO : PROGRESS: at sentence #59620000, processed 156617852 words, keeping 3002773 word types
2018-12-17 13:52:24,400 : INFO : PROGRESS: at sentence #59630000, processed 156644666 words, keeping 3003045 word types
2018-12-17 13:52:24,435 : INFO : PROGRESS: at sentence #59640000, processed 156669107 words, keeping 3003386 word types
2018-12-17 13:52:24,473 : INFO : PROGRESS: at sentence #59650000, processed 156688888 words, keeping 3003674 word types
2018-12-17 13:52:24,514 : INFO : PROGRESS: at sentence #59660000, processed 156719247 words, keeping 3004011 word types
2018-12-17 13:52:24,558 : INFO : PROGRESS: at sentence #59670000, processed 156742965 words, keeping 3004262 word types
2018-12-17 13:52:24,600 : INFO : PROGRESS: at sentence #59680000, processed 156769013 words, keeping 3004589 word types
2018-12-17 13:52:24,648 : INFO : PROGRES

2018-12-17 13:52:29,993 : INFO : PROGRESS: at sentence #60980000, processed 160050054 words, keeping 3046119 word types
2018-12-17 13:52:30,035 : INFO : PROGRESS: at sentence #60990000, processed 160074689 words, keeping 3046380 word types
2018-12-17 13:52:30,071 : INFO : PROGRESS: at sentence #61000000, processed 160102230 words, keeping 3046840 word types
2018-12-17 13:52:30,114 : INFO : PROGRESS: at sentence #61010000, processed 160128183 words, keeping 3047388 word types
2018-12-17 13:52:30,149 : INFO : PROGRESS: at sentence #61020000, processed 160155902 words, keeping 3047806 word types
2018-12-17 13:52:30,192 : INFO : PROGRESS: at sentence #61030000, processed 160182837 words, keeping 3047982 word types
2018-12-17 13:52:30,237 : INFO : PROGRESS: at sentence #61040000, processed 160208349 words, keeping 3048199 word types
2018-12-17 13:52:30,272 : INFO : PROGRESS: at sentence #61050000, processed 160234237 words, keeping 3048425 word types
2018-12-17 13:52:30,317 : INFO : PROGRES

2018-12-17 13:52:32,776 : INFO : PROGRESS: at sentence #61670000, processed 161786344 words, keeping 3064221 word types
2018-12-17 13:52:32,821 : INFO : PROGRESS: at sentence #61680000, processed 161810185 words, keeping 3064435 word types
2018-12-17 13:52:32,857 : INFO : PROGRESS: at sentence #61690000, processed 161836975 words, keeping 3064839 word types
2018-12-17 13:52:32,892 : INFO : PROGRESS: at sentence #61700000, processed 161859046 words, keeping 3064965 word types
2018-12-17 13:52:32,932 : INFO : PROGRESS: at sentence #61710000, processed 161881603 words, keeping 3065163 word types
2018-12-17 13:52:32,972 : INFO : PROGRESS: at sentence #61720000, processed 161904887 words, keeping 3065362 word types
2018-12-17 13:52:33,021 : INFO : PROGRESS: at sentence #61730000, processed 161929639 words, keeping 3065661 word types
2018-12-17 13:52:33,060 : INFO : PROGRESS: at sentence #61740000, processed 161953343 words, keeping 3065884 word types
2018-12-17 13:52:33,112 : INFO : PROGRES

2018-12-17 13:52:35,726 : INFO : PROGRESS: at sentence #62360000, processed 163584462 words, keeping 3082328 word types
2018-12-17 13:52:35,764 : INFO : PROGRESS: at sentence #62370000, processed 163611038 words, keeping 3082547 word types
2018-12-17 13:52:35,796 : INFO : PROGRESS: at sentence #62380000, processed 163636282 words, keeping 3082745 word types
2018-12-17 13:52:35,831 : INFO : PROGRESS: at sentence #62390000, processed 163654330 words, keeping 3082847 word types
2018-12-17 13:52:35,871 : INFO : PROGRESS: at sentence #62400000, processed 163679525 words, keeping 3083121 word types
2018-12-17 13:52:35,911 : INFO : PROGRESS: at sentence #62410000, processed 163705244 words, keeping 3083313 word types
2018-12-17 13:52:35,951 : INFO : PROGRESS: at sentence #62420000, processed 163730659 words, keeping 3083656 word types
2018-12-17 13:52:36,011 : INFO : PROGRESS: at sentence #62430000, processed 163760940 words, keeping 3083897 word types
2018-12-17 13:52:36,050 : INFO : PROGRES

2018-12-17 13:52:38,669 : INFO : PROGRESS: at sentence #63050000, processed 165350973 words, keeping 3100831 word types
2018-12-17 13:52:38,710 : INFO : PROGRESS: at sentence #63060000, processed 165373362 words, keeping 3100972 word types
2018-12-17 13:52:38,747 : INFO : PROGRESS: at sentence #63070000, processed 165392967 words, keeping 3101090 word types
2018-12-17 13:52:38,796 : INFO : PROGRESS: at sentence #63080000, processed 165422212 words, keeping 3101379 word types
2018-12-17 13:52:38,831 : INFO : PROGRESS: at sentence #63090000, processed 165446781 words, keeping 3101691 word types
2018-12-17 13:52:38,866 : INFO : PROGRESS: at sentence #63100000, processed 165465044 words, keeping 3101979 word types
2018-12-17 13:52:38,896 : INFO : PROGRESS: at sentence #63110000, processed 165475335 words, keeping 3102223 word types
2018-12-17 13:52:38,939 : INFO : PROGRESS: at sentence #63120000, processed 165502217 words, keeping 3102495 word types
2018-12-17 13:52:38,997 : INFO : PROGRES

2018-12-17 13:52:41,485 : INFO : PROGRESS: at sentence #63740000, processed 167027707 words, keeping 3118409 word types
2018-12-17 13:52:41,527 : INFO : PROGRESS: at sentence #63750000, processed 167052736 words, keeping 3118744 word types
2018-12-17 13:52:41,566 : INFO : PROGRESS: at sentence #63760000, processed 167084476 words, keeping 3118965 word types
2018-12-17 13:52:41,607 : INFO : PROGRESS: at sentence #63770000, processed 167108023 words, keeping 3119223 word types
2018-12-17 13:52:41,645 : INFO : PROGRESS: at sentence #63780000, processed 167131328 words, keeping 3119512 word types
2018-12-17 13:52:41,682 : INFO : PROGRESS: at sentence #63790000, processed 167155781 words, keeping 3119769 word types
2018-12-17 13:52:41,724 : INFO : PROGRESS: at sentence #63800000, processed 167182441 words, keeping 3120060 word types
2018-12-17 13:52:41,760 : INFO : PROGRESS: at sentence #63810000, processed 167206532 words, keeping 3120312 word types
2018-12-17 13:52:41,800 : INFO : PROGRES

2018-12-17 13:52:44,411 : INFO : PROGRESS: at sentence #64430000, processed 168707865 words, keeping 3138522 word types
2018-12-17 13:52:44,456 : INFO : PROGRESS: at sentence #64440000, processed 168733422 words, keeping 3138771 word types
2018-12-17 13:52:44,494 : INFO : PROGRESS: at sentence #64450000, processed 168756662 words, keeping 3139033 word types
2018-12-17 13:52:44,549 : INFO : PROGRESS: at sentence #64460000, processed 168782724 words, keeping 3139270 word types
2018-12-17 13:52:44,603 : INFO : PROGRESS: at sentence #64470000, processed 168814082 words, keeping 3139605 word types
2018-12-17 13:52:44,649 : INFO : PROGRESS: at sentence #64480000, processed 168835782 words, keeping 3139989 word types
2018-12-17 13:52:44,686 : INFO : PROGRESS: at sentence #64490000, processed 168857867 words, keeping 3140366 word types
2018-12-17 13:52:44,740 : INFO : PROGRESS: at sentence #64500000, processed 168887020 words, keeping 3140647 word types
2018-12-17 13:52:44,785 : INFO : PROGRES

2018-12-17 13:52:47,340 : INFO : PROGRESS: at sentence #65120000, processed 170345924 words, keeping 3158231 word types
2018-12-17 13:52:47,384 : INFO : PROGRESS: at sentence #65130000, processed 170364796 words, keeping 3158710 word types
2018-12-17 13:52:47,440 : INFO : PROGRESS: at sentence #65140000, processed 170392377 words, keeping 3159058 word types
2018-12-17 13:52:47,484 : INFO : PROGRESS: at sentence #65150000, processed 170413030 words, keeping 3159350 word types
2018-12-17 13:52:47,533 : INFO : PROGRESS: at sentence #65160000, processed 170435148 words, keeping 3159590 word types
2018-12-17 13:52:47,586 : INFO : PROGRESS: at sentence #65170000, processed 170459870 words, keeping 3159804 word types
2018-12-17 13:52:47,635 : INFO : PROGRESS: at sentence #65180000, processed 170484488 words, keeping 3160168 word types
2018-12-17 13:52:47,677 : INFO : PROGRESS: at sentence #65190000, processed 170504259 words, keeping 3160513 word types
2018-12-17 13:52:47,729 : INFO : PROGRES

2018-12-17 13:52:51,151 : INFO : PROGRESS: at sentence #65810000, processed 172055195 words, keeping 3177433 word types
2018-12-17 13:52:51,203 : INFO : PROGRESS: at sentence #65820000, processed 172078792 words, keeping 3177666 word types
2018-12-17 13:52:51,260 : INFO : PROGRESS: at sentence #65830000, processed 172106935 words, keeping 3177889 word types
2018-12-17 13:52:51,315 : INFO : PROGRESS: at sentence #65840000, processed 172138890 words, keeping 3178105 word types
2018-12-17 13:52:51,367 : INFO : PROGRESS: at sentence #65850000, processed 172163635 words, keeping 3178316 word types
2018-12-17 13:52:51,414 : INFO : PROGRESS: at sentence #65860000, processed 172188586 words, keeping 3178531 word types
2018-12-17 13:52:51,467 : INFO : PROGRESS: at sentence #65870000, processed 172211330 words, keeping 3178794 word types
2018-12-17 13:52:51,514 : INFO : PROGRESS: at sentence #65880000, processed 172239805 words, keeping 3179048 word types
2018-12-17 13:52:51,567 : INFO : PROGRES

2018-12-17 13:52:54,993 : INFO : PROGRESS: at sentence #66500000, processed 173819665 words, keeping 3194842 word types
2018-12-17 13:52:55,052 : INFO : PROGRESS: at sentence #66510000, processed 173845119 words, keeping 3195111 word types
2018-12-17 13:52:55,113 : INFO : PROGRESS: at sentence #66520000, processed 173871530 words, keeping 3195337 word types
2018-12-17 13:52:55,168 : INFO : PROGRESS: at sentence #66530000, processed 173895314 words, keeping 3195621 word types
2018-12-17 13:52:55,246 : INFO : PROGRESS: at sentence #66540000, processed 173917824 words, keeping 3195798 word types
2018-12-17 13:52:55,310 : INFO : PROGRESS: at sentence #66550000, processed 173942379 words, keeping 3196028 word types
2018-12-17 13:52:55,368 : INFO : PROGRESS: at sentence #66560000, processed 173968010 words, keeping 3196284 word types
2018-12-17 13:52:55,430 : INFO : PROGRESS: at sentence #66570000, processed 173991975 words, keeping 3196653 word types
2018-12-17 13:52:55,484 : INFO : PROGRES

2018-12-17 13:52:58,496 : INFO : PROGRESS: at sentence #67190000, processed 175527104 words, keeping 3215440 word types
2018-12-17 13:52:58,535 : INFO : PROGRESS: at sentence #67200000, processed 175550834 words, keeping 3215636 word types
2018-12-17 13:52:58,578 : INFO : PROGRESS: at sentence #67210000, processed 175580945 words, keeping 3215834 word types
2018-12-17 13:52:58,620 : INFO : PROGRESS: at sentence #67220000, processed 175608435 words, keeping 3215996 word types
2018-12-17 13:52:58,662 : INFO : PROGRESS: at sentence #67230000, processed 175633648 words, keeping 3216190 word types
2018-12-17 13:52:58,706 : INFO : PROGRESS: at sentence #67240000, processed 175660541 words, keeping 3216400 word types
2018-12-17 13:52:58,743 : INFO : PROGRESS: at sentence #67250000, processed 175683392 words, keeping 3216566 word types
2018-12-17 13:52:58,783 : INFO : PROGRESS: at sentence #67260000, processed 175711083 words, keeping 3216827 word types
2018-12-17 13:52:58,823 : INFO : PROGRES

2018-12-17 13:53:01,356 : INFO : PROGRESS: at sentence #67880000, processed 177256173 words, keeping 3232115 word types
2018-12-17 13:53:01,395 : INFO : PROGRESS: at sentence #67890000, processed 177277898 words, keeping 3232610 word types
2018-12-17 13:53:01,441 : INFO : PROGRESS: at sentence #67900000, processed 177303130 words, keeping 3232782 word types
2018-12-17 13:53:01,483 : INFO : PROGRESS: at sentence #67910000, processed 177328740 words, keeping 3232969 word types
2018-12-17 13:53:01,519 : INFO : PROGRESS: at sentence #67920000, processed 177355030 words, keeping 3233148 word types
2018-12-17 13:53:01,560 : INFO : PROGRESS: at sentence #67930000, processed 177380244 words, keeping 3233450 word types
2018-12-17 13:53:01,597 : INFO : PROGRESS: at sentence #67940000, processed 177400610 words, keeping 3233696 word types
2018-12-17 13:53:01,638 : INFO : PROGRESS: at sentence #67950000, processed 177426952 words, keeping 3233970 word types
2018-12-17 13:53:01,679 : INFO : PROGRES

2018-12-17 13:53:04,152 : INFO : PROGRESS: at sentence #68570000, processed 178969679 words, keeping 3251125 word types
2018-12-17 13:53:04,191 : INFO : PROGRESS: at sentence #68580000, processed 178994805 words, keeping 3251378 word types
2018-12-17 13:53:04,237 : INFO : PROGRESS: at sentence #68590000, processed 179020376 words, keeping 3251699 word types
2018-12-17 13:53:04,276 : INFO : PROGRESS: at sentence #68600000, processed 179044953 words, keeping 3251919 word types
2018-12-17 13:53:04,319 : INFO : PROGRESS: at sentence #68610000, processed 179066729 words, keeping 3252085 word types
2018-12-17 13:53:04,356 : INFO : PROGRESS: at sentence #68620000, processed 179088803 words, keeping 3252311 word types
2018-12-17 13:53:04,393 : INFO : PROGRESS: at sentence #68630000, processed 179110592 words, keeping 3252462 word types
2018-12-17 13:53:04,425 : INFO : PROGRESS: at sentence #68640000, processed 179132549 words, keeping 3252678 word types
2018-12-17 13:53:04,462 : INFO : PROGRES

2018-12-17 13:53:06,963 : INFO : PROGRESS: at sentence #69260000, processed 180654302 words, keeping 3269477 word types
2018-12-17 13:53:07,016 : INFO : PROGRESS: at sentence #69270000, processed 180680219 words, keeping 3269693 word types
2018-12-17 13:53:07,055 : INFO : PROGRESS: at sentence #69280000, processed 180703296 words, keeping 3270067 word types
2018-12-17 13:53:07,096 : INFO : PROGRESS: at sentence #69290000, processed 180727160 words, keeping 3270456 word types
2018-12-17 13:53:07,136 : INFO : PROGRESS: at sentence #69300000, processed 180751660 words, keeping 3270717 word types
2018-12-17 13:53:07,179 : INFO : PROGRESS: at sentence #69310000, processed 180774671 words, keeping 3270859 word types
2018-12-17 13:53:07,222 : INFO : PROGRESS: at sentence #69320000, processed 180798039 words, keeping 3271090 word types
2018-12-17 13:53:07,275 : INFO : PROGRESS: at sentence #69330000, processed 180822744 words, keeping 3271322 word types
2018-12-17 13:53:07,319 : INFO : PROGRES

2018-12-17 13:53:09,853 : INFO : PROGRESS: at sentence #69950000, processed 182392002 words, keeping 3290189 word types
2018-12-17 13:53:09,890 : INFO : PROGRESS: at sentence #69960000, processed 182414779 words, keeping 3290647 word types
2018-12-17 13:53:09,930 : INFO : PROGRESS: at sentence #69970000, processed 182439926 words, keeping 3290869 word types
2018-12-17 13:53:09,968 : INFO : PROGRESS: at sentence #69980000, processed 182467100 words, keeping 3291107 word types
2018-12-17 13:53:10,009 : INFO : PROGRESS: at sentence #69990000, processed 182492886 words, keeping 3291360 word types
2018-12-17 13:53:10,046 : INFO : PROGRESS: at sentence #70000000, processed 182518693 words, keeping 3291521 word types
2018-12-17 13:53:10,086 : INFO : PROGRESS: at sentence #70010000, processed 182544413 words, keeping 3291795 word types
2018-12-17 13:53:10,133 : INFO : PROGRESS: at sentence #70020000, processed 182570110 words, keeping 3292147 word types
2018-12-17 13:53:10,174 : INFO : PROGRES

2018-12-17 13:53:12,764 : INFO : PROGRESS: at sentence #70640000, processed 184145395 words, keeping 3309574 word types
2018-12-17 13:53:12,810 : INFO : PROGRESS: at sentence #70650000, processed 184171156 words, keeping 3310048 word types
2018-12-17 13:53:12,849 : INFO : PROGRESS: at sentence #70660000, processed 184195500 words, keeping 3310224 word types
2018-12-17 13:53:12,889 : INFO : PROGRESS: at sentence #70670000, processed 184222494 words, keeping 3310481 word types
2018-12-17 13:53:12,932 : INFO : PROGRESS: at sentence #70680000, processed 184253918 words, keeping 3310832 word types
2018-12-17 13:53:12,989 : INFO : PROGRESS: at sentence #70690000, processed 184279518 words, keeping 3311154 word types
2018-12-17 13:53:13,032 : INFO : PROGRESS: at sentence #70700000, processed 184304861 words, keeping 3311401 word types
2018-12-17 13:53:13,077 : INFO : PROGRESS: at sentence #70710000, processed 184332649 words, keeping 3311642 word types
2018-12-17 13:53:13,119 : INFO : PROGRES

2018-12-17 13:53:15,633 : INFO : PROGRESS: at sentence #71330000, processed 185859302 words, keeping 3328244 word types
2018-12-17 13:53:15,674 : INFO : PROGRESS: at sentence #71340000, processed 185885427 words, keeping 3328521 word types
2018-12-17 13:53:15,708 : INFO : PROGRESS: at sentence #71350000, processed 185908218 words, keeping 3328980 word types
2018-12-17 13:53:15,749 : INFO : PROGRESS: at sentence #71360000, processed 185936444 words, keeping 3329328 word types
2018-12-17 13:53:15,778 : INFO : PROGRESS: at sentence #71370000, processed 185957737 words, keeping 3329552 word types
2018-12-17 13:53:15,822 : INFO : PROGRESS: at sentence #71380000, processed 185981714 words, keeping 3329713 word types
2018-12-17 13:53:15,854 : INFO : PROGRESS: at sentence #71390000, processed 186000795 words, keeping 3329833 word types
2018-12-17 13:53:15,891 : INFO : PROGRESS: at sentence #71400000, processed 186028146 words, keeping 3330204 word types
2018-12-17 13:53:15,932 : INFO : PROGRES

2018-12-17 13:53:18,396 : INFO : PROGRESS: at sentence #72020000, processed 187501439 words, keeping 3346872 word types
2018-12-17 13:53:18,440 : INFO : PROGRESS: at sentence #72030000, processed 187526914 words, keeping 3347138 word types
2018-12-17 13:53:18,478 : INFO : PROGRESS: at sentence #72040000, processed 187554609 words, keeping 3347426 word types
2018-12-17 13:53:18,512 : INFO : PROGRESS: at sentence #72050000, processed 187580540 words, keeping 3347700 word types
2018-12-17 13:53:18,551 : INFO : PROGRESS: at sentence #72060000, processed 187603403 words, keeping 3347978 word types
2018-12-17 13:53:18,594 : INFO : PROGRESS: at sentence #72070000, processed 187632630 words, keeping 3348239 word types
2018-12-17 13:53:18,645 : INFO : PROGRESS: at sentence #72080000, processed 187658977 words, keeping 3348616 word types
2018-12-17 13:53:18,685 : INFO : PROGRESS: at sentence #72090000, processed 187684085 words, keeping 3349022 word types
2018-12-17 13:53:18,723 : INFO : PROGRES

2018-12-17 13:53:21,180 : INFO : PROGRESS: at sentence #72710000, processed 189212632 words, keeping 3365679 word types
2018-12-17 13:53:21,224 : INFO : PROGRESS: at sentence #72720000, processed 189235419 words, keeping 3365957 word types
2018-12-17 13:53:21,267 : INFO : PROGRESS: at sentence #72730000, processed 189257506 words, keeping 3366209 word types
2018-12-17 13:53:21,310 : INFO : PROGRESS: at sentence #72740000, processed 189283118 words, keeping 3366568 word types
2018-12-17 13:53:21,354 : INFO : PROGRESS: at sentence #72750000, processed 189312197 words, keeping 3366854 word types
2018-12-17 13:53:21,399 : INFO : PROGRESS: at sentence #72760000, processed 189336169 words, keeping 3367078 word types
2018-12-17 13:53:21,439 : INFO : PROGRESS: at sentence #72770000, processed 189363267 words, keeping 3367443 word types
2018-12-17 13:53:21,484 : INFO : PROGRESS: at sentence #72780000, processed 189389057 words, keeping 3367674 word types
2018-12-17 13:53:21,525 : INFO : PROGRES

2018-12-17 13:53:24,069 : INFO : PROGRESS: at sentence #73400000, processed 190986510 words, keeping 3389565 word types
2018-12-17 13:53:24,109 : INFO : PROGRESS: at sentence #73410000, processed 191009684 words, keeping 3389727 word types
2018-12-17 13:53:24,148 : INFO : PROGRESS: at sentence #73420000, processed 191034964 words, keeping 3389955 word types
2018-12-17 13:53:24,192 : INFO : PROGRESS: at sentence #73430000, processed 191062936 words, keeping 3390344 word types
2018-12-17 13:53:24,244 : INFO : PROGRESS: at sentence #73440000, processed 191091772 words, keeping 3390933 word types
2018-12-17 13:53:24,287 : INFO : PROGRESS: at sentence #73450000, processed 191122514 words, keeping 3391635 word types
2018-12-17 13:53:24,333 : INFO : PROGRESS: at sentence #73460000, processed 191151485 words, keeping 3392224 word types
2018-12-17 13:53:24,378 : INFO : PROGRESS: at sentence #73470000, processed 191177025 words, keeping 3392605 word types
2018-12-17 13:53:24,410 : INFO : PROGRES

2018-12-17 13:53:26,939 : INFO : PROGRESS: at sentence #74090000, processed 192777507 words, keeping 3410327 word types
2018-12-17 13:53:26,999 : INFO : PROGRESS: at sentence #74100000, processed 192804449 words, keeping 3410568 word types
2018-12-17 13:53:27,043 : INFO : PROGRESS: at sentence #74110000, processed 192831198 words, keeping 3410961 word types
2018-12-17 13:53:27,085 : INFO : PROGRESS: at sentence #74120000, processed 192859609 words, keeping 3411354 word types
2018-12-17 13:53:27,127 : INFO : PROGRESS: at sentence #74130000, processed 192882860 words, keeping 3411702 word types
2018-12-17 13:53:27,176 : INFO : PROGRESS: at sentence #74140000, processed 192910511 words, keeping 3411961 word types
2018-12-17 13:53:27,214 : INFO : PROGRESS: at sentence #74150000, processed 192931996 words, keeping 3412125 word types
2018-12-17 13:53:27,257 : INFO : PROGRESS: at sentence #74160000, processed 192954484 words, keeping 3412379 word types
2018-12-17 13:53:27,294 : INFO : PROGRES

2018-12-17 13:53:29,859 : INFO : PROGRESS: at sentence #74780000, processed 194507437 words, keeping 3430017 word types
2018-12-17 13:53:29,899 : INFO : PROGRESS: at sentence #74790000, processed 194532856 words, keeping 3430184 word types
2018-12-17 13:53:29,939 : INFO : PROGRESS: at sentence #74800000, processed 194555004 words, keeping 3430417 word types
2018-12-17 13:53:29,976 : INFO : PROGRESS: at sentence #74810000, processed 194579264 words, keeping 3430709 word types
2018-12-17 13:53:30,014 : INFO : PROGRESS: at sentence #74820000, processed 194603566 words, keeping 3431180 word types
2018-12-17 13:53:30,063 : INFO : PROGRESS: at sentence #74830000, processed 194629992 words, keeping 3431386 word types
2018-12-17 13:53:30,108 : INFO : PROGRESS: at sentence #74840000, processed 194655990 words, keeping 3431657 word types
2018-12-17 13:53:30,149 : INFO : PROGRESS: at sentence #74850000, processed 194685764 words, keeping 3431875 word types
2018-12-17 13:53:30,188 : INFO : PROGRES

2018-12-17 13:54:42,880 : INFO : EPOCH 1 - PROGRESS: at 8.61% examples, 368988 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:54:43,892 : INFO : EPOCH 1 - PROGRESS: at 8.79% examples, 368926 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:54:44,923 : INFO : EPOCH 1 - PROGRESS: at 9.00% examples, 369697 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:54:45,937 : INFO : EPOCH 1 - PROGRESS: at 9.17% examples, 368716 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:54:46,946 : INFO : EPOCH 1 - PROGRESS: at 9.38% examples, 369196 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:54:47,969 : INFO : EPOCH 1 - PROGRESS: at 9.55% examples, 368838 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:54:48,990 : INFO : EPOCH 1 - PROGRESS: at 9.76% examples, 369449 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:54:49,994 : INFO : EPOCH 1 - PROGRESS: at 9.94% examples, 369112 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:54:51,012 : INFO : EPOCH 1 - PROGRESS: at 10.13% examples, 368868 words/s, in_qsize 0, out

2018-12-17 13:55:57,297 : INFO : EPOCH 1 - PROGRESS: at 22.88% examples, 367408 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:55:58,296 : INFO : EPOCH 1 - PROGRESS: at 23.10% examples, 367858 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:55:59,318 : INFO : EPOCH 1 - PROGRESS: at 23.31% examples, 367507 words/s, in_qsize 0, out_qsize 1
2018-12-17 13:56:00,321 : INFO : EPOCH 1 - PROGRESS: at 23.50% examples, 367331 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:56:01,326 : INFO : EPOCH 1 - PROGRESS: at 23.69% examples, 367229 words/s, in_qsize 1, out_qsize 0
2018-12-17 13:56:02,336 : INFO : EPOCH 1 - PROGRESS: at 23.87% examples, 366961 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:56:03,342 : INFO : EPOCH 1 - PROGRESS: at 24.08% examples, 367028 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:56:04,388 : INFO : EPOCH 1 - PROGRESS: at 24.26% examples, 366726 words/s, in_qsize 1, out_qsize 0
2018-12-17 13:56:05,508 : INFO : EPOCH 1 - PROGRESS: at 24.48% examples, 366644 words/s, in_qsiz

2018-12-17 13:57:11,927 : INFO : EPOCH 1 - PROGRESS: at 37.26% examples, 362894 words/s, in_qsize 0, out_qsize 1
2018-12-17 13:57:12,930 : INFO : EPOCH 1 - PROGRESS: at 37.44% examples, 362716 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:57:13,962 : INFO : EPOCH 1 - PROGRESS: at 37.60% examples, 362265 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:57:14,990 : INFO : EPOCH 1 - PROGRESS: at 37.80% examples, 361920 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:57:16,003 : INFO : EPOCH 1 - PROGRESS: at 37.95% examples, 361484 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:57:17,026 : INFO : EPOCH 1 - PROGRESS: at 38.16% examples, 361450 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:57:18,047 : INFO : EPOCH 1 - PROGRESS: at 38.35% examples, 361244 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:57:19,080 : INFO : EPOCH 1 - PROGRESS: at 38.52% examples, 360927 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:57:20,084 : INFO : EPOCH 1 - PROGRESS: at 38.73% examples, 361018 words/s, in_qsiz

2018-12-17 13:58:26,322 : INFO : EPOCH 1 - PROGRESS: at 52.00% examples, 361160 words/s, in_qsize 1, out_qsize 0
2018-12-17 13:58:27,324 : INFO : EPOCH 1 - PROGRESS: at 52.20% examples, 361169 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:58:28,340 : INFO : EPOCH 1 - PROGRESS: at 52.40% examples, 361161 words/s, in_qsize 1, out_qsize 0
2018-12-17 13:58:29,346 : INFO : EPOCH 1 - PROGRESS: at 52.58% examples, 361026 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:58:30,373 : INFO : EPOCH 1 - PROGRESS: at 52.80% examples, 361172 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:58:31,394 : INFO : EPOCH 1 - PROGRESS: at 53.00% examples, 361009 words/s, in_qsize 1, out_qsize 0
2018-12-17 13:58:32,549 : INFO : EPOCH 1 - PROGRESS: at 53.22% examples, 360959 words/s, in_qsize 0, out_qsize 1
2018-12-17 13:58:33,554 : INFO : EPOCH 1 - PROGRESS: at 53.43% examples, 361065 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:58:34,559 : INFO : EPOCH 1 - PROGRESS: at 53.61% examples, 360937 words/s, in_qsiz

2018-12-17 13:59:41,060 : INFO : EPOCH 1 - PROGRESS: at 67.31% examples, 360497 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:59:42,093 : INFO : EPOCH 1 - PROGRESS: at 67.53% examples, 360554 words/s, in_qsize 0, out_qsize 1
2018-12-17 13:59:43,120 : INFO : EPOCH 1 - PROGRESS: at 67.73% examples, 360483 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:59:44,127 : INFO : EPOCH 1 - PROGRESS: at 67.98% examples, 360627 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:59:45,142 : INFO : EPOCH 1 - PROGRESS: at 68.25% examples, 360734 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:59:46,166 : INFO : EPOCH 1 - PROGRESS: at 68.49% examples, 360742 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:59:47,192 : INFO : EPOCH 1 - PROGRESS: at 68.69% examples, 360647 words/s, in_qsize 0, out_qsize 0
2018-12-17 13:59:48,205 : INFO : EPOCH 1 - PROGRESS: at 68.91% examples, 360748 words/s, in_qsize 1, out_qsize 0
2018-12-17 13:59:49,221 : INFO : EPOCH 1 - PROGRESS: at 69.11% examples, 360690 words/s, in_qsiz

2018-12-17 14:00:55,652 : INFO : EPOCH 1 - PROGRESS: at 82.68% examples, 361083 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:00:56,667 : INFO : EPOCH 1 - PROGRESS: at 82.88% examples, 361083 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:00:57,695 : INFO : EPOCH 1 - PROGRESS: at 83.06% examples, 361047 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:00:58,711 : INFO : EPOCH 1 - PROGRESS: at 83.29% examples, 361126 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:00:59,741 : INFO : EPOCH 1 - PROGRESS: at 83.48% examples, 361087 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:01:00,763 : INFO : EPOCH 1 - PROGRESS: at 83.68% examples, 361054 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:01:01,782 : INFO : EPOCH 1 - PROGRESS: at 83.90% examples, 361092 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:01:02,792 : INFO : EPOCH 1 - PROGRESS: at 84.09% examples, 361071 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:01:03,842 : INFO : EPOCH 1 - PROGRESS: at 84.32% examples, 361083 words/s, in_qsiz

2018-12-17 14:02:10,328 : INFO : EPOCH 1 - PROGRESS: at 98.00% examples, 360660 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:02:11,349 : INFO : EPOCH 1 - PROGRESS: at 98.21% examples, 360671 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:02:12,361 : INFO : EPOCH 1 - PROGRESS: at 98.43% examples, 360759 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:02:13,384 : INFO : EPOCH 1 - PROGRESS: at 98.61% examples, 360692 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:02:14,399 : INFO : EPOCH 1 - PROGRESS: at 98.82% examples, 360723 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:02:15,399 : INFO : EPOCH 1 - PROGRESS: at 99.04% examples, 360786 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:02:16,425 : INFO : EPOCH 1 - PROGRESS: at 99.23% examples, 360751 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:02:17,495 : INFO : EPOCH 1 - PROGRESS: at 99.44% examples, 360689 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:02:18,511 : INFO : EPOCH 1 - PROGRESS: at 99.68% examples, 360758 words/s, in_qsiz

2018-12-17 14:03:18,397 : INFO : EPOCH 2 - PROGRESS: at 10.98% examples, 376097 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:03:19,417 : INFO : EPOCH 2 - PROGRESS: at 11.15% examples, 375115 words/s, in_qsize 0, out_qsize 2
2018-12-17 14:03:20,446 : INFO : EPOCH 2 - PROGRESS: at 11.37% examples, 375936 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:03:21,453 : INFO : EPOCH 2 - PROGRESS: at 11.56% examples, 375560 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:03:22,455 : INFO : EPOCH 2 - PROGRESS: at 11.77% examples, 376070 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:03:23,468 : INFO : EPOCH 2 - PROGRESS: at 11.97% examples, 375809 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:03:24,478 : INFO : EPOCH 2 - PROGRESS: at 12.16% examples, 375390 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:03:25,483 : INFO : EPOCH 2 - PROGRESS: at 12.37% examples, 376006 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:03:26,541 : INFO : EPOCH 2 - PROGRESS: at 12.57% examples, 375618 words/s, in_qsiz

2018-12-17 14:04:32,805 : INFO : EPOCH 2 - PROGRESS: at 25.37% examples, 370091 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:04:33,807 : INFO : EPOCH 2 - PROGRESS: at 25.50% examples, 369266 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:04:34,837 : INFO : EPOCH 2 - PROGRESS: at 25.68% examples, 368929 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:04:35,868 : INFO : EPOCH 2 - PROGRESS: at 25.83% examples, 368188 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:04:36,880 : INFO : EPOCH 2 - PROGRESS: at 26.00% examples, 367914 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:04:37,891 : INFO : EPOCH 2 - PROGRESS: at 26.16% examples, 367321 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:04:38,904 : INFO : EPOCH 2 - PROGRESS: at 26.34% examples, 367056 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:04:39,961 : INFO : EPOCH 2 - PROGRESS: at 26.54% examples, 366919 words/s, in_qsize 1, out_qsize 1
2018-12-17 14:04:40,970 : INFO : EPOCH 2 - PROGRESS: at 26.75% examples, 367127 words/s, in_qsiz

2018-12-17 14:05:47,424 : INFO : EPOCH 2 - PROGRESS: at 40.19% examples, 367887 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:05:48,475 : INFO : EPOCH 2 - PROGRESS: at 40.39% examples, 367813 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:05:49,510 : INFO : EPOCH 2 - PROGRESS: at 40.58% examples, 367731 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:05:50,528 : INFO : EPOCH 2 - PROGRESS: at 40.81% examples, 367889 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:05:51,538 : INFO : EPOCH 2 - PROGRESS: at 40.99% examples, 367718 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:05:52,600 : INFO : EPOCH 2 - PROGRESS: at 41.22% examples, 367894 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:05:53,610 : INFO : EPOCH 2 - PROGRESS: at 41.41% examples, 367810 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:05:54,654 : INFO : EPOCH 2 - PROGRESS: at 41.62% examples, 367792 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:05:55,655 : INFO : EPOCH 2 - PROGRESS: at 41.85% examples, 367941 words/s, in_qsiz

2018-12-17 14:07:02,107 : INFO : EPOCH 2 - PROGRESS: at 55.45% examples, 367815 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:07:03,109 : INFO : EPOCH 2 - PROGRESS: at 55.66% examples, 367767 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:07:04,113 : INFO : EPOCH 2 - PROGRESS: at 55.94% examples, 368147 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:07:05,120 : INFO : EPOCH 2 - PROGRESS: at 56.19% examples, 368355 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:07:06,128 : INFO : EPOCH 2 - PROGRESS: at 56.39% examples, 368233 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:07:07,146 : INFO : EPOCH 2 - PROGRESS: at 56.62% examples, 368353 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:07:08,179 : INFO : EPOCH 2 - PROGRESS: at 56.82% examples, 368237 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:07:09,218 : INFO : EPOCH 2 - PROGRESS: at 57.04% examples, 368230 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:07:10,225 : INFO : EPOCH 2 - PROGRESS: at 57.26% examples, 368331 words/s, in_qsiz

2018-12-17 14:08:16,544 : INFO : EPOCH 2 - PROGRESS: at 70.05% examples, 362002 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:08:17,549 : INFO : EPOCH 2 - PROGRESS: at 70.25% examples, 362082 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:08:18,558 : INFO : EPOCH 2 - PROGRESS: at 70.45% examples, 362006 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:08:19,583 : INFO : EPOCH 2 - PROGRESS: at 70.67% examples, 362091 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:08:20,586 : INFO : EPOCH 2 - PROGRESS: at 70.85% examples, 362016 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:08:21,607 : INFO : EPOCH 2 - PROGRESS: at 71.04% examples, 361921 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:08:22,621 : INFO : EPOCH 2 - PROGRESS: at 71.27% examples, 362041 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:08:23,641 : INFO : EPOCH 2 - PROGRESS: at 71.45% examples, 361946 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:08:24,649 : INFO : EPOCH 2 - PROGRESS: at 71.68% examples, 362025 words/s, in_qsiz

2018-12-17 14:09:30,828 : INFO : EPOCH 2 - PROGRESS: at 85.15% examples, 361875 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:09:31,851 : INFO : EPOCH 2 - PROGRESS: at 85.38% examples, 361931 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:09:32,854 : INFO : EPOCH 2 - PROGRESS: at 85.57% examples, 361853 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:09:33,880 : INFO : EPOCH 2 - PROGRESS: at 85.76% examples, 361747 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:09:34,916 : INFO : EPOCH 2 - PROGRESS: at 86.00% examples, 361809 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:09:35,929 : INFO : EPOCH 2 - PROGRESS: at 86.19% examples, 361697 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:09:36,971 : INFO : EPOCH 2 - PROGRESS: at 86.42% examples, 361715 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:09:37,982 : INFO : EPOCH 2 - PROGRESS: at 86.61% examples, 361607 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:09:38,989 : INFO : EPOCH 2 - PROGRESS: at 86.80% examples, 361502 words/s, in_qsiz

2018-12-17 14:10:44,377 : INFO : worker thread finished; awaiting finish of 6 more threads
2018-12-17 14:10:44,378 : INFO : worker thread finished; awaiting finish of 5 more threads
2018-12-17 14:10:44,380 : INFO : worker thread finished; awaiting finish of 4 more threads
2018-12-17 14:10:44,381 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-12-17 14:10:44,382 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-12-17 14:10:44,386 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-12-17 14:10:44,400 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-12-17 14:10:44,401 : INFO : EPOCH - 2 : training on 194929232 raw words (181806835 effective words) took 504.3s, 360485 effective words/s
2018-12-17 14:10:45,414 : INFO : EPOCH 3 - PROGRESS: at 0.18% examples, 369526 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:10:46,437 : INFO : EPOCH 3 - PROGRESS: at 0.39% examples, 393647 words/s, in_qsize 0, out_qsiz

2018-12-17 14:11:54,193 : INFO : EPOCH 3 - PROGRESS: at 13.43% examples, 381503 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:11:55,203 : INFO : EPOCH 3 - PROGRESS: at 13.66% examples, 382399 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:11:56,228 : INFO : EPOCH 3 - PROGRESS: at 13.85% examples, 382136 words/s, in_qsize 1, out_qsize 1
2018-12-17 14:11:57,249 : INFO : EPOCH 3 - PROGRESS: at 14.06% examples, 382382 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:11:58,262 : INFO : EPOCH 3 - PROGRESS: at 14.25% examples, 381928 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:11:59,271 : INFO : EPOCH 3 - PROGRESS: at 14.43% examples, 381615 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:12:00,283 : INFO : EPOCH 3 - PROGRESS: at 14.64% examples, 381993 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:12:01,289 : INFO : EPOCH 3 - PROGRESS: at 14.83% examples, 381600 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:12:02,306 : INFO : EPOCH 3 - PROGRESS: at 15.06% examples, 382138 words/s, in_qsiz

2018-12-17 14:13:08,667 : INFO : EPOCH 3 - PROGRESS: at 28.00% examples, 374873 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:13:09,683 : INFO : EPOCH 3 - PROGRESS: at 28.18% examples, 374550 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:13:10,708 : INFO : EPOCH 3 - PROGRESS: at 28.41% examples, 374652 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:13:11,716 : INFO : EPOCH 3 - PROGRESS: at 28.60% examples, 374488 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:13:12,756 : INFO : EPOCH 3 - PROGRESS: at 28.80% examples, 374367 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:13:13,764 : INFO : EPOCH 3 - PROGRESS: at 29.01% examples, 374385 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:13:14,775 : INFO : EPOCH 3 - PROGRESS: at 29.19% examples, 374152 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:13:15,777 : INFO : EPOCH 3 - PROGRESS: at 29.41% examples, 374318 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:13:16,794 : INFO : EPOCH 3 - PROGRESS: at 29.59% examples, 374131 words/s, in_qsiz

2018-12-17 14:14:23,568 : INFO : EPOCH 3 - PROGRESS: at 42.11% examples, 364076 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:14:24,577 : INFO : EPOCH 3 - PROGRESS: at 42.26% examples, 363670 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:14:25,611 : INFO : EPOCH 3 - PROGRESS: at 42.46% examples, 363569 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:14:26,622 : INFO : EPOCH 3 - PROGRESS: at 42.66% examples, 363511 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:14:27,645 : INFO : EPOCH 3 - PROGRESS: at 42.84% examples, 363292 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:14:28,648 : INFO : EPOCH 3 - PROGRESS: at 43.06% examples, 363450 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:14:29,655 : INFO : EPOCH 3 - PROGRESS: at 43.22% examples, 363218 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:14:30,680 : INFO : EPOCH 3 - PROGRESS: at 43.41% examples, 363012 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:14:31,719 : INFO : EPOCH 3 - PROGRESS: at 43.58% examples, 362708 words/s, in_qsiz

2018-12-17 14:15:37,958 : INFO : EPOCH 3 - PROGRESS: at 56.00% examples, 356623 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:15:38,988 : INFO : EPOCH 3 - PROGRESS: at 56.25% examples, 356735 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:15:40,013 : INFO : EPOCH 3 - PROGRESS: at 56.48% examples, 356851 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:15:41,034 : INFO : EPOCH 3 - PROGRESS: at 56.68% examples, 356851 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:15:42,045 : INFO : EPOCH 3 - PROGRESS: at 56.88% examples, 356830 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:15:43,048 : INFO : EPOCH 3 - PROGRESS: at 57.10% examples, 356907 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:15:44,074 : INFO : EPOCH 3 - PROGRESS: at 57.29% examples, 356802 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:15:45,132 : INFO : EPOCH 3 - PROGRESS: at 57.49% examples, 356755 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:15:46,159 : INFO : EPOCH 3 - PROGRESS: at 57.72% examples, 356904 words/s, in_qsiz

2018-12-17 14:16:52,945 : INFO : EPOCH 3 - PROGRESS: at 71.55% examples, 357560 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:16:53,949 : INFO : EPOCH 3 - PROGRESS: at 71.77% examples, 357653 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:16:54,957 : INFO : EPOCH 3 - PROGRESS: at 71.97% examples, 357639 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:16:55,961 : INFO : EPOCH 3 - PROGRESS: at 72.19% examples, 357707 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:16:56,969 : INFO : EPOCH 3 - PROGRESS: at 72.39% examples, 357718 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:16:57,998 : INFO : EPOCH 3 - PROGRESS: at 72.57% examples, 357583 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:16:59,026 : INFO : EPOCH 3 - PROGRESS: at 72.82% examples, 357775 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:17:00,050 : INFO : EPOCH 3 - PROGRESS: at 73.02% examples, 357750 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:17:01,060 : INFO : EPOCH 3 - PROGRESS: at 73.23% examples, 357757 words/s, in_qsiz

2018-12-17 14:18:07,646 : INFO : EPOCH 3 - PROGRESS: at 87.07% examples, 358979 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:18:08,757 : INFO : EPOCH 3 - PROGRESS: at 87.30% examples, 359007 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:18:09,760 : INFO : EPOCH 3 - PROGRESS: at 87.52% examples, 359061 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:18:10,777 : INFO : EPOCH 3 - PROGRESS: at 87.72% examples, 359021 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:18:11,777 : INFO : EPOCH 3 - PROGRESS: at 87.94% examples, 359100 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:18:12,793 : INFO : EPOCH 3 - PROGRESS: at 88.13% examples, 359080 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:18:13,821 : INFO : EPOCH 3 - PROGRESS: at 88.33% examples, 359028 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:18:14,840 : INFO : EPOCH 3 - PROGRESS: at 88.55% examples, 359114 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:18:15,857 : INFO : EPOCH 3 - PROGRESS: at 88.76% examples, 359116 words/s, in_qsiz

2018-12-17 14:19:14,519 : INFO : EPOCH 4 - PROGRESS: at 0.71% examples, 357322 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:19:15,542 : INFO : EPOCH 4 - PROGRESS: at 0.93% examples, 370773 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:19:16,559 : INFO : EPOCH 4 - PROGRESS: at 1.10% examples, 366597 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:19:17,561 : INFO : EPOCH 4 - PROGRESS: at 1.30% examples, 373611 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:19:18,566 : INFO : EPOCH 4 - PROGRESS: at 1.48% examples, 373782 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:19:19,582 : INFO : EPOCH 4 - PROGRESS: at 1.68% examples, 376530 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:19:20,585 : INFO : EPOCH 4 - PROGRESS: at 1.86% examples, 375890 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:19:21,591 : INFO : EPOCH 4 - PROGRESS: at 2.03% examples, 372573 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:19:22,618 : INFO : EPOCH 4 - PROGRESS: at 2.25% examples, 376995 words/s, in_qsize 0, out_

2018-12-17 14:20:30,152 : INFO : EPOCH 4 - PROGRESS: at 13.45% examples, 334738 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:20:31,164 : INFO : EPOCH 4 - PROGRESS: at 13.67% examples, 335850 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:20:32,200 : INFO : EPOCH 4 - PROGRESS: at 13.86% examples, 336055 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:20:33,212 : INFO : EPOCH 4 - PROGRESS: at 14.05% examples, 336427 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:20:34,234 : INFO : EPOCH 4 - PROGRESS: at 14.26% examples, 337203 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:20:35,240 : INFO : EPOCH 4 - PROGRESS: at 14.45% examples, 337480 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:20:36,241 : INFO : EPOCH 4 - PROGRESS: at 14.66% examples, 338378 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:20:37,260 : INFO : EPOCH 4 - PROGRESS: at 14.84% examples, 338376 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:20:38,264 : INFO : EPOCH 4 - PROGRESS: at 15.04% examples, 338661 words/s, in_qsiz

2018-12-17 14:21:44,739 : INFO : EPOCH 4 - PROGRESS: at 28.29% examples, 353864 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:21:45,742 : INFO : EPOCH 4 - PROGRESS: at 28.48% examples, 353789 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:21:46,744 : INFO : EPOCH 4 - PROGRESS: at 28.70% examples, 354195 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:21:47,753 : INFO : EPOCH 4 - PROGRESS: at 28.89% examples, 354169 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:21:48,754 : INFO : EPOCH 4 - PROGRESS: at 29.08% examples, 354157 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:21:49,775 : INFO : EPOCH 4 - PROGRESS: at 29.30% examples, 354390 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:21:50,781 : INFO : EPOCH 4 - PROGRESS: at 29.50% examples, 354489 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:21:51,911 : INFO : EPOCH 4 - PROGRESS: at 29.71% examples, 354411 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:21:52,912 : INFO : EPOCH 4 - PROGRESS: at 29.93% examples, 354682 words/s, in_qsiz

2018-12-17 14:22:59,339 : INFO : EPOCH 4 - PROGRESS: at 43.17% examples, 356999 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:23:00,354 : INFO : EPOCH 4 - PROGRESS: at 43.39% examples, 357194 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:23:01,356 : INFO : EPOCH 4 - PROGRESS: at 43.58% examples, 357098 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:23:02,366 : INFO : EPOCH 4 - PROGRESS: at 43.78% examples, 357075 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:23:03,398 : INFO : EPOCH 4 - PROGRESS: at 44.00% examples, 357298 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:23:04,407 : INFO : EPOCH 4 - PROGRESS: at 44.20% examples, 357316 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:23:05,416 : INFO : EPOCH 4 - PROGRESS: at 44.42% examples, 357409 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:23:06,417 : INFO : EPOCH 4 - PROGRESS: at 44.61% examples, 357462 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:23:07,426 : INFO : EPOCH 4 - PROGRESS: at 44.80% examples, 357397 words/s, in_qsiz

2018-12-17 14:24:13,891 : INFO : EPOCH 4 - PROGRESS: at 58.50% examples, 359561 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:24:14,910 : INFO : EPOCH 4 - PROGRESS: at 58.72% examples, 359650 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:24:15,937 : INFO : EPOCH 4 - PROGRESS: at 58.92% examples, 359573 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:24:16,995 : INFO : EPOCH 4 - PROGRESS: at 59.13% examples, 359523 words/s, in_qsize 2, out_qsize 1
2018-12-17 14:24:18,028 : INFO : EPOCH 4 - PROGRESS: at 59.35% examples, 359533 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:24:19,040 : INFO : EPOCH 4 - PROGRESS: at 59.56% examples, 359478 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:24:20,046 : INFO : EPOCH 4 - PROGRESS: at 59.79% examples, 359582 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:24:21,064 : INFO : EPOCH 4 - PROGRESS: at 60.00% examples, 359548 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:24:22,097 : INFO : EPOCH 4 - PROGRESS: at 60.20% examples, 359492 words/s, in_qsiz

2018-12-17 14:25:28,377 : INFO : EPOCH 4 - PROGRESS: at 73.27% examples, 356747 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:25:29,405 : INFO : EPOCH 4 - PROGRESS: at 73.49% examples, 356791 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:25:30,415 : INFO : EPOCH 4 - PROGRESS: at 73.69% examples, 356779 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:25:31,421 : INFO : EPOCH 4 - PROGRESS: at 73.89% examples, 356753 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:25:32,451 : INFO : EPOCH 4 - PROGRESS: at 74.11% examples, 356868 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:25:33,468 : INFO : EPOCH 4 - PROGRESS: at 74.32% examples, 356845 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:25:34,536 : INFO : EPOCH 4 - PROGRESS: at 74.54% examples, 356874 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:25:35,541 : INFO : EPOCH 4 - PROGRESS: at 74.74% examples, 356891 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:25:36,547 : INFO : EPOCH 4 - PROGRESS: at 74.93% examples, 356832 words/s, in_qsiz

2018-12-17 14:26:43,076 : INFO : EPOCH 4 - PROGRESS: at 88.58% examples, 357496 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:26:44,096 : INFO : EPOCH 4 - PROGRESS: at 88.77% examples, 357440 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:26:45,164 : INFO : EPOCH 4 - PROGRESS: at 88.96% examples, 357325 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:26:46,173 : INFO : EPOCH 4 - PROGRESS: at 89.19% examples, 357415 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:26:47,174 : INFO : EPOCH 4 - PROGRESS: at 89.39% examples, 357399 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:26:48,181 : INFO : EPOCH 4 - PROGRESS: at 89.61% examples, 357448 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:26:49,205 : INFO : EPOCH 4 - PROGRESS: at 89.81% examples, 357411 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:26:50,232 : INFO : EPOCH 4 - PROGRESS: at 90.00% examples, 357352 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:26:51,246 : INFO : EPOCH 4 - PROGRESS: at 90.22% examples, 357402 words/s, in_qsiz

2018-12-17 14:27:50,111 : INFO : EPOCH 5 - PROGRESS: at 1.80% examples, 362048 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:27:51,123 : INFO : EPOCH 5 - PROGRESS: at 2.00% examples, 363240 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:27:52,129 : INFO : EPOCH 5 - PROGRESS: at 2.20% examples, 366680 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:27:53,179 : INFO : EPOCH 5 - PROGRESS: at 2.39% examples, 365385 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:27:54,184 : INFO : EPOCH 5 - PROGRESS: at 2.62% examples, 370811 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:27:55,190 : INFO : EPOCH 5 - PROGRESS: at 2.80% examples, 370546 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:27:56,286 : INFO : EPOCH 5 - PROGRESS: at 2.99% examples, 370639 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:27:57,294 : INFO : EPOCH 5 - PROGRESS: at 3.20% examples, 373634 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:27:58,300 : INFO : EPOCH 5 - PROGRESS: at 3.39% examples, 373753 words/s, in_qsize 0, out_

2018-12-17 14:29:04,649 : INFO : EPOCH 5 - PROGRESS: at 16.36% examples, 380475 words/s, in_qsize 1, out_qsize 1
2018-12-17 14:29:05,649 : INFO : EPOCH 5 - PROGRESS: at 16.57% examples, 380691 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:29:06,665 : INFO : EPOCH 5 - PROGRESS: at 16.77% examples, 380415 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:29:07,685 : INFO : EPOCH 5 - PROGRESS: at 16.99% examples, 380644 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:29:08,700 : INFO : EPOCH 5 - PROGRESS: at 17.18% examples, 380494 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:29:09,703 : INFO : EPOCH 5 - PROGRESS: at 17.37% examples, 380157 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:29:10,706 : INFO : EPOCH 5 - PROGRESS: at 17.58% examples, 380676 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:29:11,725 : INFO : EPOCH 5 - PROGRESS: at 17.76% examples, 380299 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:29:12,751 : INFO : EPOCH 5 - PROGRESS: at 17.99% examples, 380697 words/s, in_qsiz

2018-12-17 14:30:19,080 : INFO : EPOCH 5 - PROGRESS: at 31.82% examples, 384349 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:30:20,084 : INFO : EPOCH 5 - PROGRESS: at 32.05% examples, 384552 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:30:21,128 : INFO : EPOCH 5 - PROGRESS: at 32.19% examples, 383791 words/s, in_qsize 6, out_qsize 0
2018-12-17 14:30:22,162 : INFO : EPOCH 5 - PROGRESS: at 32.35% examples, 383116 words/s, in_qsize 13, out_qsize 0
2018-12-17 14:30:23,164 : INFO : EPOCH 5 - PROGRESS: at 32.54% examples, 382818 words/s, in_qsize 14, out_qsize 0
2018-12-17 14:30:24,169 : INFO : EPOCH 5 - PROGRESS: at 32.71% examples, 382349 words/s, in_qsize 14, out_qsize 1
2018-12-17 14:30:25,171 : INFO : EPOCH 5 - PROGRESS: at 32.90% examples, 382119 words/s, in_qsize 14, out_qsize 0
2018-12-17 14:30:26,177 : INFO : EPOCH 5 - PROGRESS: at 33.06% examples, 381654 words/s, in_qsize 9, out_qsize 0
2018-12-17 14:30:27,224 : INFO : EPOCH 5 - PROGRESS: at 33.24% examples, 381332 words/s, in_

2018-12-17 14:31:33,329 : INFO : EPOCH 5 - PROGRESS: at 48.20% examples, 388749 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:31:34,335 : INFO : EPOCH 5 - PROGRESS: at 48.45% examples, 388996 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:31:35,346 : INFO : EPOCH 5 - PROGRESS: at 48.67% examples, 388984 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:31:36,397 : INFO : EPOCH 5 - PROGRESS: at 48.89% examples, 388916 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:31:37,402 : INFO : EPOCH 5 - PROGRESS: at 49.15% examples, 389191 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:31:38,416 : INFO : EPOCH 5 - PROGRESS: at 49.39% examples, 389302 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:31:39,423 : INFO : EPOCH 5 - PROGRESS: at 49.61% examples, 389304 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:31:40,527 : INFO : EPOCH 5 - PROGRESS: at 49.85% examples, 389273 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:31:41,544 : INFO : EPOCH 5 - PROGRESS: at 50.10% examples, 389377 words/s, in_qsiz

2018-12-17 14:32:48,266 : INFO : EPOCH 5 - PROGRESS: at 64.38% examples, 386518 words/s, in_qsize 15, out_qsize 0
2018-12-17 14:32:49,271 : INFO : EPOCH 5 - PROGRESS: at 64.60% examples, 386588 words/s, in_qsize 9, out_qsize 0
2018-12-17 14:32:50,285 : INFO : EPOCH 5 - PROGRESS: at 64.75% examples, 386228 words/s, in_qsize 14, out_qsize 1
2018-12-17 14:32:51,351 : INFO : EPOCH 5 - PROGRESS: at 64.95% examples, 386016 words/s, in_qsize 14, out_qsize 0
2018-12-17 14:32:52,356 : INFO : EPOCH 5 - PROGRESS: at 65.14% examples, 385877 words/s, in_qsize 15, out_qsize 1
2018-12-17 14:32:53,383 : INFO : EPOCH 5 - PROGRESS: at 65.41% examples, 386193 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:32:54,383 : INFO : EPOCH 5 - PROGRESS: at 65.67% examples, 386365 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:32:55,411 : INFO : EPOCH 5 - PROGRESS: at 65.90% examples, 386355 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:32:56,424 : INFO : EPOCH 5 - PROGRESS: at 66.14% examples, 386452 words/s, in_

2018-12-17 14:34:03,078 : INFO : EPOCH 5 - PROGRESS: at 80.54% examples, 385460 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:34:04,079 : INFO : EPOCH 5 - PROGRESS: at 80.74% examples, 385446 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:34:05,093 : INFO : EPOCH 5 - PROGRESS: at 80.95% examples, 385445 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:34:06,094 : INFO : EPOCH 5 - PROGRESS: at 81.21% examples, 385587 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:34:07,117 : INFO : EPOCH 5 - PROGRESS: at 81.43% examples, 385583 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:34:08,117 : INFO : EPOCH 5 - PROGRESS: at 81.65% examples, 385644 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:34:09,176 : INFO : EPOCH 5 - PROGRESS: at 81.90% examples, 385687 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:34:10,216 : INFO : EPOCH 5 - PROGRESS: at 82.16% examples, 385831 words/s, in_qsize 0, out_qsize 1
2018-12-17 14:34:11,229 : INFO : EPOCH 5 - PROGRESS: at 82.38% examples, 385787 words/s, in_qsiz

2018-12-17 14:35:17,770 : INFO : EPOCH 5 - PROGRESS: at 97.36% examples, 386776 words/s, in_qsize 1, out_qsize 0
2018-12-17 14:35:18,782 : INFO : EPOCH 5 - PROGRESS: at 97.58% examples, 386796 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:35:19,784 : INFO : EPOCH 5 - PROGRESS: at 97.79% examples, 386827 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:35:20,789 : INFO : EPOCH 5 - PROGRESS: at 98.04% examples, 386957 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:35:21,792 : INFO : EPOCH 5 - PROGRESS: at 98.28% examples, 387060 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:35:22,793 : INFO : EPOCH 5 - PROGRESS: at 98.51% examples, 387088 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:35:23,909 : INFO : EPOCH 5 - PROGRESS: at 98.76% examples, 387161 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:35:24,921 : INFO : EPOCH 5 - PROGRESS: at 99.01% examples, 387300 words/s, in_qsize 0, out_qsize 0
2018-12-17 14:35:25,930 : INFO : EPOCH 5 - PROGRESS: at 99.22% examples, 387298 words/s, in_qsiz

In [10]:
# Load the Word2Vec model stored at MODEL_PATH and bind it to `model`,
# which the query cells below use through `model.wv`.
# NOTE(review): presumably this is the file written by train(); the call that
# produced it is not shown in this part of the notebook -- confirm.
model = Word2Vec.load(MODEL_PATH)

2018-12-17 17:21:11,160 : INFO : loading Word2Vec object from F:/for learn/Python/NLP_in_Action/chapter-7/word2vec/zhwiki.word2vec
2018-12-17 17:21:13,311 : INFO : loading wv recursively from F:/for learn/Python/NLP_in_Action/chapter-7/word2vec/zhwiki.word2vec.wv.* with mmap=None
2018-12-17 17:21:13,312 : INFO : loading vectors from F:/for learn/Python/NLP_in_Action/chapter-7/word2vec/zhwiki.word2vec.wv.vectors.npy with mmap=None
2018-12-17 17:21:13,649 : INFO : setting ignored attribute vectors_norm to None
2018-12-17 17:21:13,651 : INFO : loading vocabulary recursively from F:/for learn/Python/NLP_in_Action/chapter-7/word2vec/zhwiki.word2vec.vocabulary.* with mmap=None
2018-12-17 17:21:13,652 : INFO : loading trainables recursively from F:/for learn/Python/NLP_in_Action/chapter-7/word2vec/zhwiki.word2vec.trainables.* with mmap=None
2018-12-17 17:21:13,653 : INFO : loading syn1neg from F:/for learn/Python/NLP_in_Action/chapter-7/word2vec/zhwiki.word2vec.trainables.syn1neg.npy with mma

In [15]:
# Print the similarity between "西红柿" and each comparison word,
# one score per line, in the order listed.
for candidate in ("苹果", "番茄"):
    print(model.wv.similarity("西红柿", candidate))

0.39551502
0.53664094


  if np.issubdtype(vec.dtype, np.int):


In [16]:
# Query the words most similar to "中国" and print the resulting
# list of (word, score) pairs.
neighbours = model.wv.most_similar("中国")
print(neighbours)

2018-12-17 17:24:04,251 : INFO : precomputing L2-norms of word weight vectors


[('我国', 0.7526365518569946), ('欧洲', 0.663559079170227), ('中华人民共和国', 0.6560628414154053), ('韩国', 0.6495850086212158), ('日本', 0.640602707862854), ('越南', 0.6335252523422241), ('北韩', 0.627105712890625), ('朝鲜', 0.6238020062446594), ('外国', 0.623470664024353), ('美国', 0.6215987801551819)]


  if np.issubdtype(vec.dtype, np.int):
