# Analysis of 220k Monster job postings

 - data used is from Kaggle
 - uses skills-ml pipeline
 - trains embedding model
 - trains occupation classifier

# House-keeping

In [1]:
# general
import os
import json
import copy
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import logging
from retrying import Retrying
from io import BytesIO
from itertools import chain, islice
from typing import Dict, Text, Any, Generator
import pickle

# common schema
from skills_ml.job_postings.raw.virginia import VirginiaTransformer

# embedding
from skills_ml.algorithms.embedding.models import Word2VecModel, FastTextModel
from skills_ml.job_postings.corpora import Word2VecGensimCorpusCreator, Doc2VecGensimCorpusCreator
from skills_ml.algorithms.preprocessing import IterablePipeline
from skills_ml.algorithms import nlp
from functools import partial
from skills_ml.algorithms.embedding.train import EmbeddingTrainer

# occupation classifier
from skills_ml.storage import FSStore, open_sesame
from skills_ml.ontologies.onet import Onet
from skills_ml.algorithms.occupation_classifiers import FullSOC, SOCMajorGroup
from skills_ml.utils import itershuffle
from skills_ml.algorithms.occupation_classifiers import DesignMatrix
from skills_ml.algorithms.occupation_classifiers.train import OccupationClassifierTrainer
from skills_ml.algorithms.occupation_classifiers.test import OccupationClassifierTester
from skills_ml.evaluation.occ_cls_evaluator import OnetOccupationClassificationEvaluator
from skills_ml.algorithms.occupation_classifiers.classifiers import CombinedClassifier
from skills_ml.algorithms.embedding.train import Reiterable
from skills_ml.job_postings.corpora import SimpleCorpusCreator
from skills_ml.job_postings.corpora import Doc2VecGensimCorpusCreator, CorpusCreator

# skill extraction
from skills_ml.algorithms.skill_extractors import SkillEndingPatternExtractor
from skills_ml.job_postings.common_schema import JobPostingCollectionSample
from skills_ml.algorithms.skill_extractors import ExactMatchSkillExtractor
from skills_ml.algorithms.skill_extractors import SocScopedExactMatchSkillExtractor


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


# Load job posting data

In [16]:
file_name = 'indeed_finance_energy.csv' 
monster_220k = pd.read_csv(file_name).reset_index(drop=True)
monster_220k.head()

Unnamed: 0,country,country_code,date_added,has_expired,job_board,job_description,job_title,job_type,location,organization,page_url,salary,sector,uniq_id
0,United States of America,US,,No,jobs.monster.com,TeamSoft is seeing an IT Support Specialist to...,IT Support Technician Job in Madison,Full Time Employee,"Madison, WI 53702",,http://jobview.monster.com/it-support-technici...,,IT/Software Development,11d599f229a80023d2f40e7c52cd941e
1,United States of America,US,,No,jobs.monster.com,The Wisconsin State Journal is seeking a flexi...,Business Reporter/Editor Job in Madison,Full Time,"Madison, WI 53708",Printing and Publishing,http://jobview.monster.com/business-reporter-e...,,,e4cbb126dabf22159aff90223243ff2a
2,United States of America,US,,No,jobs.monster.com,Report this job About the Job DePuy Synthes Co...,Johnson & Johnson Family of Companies Job Appl...,"Full Time, Employee",DePuy Synthes Companies is a member of Johnson...,Personal and Household Services,http://jobview.monster.com/senior-training-lea...,,,839106b353877fa3d896ffb9c1fe01c0
3,United States of America,US,,No,jobs.monster.com,Why Join Altec? If you’re considering a career...,Engineer - Quality Job in Dixon,Full Time,"Dixon, CA",Altec Industries,http://jobview.monster.com/engineer-quality-jo...,,Experienced (Non-Manager),58435fcab804439efdcaa7ecca0fd783
4,United States of America,US,,No,jobs.monster.com,Position ID# 76162 # Positions 1 State CT C...,Shift Supervisor - Part-Time Job in Camphill,Full Time Employee,"Camphill, PA",Retail,http://jobview.monster.com/shift-supervisor-pa...,,Project/Program Management,64d0272dc8496abfd9523a8df63c184c


# Parse into job posting schema

In [17]:
fname = '50_sample.json'
f = open(fname, 'r')
data = f.read().split('\n')[0]
schema = json.loads(data)
s50 = f.read().split('\n')[0:50]

In [18]:
df_json = []
for i in range(monster_220k.shape[0]):
    occupation = copy.deepcopy(schema)
    occupation['hiringOrganization']['location'] = monster_220k['location'][i]
    occupation['title'] = monster_220k['job_title'][i]
    occupation['jobDescription'] = monster_220k['job_description'][i].split('\n')
    df_json.append(occupation)

In [1]:
with open("monster_220k.json", "w") as write_file:
    for i in df_json:
        json.dump(i, write_file)
        write_file.write('\n')

NameError: name 'df_json' is not defined

In [19]:
## reads json data file

JobPostingType = Dict[Text, Any]
JobPostingGeneratorType = Generator[JobPostingType, None, None]
MetadataType = Dict[Text, Dict[Text, Any]]

class monsterParser(object):
    def __init__(self):
        fname = 'monster_220k.json'
        f = open(fname, 'r')
        self.lines = f.read().split('\n')
        self.transformer = VirginiaTransformer(partner_id = 'VA')
        
    def __iter__(self) -> JobPostingGeneratorType:
        for line in self.lines:
            if line:
                yield self.transformer._transform(json.loads(line))
    
    @property
    def metadata(self) -> MetadataType:
        return {'job postings': {
            'downloaded_from': 'jobs.monster.com',
            'month': '2020-08',
            'purpose': 'monster_analysis'
        }}


In [20]:
job_postings = monsterParser()

# Embedding model

In [21]:
cbow = Word2VecModel(size=200, sg=0, window=7, iter=3, batch_words=1000)
skip_gram = Word2VecModel(size=200, sg=1, window=7, iter=3, batch_words=1000)
fasttext = FastTextModel(size=200, window=7, iter=3, batch_words=1000) 

w2v_corpus_generator = Word2VecGensimCorpusCreator(job_postings)

In [22]:
trainer = EmbeddingTrainer(cbow, skip_gram, fasttext, batch_size=100)
trainer.train(w2v_corpus_generator)

2020-08-02 00:47:41,277 : INFO : Training batch #0 
2020-08-02 00:47:41,278 : INFO : collecting all words and their counts
2020-08-02 00:47:41,279 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:47:41,281 : INFO : collected 879 word types from a corpus of 14733 raw words and 100 sentences
2020-08-02 00:47:41,282 : INFO : Loading a fresh vocabulary
2020-08-02 00:47:41,283 : INFO : effective_min_count=5 retains 169 unique words (19% of original 879, drops 710)
2020-08-02 00:47:41,284 : INFO : effective_min_count=5 leaves 13666 word corpus (92% of original 14733, drops 1067)
2020-08-02 00:47:41,286 : INFO : deleting the raw counts dictionary of 879 items
2020-08-02 00:47:41,287 : INFO : sample=0.001 downsamples 87 most-common words
2020-08-02 00:47:41,287 : INFO : downsampling leaves estimated 5581 word corpus (40.8% of prior 13666)
2020-08-02 00:47:41,289 : INFO : estimated required memory for 169 words and 200 dimensions: 354900 bytes
2020-08-02 

2020-08-02 00:47:43,460 : INFO : sample=0.001 downsamples 172 most-common words
2020-08-02 00:47:43,461 : INFO : downsampling leaves estimated 11515 word corpus (83.5% of prior 13783)
2020-08-02 00:47:43,462 : INFO : estimated required memory for 374 words and 200 dimensions: 785400 bytes
2020-08-02 00:47:43,463 : INFO : updating layer weights
2020-08-02 00:47:43,474 : INFO : training model with 3 workers on 240 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:47:43,487 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:43,488 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:43,489 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:43,491 : INFO : EPOCH - 1 : training on 14967 raw words (5874 effective words) took 0.0s, 521704 effective words/s
2020-08-02 00:47:43,504 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02

2020-08-02 00:47:43,940 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:43,941 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:43,941 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:43,942 : INFO : EPOCH - 1 : training on 15234 raw words (5918 effective words) took 0.0s, 695627 effective words/s
2020-08-02 00:47:43,953 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:43,953 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:43,954 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:43,955 : INFO : EPOCH - 2 : training on 15234 raw words (6019 effective words) took 0.0s, 724396 effective words/s
2020-08-02 00:47:43,966 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:43,967 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-

2020-08-02 00:47:44,394 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:44,395 : INFO : EPOCH - 2 : training on 15311 raw words (6153 effective words) took 0.0s, 667219 effective words/s
2020-08-02 00:47:44,407 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:44,408 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:44,409 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:44,409 : INFO : EPOCH - 3 : training on 15311 raw words (6159 effective words) took 0.0s, 830249 effective words/s
2020-08-02 00:47:44,410 : INFO : training on a 45933 raw words (18444 effective words) took 0.0s, 451795 effective words/s
2020-08-02 00:47:44,412 : INFO : collecting all words and their counts
2020-08-02 00:47:44,413 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:47:44,415 : INFO : collected 940 word types from a corpus of 1531

2020-08-02 00:47:44,862 : INFO : training on a 42951 raw words (16552 effective words) took 0.0s, 421475 effective words/s
2020-08-02 00:47:44,862 : INFO : collecting all words and their counts
2020-08-02 00:47:44,863 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:47:44,865 : INFO : collected 853 word types from a corpus of 14317 raw words and 100 sentences
2020-08-02 00:47:44,866 : INFO : Updating model with new vocabulary
2020-08-02 00:47:44,867 : INFO : New added 158 unique words (15% of original 1011) and increased the count of 158 pre-existing words (15% of original 1011)
2020-08-02 00:47:44,869 : INFO : deleting the raw counts dictionary of 853 items
2020-08-02 00:47:44,870 : INFO : sample=0.001 downsamples 166 most-common words
2020-08-02 00:47:44,870 : INFO : downsampling leaves estimated 10720 word corpus (80.4% of prior 13331)
2020-08-02 00:47:44,872 : INFO : estimated required memory for 316 words and 200 dimensions: 663600 bytes
202

2020-08-02 00:47:45,393 : INFO : deleting the raw counts dictionary of 987 items
2020-08-02 00:47:45,394 : INFO : sample=0.001 downsamples 168 most-common words
2020-08-02 00:47:45,394 : INFO : downsampling leaves estimated 11107 word corpus (81.7% of prior 13598)
2020-08-02 00:47:45,396 : INFO : estimated required memory for 366 words and 200 dimensions: 768600 bytes
2020-08-02 00:47:45,397 : INFO : updating layer weights
2020-08-02 00:47:45,403 : INFO : training model with 3 workers on 416 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:47:45,427 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:45,428 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:45,429 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:45,429 : INFO : EPOCH - 1 : training on 14814 raw words (5670 effective words) took 0.0s, 238735 effective words/s
2020-08-02 00:47:45,

2020-08-02 00:47:45,852 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:45,853 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:45,854 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:45,854 : INFO : EPOCH - 1 : training on 14614 raw words (5429 effective words) took 0.0s, 266002 effective words/s
2020-08-02 00:47:45,877 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:45,878 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:45,879 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:45,880 : INFO : EPOCH - 2 : training on 14614 raw words (5551 effective words) took 0.0s, 275783 effective words/s
2020-08-02 00:47:45,901 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:45,902 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-

2020-08-02 00:47:46,305 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:46,306 : INFO : EPOCH - 2 : training on 15241 raw words (5867 effective words) took 0.0s, 273860 effective words/s
2020-08-02 00:47:46,327 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:46,329 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:46,331 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:46,331 : INFO : EPOCH - 3 : training on 15241 raw words (5985 effective words) took 0.0s, 269003 effective words/s
2020-08-02 00:47:46,332 : INFO : training on a 45723 raw words (17666 effective words) took 0.1s, 228536 effective words/s
2020-08-02 00:47:46,333 : INFO : collecting all words and their counts
2020-08-02 00:47:46,334 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:47:46,336 : INFO : collected 1218 word types from a corpus of 152

2020-08-02 00:47:46,760 : INFO : training on a 43047 raw words (16591 effective words) took 0.1s, 198287 effective words/s
2020-08-02 00:47:46,762 : INFO : collecting all words and their counts
2020-08-02 00:47:46,767 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:47:46,770 : INFO : collected 879 word types from a corpus of 14349 raw words and 100 sentences
2020-08-02 00:47:46,771 : INFO : Updating model with new vocabulary
2020-08-02 00:47:46,772 : INFO : New added 160 unique words (15% of original 1039) and increased the count of 160 pre-existing words (15% of original 1039)
2020-08-02 00:47:46,774 : INFO : deleting the raw counts dictionary of 879 items
2020-08-02 00:47:46,775 : INFO : sample=0.001 downsamples 170 most-common words
2020-08-02 00:47:46,776 : INFO : downsampling leaves estimated 10826 word corpus (81.0% of prior 13362)
2020-08-02 00:47:46,782 : INFO : estimated required memory for 507 words, 6733 buckets and 200 dimensions: 65

2020-08-02 00:47:47,186 : INFO : deleting the raw counts dictionary of 841 items
2020-08-02 00:47:47,186 : INFO : sample=0.001 downsamples 168 most-common words
2020-08-02 00:47:47,187 : INFO : downsampling leaves estimated 10721 word corpus (80.6% of prior 13307)
2020-08-02 00:47:47,193 : INFO : estimated required memory for 527 words, 6893 buckets and 200 dimensions: 6740964 bytes
2020-08-02 00:47:47,195 : INFO : updating layer weights
2020-08-02 00:47:47,211 : INFO : Number of new ngrams is 160
2020-08-02 00:47:47,217 : INFO : training model with 3 workers on 527 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:47:47,251 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:47,252 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:47,253 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:47,254 : INFO : EPOCH - 1 : training on 14264 raw words (55

2020-08-02 00:47:47,692 : INFO : training model with 3 workers on 562 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:47:47,728 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:47,730 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:47,731 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:47,732 : INFO : EPOCH - 1 : training on 14940 raw words (5837 effective words) took 0.0s, 161823 effective words/s
2020-08-02 00:47:47,765 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:47,766 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:47,768 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:47,769 : INFO : EPOCH - 2 : training on 14940 raw words (5866 effective words) took 0.0s, 176254 effective words/s
2020-08-02 00:47:47,800 : INFO : worker th

2020-08-02 00:47:48,894 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:48,896 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:48,899 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:48,900 : INFO : EPOCH - 2 : training on 40630 raw words (26584 effective words) took 0.1s, 202974 effective words/s
2020-08-02 00:47:49,057 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:49,058 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:49,061 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:49,062 : INFO : EPOCH - 3 : training on 40630 raw words (26734 effective words) took 0.2s, 168680 effective words/s
2020-08-02 00:47:49,062 : INFO : training on a 121890 raw words (79974 effective words) took 0.4s, 185091 effective words/s
2020-08-02 00:47:49,416 : INFO : Training batch #12 
2020-08-02 

2020-08-02 00:47:50,561 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:50,561 : INFO : EPOCH - 3 : training on 45532 raw words (28804 effective words) took 0.2s, 175444 effective words/s
2020-08-02 00:47:50,562 : INFO : training on a 136596 raw words (86713 effective words) took 0.5s, 162209 effective words/s
2020-08-02 00:47:50,949 : INFO : Training batch #13 
2020-08-02 00:47:50,950 : INFO : collecting all words and their counts
2020-08-02 00:47:50,950 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:47:50,958 : INFO : collected 4628 word types from a corpus of 43892 raw words and 100 sentences
2020-08-02 00:47:50,958 : INFO : Updating model with new vocabulary
2020-08-02 00:47:50,962 : INFO : New added 1192 unique words (20% of original 5820) and increased the count of 1192 pre-existing words (20% of original 5820)
2020-08-02 00:47:50,970 : INFO : deleting the raw counts dictionary of 4628 items
2020-08-02 

2020-08-02 00:47:52,460 : INFO : collected 4867 word types from a corpus of 48299 raw words and 100 sentences
2020-08-02 00:47:52,461 : INFO : Updating model with new vocabulary
2020-08-02 00:47:52,466 : INFO : New added 1282 unique words (20% of original 6149) and increased the count of 1282 pre-existing words (20% of original 6149)
2020-08-02 00:47:52,474 : INFO : deleting the raw counts dictionary of 4867 items
2020-08-02 00:47:52,475 : INFO : sample=0.001 downsamples 146 most-common words
2020-08-02 00:47:52,475 : INFO : downsampling leaves estimated 61215 word corpus (143.9% of prior 42537)
2020-08-02 00:47:52,480 : INFO : estimated required memory for 2564 words and 200 dimensions: 5384400 bytes
2020-08-02 00:47:52,481 : INFO : updating layer weights
2020-08-02 00:47:52,489 : INFO : training model with 3 workers on 2220 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:47:52,523 : INFO : worker thread finished; awaiting finish of 2 more t

2020-08-02 00:47:54,108 : INFO : estimated required memory for 2408 words and 200 dimensions: 5056800 bytes
2020-08-02 00:47:54,109 : INFO : updating layer weights
2020-08-02 00:47:54,124 : INFO : training model with 3 workers on 2403 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:47:54,189 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:54,191 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:54,192 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:54,193 : INFO : EPOCH - 1 : training on 45809 raw words (30625 effective words) took 0.1s, 512799 effective words/s
2020-08-02 00:47:54,250 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:54,253 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:54,253 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-

2020-08-02 00:47:55,839 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:55,841 : INFO : EPOCH - 1 : training on 45890 raw words (31852 effective words) took 0.1s, 351968 effective words/s
2020-08-02 00:47:55,887 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:55,889 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:55,890 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:55,891 : INFO : EPOCH - 2 : training on 45890 raw words (31712 effective words) took 0.0s, 709524 effective words/s
2020-08-02 00:47:55,932 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:55,934 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:55,937 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:55,939 : INFO : EPOCH - 3 : training on 45890 raw words (31743 effective words) 

2020-08-02 00:47:57,535 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:47:57,536 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:47:57,536 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:47:57,537 : INFO : EPOCH - 3 : training on 48858 raw words (33791 effective words) took 0.0s, 1033581 effective words/s
2020-08-02 00:47:57,538 : INFO : training on a 146574 raw words (101285 effective words) took 0.1s, 901534 effective words/s
2020-08-02 00:47:57,540 : INFO : collecting all words and their counts
2020-08-02 00:47:57,541 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:47:57,547 : INFO : collected 3993 word types from a corpus of 48858 raw words and 100 sentences
2020-08-02 00:47:57,548 : INFO : Updating model with new vocabulary
2020-08-02 00:47:57,551 : INFO : New added 1122 unique words (21% of original 5115) and increased the count of 1122 pre-

2020-08-02 00:47:59,160 : INFO : collecting all words and their counts
2020-08-02 00:47:59,161 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:47:59,167 : INFO : collected 4059 word types from a corpus of 47495 raw words and 100 sentences
2020-08-02 00:47:59,168 : INFO : Updating model with new vocabulary
2020-08-02 00:47:59,170 : INFO : New added 1125 unique words (21% of original 5184) and increased the count of 1125 pre-existing words (21% of original 5184)
2020-08-02 00:47:59,177 : INFO : deleting the raw counts dictionary of 4059 items
2020-08-02 00:47:59,178 : INFO : sample=0.001 downsamples 148 most-common words
2020-08-02 00:47:59,179 : INFO : downsampling leaves estimated 61851 word corpus (145.1% of prior 42634)
2020-08-02 00:47:59,183 : INFO : estimated required memory for 2250 words and 200 dimensions: 4725000 bytes
2020-08-02 00:47:59,184 : INFO : updating layer weights
2020-08-02 00:47:59,195 : INFO : training model with 3 workers 

2020-08-02 00:48:00,787 : INFO : deleting the raw counts dictionary of 4514 items
2020-08-02 00:48:00,789 : INFO : sample=0.001 downsamples 138 most-common words
2020-08-02 00:48:00,790 : INFO : downsampling leaves estimated 64233 word corpus (146.6% of prior 43822)
2020-08-02 00:48:00,795 : INFO : estimated required memory for 2468 words and 200 dimensions: 5182800 bytes
2020-08-02 00:48:00,796 : INFO : updating layer weights
2020-08-02 00:48:00,807 : INFO : training model with 3 workers on 2782 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:00,936 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:00,939 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:00,941 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:00,942 : INFO : EPOCH - 1 : training on 49210 raw words (33980 effective words) took 0.1s, 260848 effective words/s
2020-08-02 00:

2020-08-02 00:48:02,712 : INFO : training model with 3 workers on 2849 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:02,836 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:02,838 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:02,842 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:02,843 : INFO : EPOCH - 1 : training on 47450 raw words (32860 effective words) took 0.1s, 257196 effective words/s
2020-08-02 00:48:02,958 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:02,960 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:02,964 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:02,965 : INFO : EPOCH - 2 : training on 47450 raw words (32923 effective words) took 0.1s, 277741 effective words/s
2020-08-02 00:48:03,082 : INFO : worker

2020-08-02 00:48:04,721 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:04,723 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:04,729 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:04,732 : INFO : EPOCH - 2 : training on 57009 raw words (40197 effective words) took 0.2s, 198672 effective words/s
2020-08-02 00:48:04,961 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:04,963 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:04,966 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:04,967 : INFO : EPOCH - 3 : training on 57009 raw words (40390 effective words) took 0.2s, 177532 effective words/s
2020-08-02 00:48:04,967 : INFO : training on a 171027 raw words (120787 effective words) took 0.6s, 205896 effective words/s
2020-08-02 00:48:04,968 : INFO : collecting all words and their

2020-08-02 00:48:07,120 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:07,123 : INFO : EPOCH - 3 : training on 58667 raw words (40670 effective words) took 0.1s, 288312 effective words/s
2020-08-02 00:48:07,124 : INFO : training on a 176001 raw words (122157 effective words) took 0.5s, 256764 effective words/s
2020-08-02 00:48:07,125 : INFO : collecting all words and their counts
2020-08-02 00:48:07,127 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:07,136 : INFO : collected 5509 word types from a corpus of 58667 raw words and 100 sentences
2020-08-02 00:48:07,137 : INFO : Updating model with new vocabulary
2020-08-02 00:48:07,140 : INFO : New added 1532 unique words (21% of original 7041) and increased the count of 1532 pre-existing words (21% of original 7041)
2020-08-02 00:48:07,149 : INFO : deleting the raw counts dictionary of 5509 items
2020-08-02 00:48:07,150 : INFO : sample=0.001 downsamples 114 m

2020-08-02 00:48:09,048 : INFO : collected 4435 word types from a corpus of 49680 raw words and 100 sentences
2020-08-02 00:48:09,051 : INFO : Updating model with new vocabulary
2020-08-02 00:48:09,054 : INFO : New added 1211 unique words (21% of original 5646) and increased the count of 1211 pre-existing words (21% of original 5646)
2020-08-02 00:48:09,063 : INFO : deleting the raw counts dictionary of 4435 items
2020-08-02 00:48:09,065 : INFO : sample=0.001 downsamples 144 most-common words
2020-08-02 00:48:09,065 : INFO : downsampling leaves estimated 65104 word corpus (146.1% of prior 44574)
2020-08-02 00:48:09,107 : INFO : estimated required memory for 3237 words, 27164 buckets and 200 dimensions: 29270764 bytes
2020-08-02 00:48:09,109 : INFO : updating layer weights
2020-08-02 00:48:09,166 : INFO : Number of new ngrams is 701
2020-08-02 00:48:09,192 : INFO : training model with 3 workers on 3237 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:48:10,997 : INFO : estimated required memory for 3287 words, 27462 buckets and 200 dimensions: 29624292 bytes
2020-08-02 00:48:11,000 : INFO : updating layer weights
2020-08-02 00:48:11,063 : INFO : Number of new ngrams is 298
2020-08-02 00:48:11,072 : INFO : training model with 3 workers on 3287 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:11,265 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:11,266 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:11,270 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:11,271 : INFO : EPOCH - 1 : training on 47318 raw words (32792 effective words) took 0.2s, 169858 effective words/s
2020-08-02 00:48:11,459 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:11,464 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:11,46

2020-08-02 00:48:13,091 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:13,094 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:13,094 : INFO : EPOCH - 1 : training on 51255 raw words (35377 effective words) took 0.2s, 181380 effective words/s
2020-08-02 00:48:13,295 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:13,298 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:13,300 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:13,301 : INFO : EPOCH - 2 : training on 51255 raw words (35409 effective words) took 0.2s, 175468 effective words/s
2020-08-02 00:48:13,490 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:13,494 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:13,496 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:48:15,006 : INFO : EPOCH - 2 : training on 48653 raw words (33238 effective words) took 0.2s, 183868 effective words/s
2020-08-02 00:48:15,186 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:15,189 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:15,195 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:15,195 : INFO : EPOCH - 3 : training on 48653 raw words (33143 effective words) took 0.2s, 177795 effective words/s
2020-08-02 00:48:15,196 : INFO : training on a 145959 raw words (99542 effective words) took 0.6s, 177847 effective words/s
2020-08-02 00:48:15,662 : INFO : Training batch #27 
2020-08-02 00:48:15,663 : INFO : collecting all words and their counts
2020-08-02 00:48:15,663 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:15,672 : INFO : collected 4820 word types from a corpus of 53528 raw words and 100 sentences
2020

2020-08-02 00:48:17,021 : INFO : training on a 160584 raw words (112341 effective words) took 0.6s, 181469 effective words/s
2020-08-02 00:48:17,467 : INFO : Training batch #28 
2020-08-02 00:48:17,467 : INFO : collecting all words and their counts
2020-08-02 00:48:17,468 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:17,476 : INFO : collected 4859 word types from a corpus of 47311 raw words and 100 sentences
2020-08-02 00:48:17,476 : INFO : Updating model with new vocabulary
2020-08-02 00:48:17,479 : INFO : New added 1231 unique words (20% of original 6090) and increased the count of 1231 pre-existing words (20% of original 6090)
2020-08-02 00:48:17,487 : INFO : deleting the raw counts dictionary of 4859 items
2020-08-02 00:48:17,489 : INFO : sample=0.001 downsamples 152 most-common words
2020-08-02 00:48:17,489 : INFO : downsampling leaves estimated 59852 word corpus (144.3% of prior 41485)
2020-08-02 00:48:17,495 : INFO : estimated requir

2020-08-02 00:48:19,135 : INFO : New added 1223 unique words (20% of original 5875) and increased the count of 1223 pre-existing words (20% of original 5875)
2020-08-02 00:48:19,144 : INFO : deleting the raw counts dictionary of 4652 items
2020-08-02 00:48:19,146 : INFO : sample=0.001 downsamples 162 most-common words
2020-08-02 00:48:19,146 : INFO : downsampling leaves estimated 57839 word corpus (142.7% of prior 40522)
2020-08-02 00:48:19,152 : INFO : estimated required memory for 2446 words and 200 dimensions: 5136600 bytes
2020-08-02 00:48:19,152 : INFO : updating layer weights
2020-08-02 00:48:19,160 : INFO : training model with 3 workers on 3735 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:19,193 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:19,194 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:19,195 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:48:20,937 : INFO : updating layer weights
2020-08-02 00:48:20,944 : INFO : training model with 3 workers on 3789 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:20,978 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:20,980 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:20,980 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:20,982 : INFO : EPOCH - 1 : training on 49747 raw words (34730 effective words) took 0.0s, 1021678 effective words/s
2020-08-02 00:48:21,016 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:21,018 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:21,018 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:21,019 : INFO : EPOCH - 2 : training on 49747 raw words (34607 effective words) took 0.0s, 1022913

2020-08-02 00:48:22,751 : INFO : EPOCH - 1 : training on 50089 raw words (34421 effective words) took 0.0s, 999619 effective words/s
2020-08-02 00:48:22,787 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:22,789 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:22,790 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:22,792 : INFO : EPOCH - 2 : training on 50089 raw words (34480 effective words) took 0.0s, 927152 effective words/s
2020-08-02 00:48:22,826 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:22,828 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:22,828 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:22,829 : INFO : EPOCH - 3 : training on 50089 raw words (34464 effective words) took 0.0s, 1053239 effective words/s
2020-08-02 00:48:22,829 : INFO : training on a 150267 

2020-08-02 00:48:24,706 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:24,707 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:24,707 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:24,708 : INFO : EPOCH - 3 : training on 52911 raw words (36916 effective words) took 0.0s, 1031098 effective words/s
2020-08-02 00:48:24,709 : INFO : training on a 158733 raw words (110587 effective words) took 0.1s, 875656 effective words/s
2020-08-02 00:48:24,710 : INFO : collecting all words and their counts
2020-08-02 00:48:24,711 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:24,718 : INFO : collected 5104 word types from a corpus of 52911 raw words and 100 sentences
2020-08-02 00:48:24,719 : INFO : Updating model with new vocabulary
2020-08-02 00:48:24,722 : INFO : New added 1327 unique words (20% of original 6431) and increased the count of 1327 pre-

2020-08-02 00:48:26,560 : INFO : collecting all words and their counts
2020-08-02 00:48:26,561 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:26,567 : INFO : collected 4826 word types from a corpus of 47642 raw words and 100 sentences
2020-08-02 00:48:26,568 : INFO : Updating model with new vocabulary
2020-08-02 00:48:26,572 : INFO : New added 1245 unique words (20% of original 6071) and increased the count of 1245 pre-existing words (20% of original 6071)
2020-08-02 00:48:26,579 : INFO : deleting the raw counts dictionary of 4826 items
2020-08-02 00:48:26,580 : INFO : sample=0.001 downsamples 152 most-common words
2020-08-02 00:48:26,581 : INFO : downsampling leaves estimated 60217 word corpus (144.1% of prior 41786)
2020-08-02 00:48:26,587 : INFO : estimated required memory for 2490 words and 200 dimensions: 5229000 bytes
2020-08-02 00:48:26,588 : INFO : updating layer weights
2020-08-02 00:48:26,594 : INFO : training model with 3 workers 

2020-08-02 00:48:28,575 : INFO : deleting the raw counts dictionary of 3600 items
2020-08-02 00:48:28,576 : INFO : sample=0.001 downsamples 172 most-common words
2020-08-02 00:48:28,577 : INFO : downsampling leaves estimated 57215 word corpus (144.6% of prior 39573)
2020-08-02 00:48:28,587 : INFO : estimated required memory for 2156 words and 200 dimensions: 4527600 bytes
2020-08-02 00:48:28,588 : INFO : updating layer weights
2020-08-02 00:48:28,604 : INFO : training model with 3 workers on 4053 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:28,749 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:28,753 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:28,755 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:28,757 : INFO : EPOCH - 1 : training on 43675 raw words (30865 effective words) took 0.1s, 210184 effective words/s
2020-08-02 00:

2020-08-02 00:48:30,548 : INFO : training model with 3 workers on 4087 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:30,675 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:30,676 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:30,679 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:30,680 : INFO : EPOCH - 1 : training on 43486 raw words (30069 effective words) took 0.1s, 236809 effective words/s
2020-08-02 00:48:30,818 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:30,821 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:30,824 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:30,826 : INFO : EPOCH - 2 : training on 43486 raw words (29972 effective words) took 0.1s, 210258 effective words/s
2020-08-02 00:48:30,943 : INFO : worker

2020-08-02 00:48:32,713 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:32,716 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:32,718 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:32,719 : INFO : EPOCH - 2 : training on 54142 raw words (38468 effective words) took 0.1s, 273760 effective words/s
2020-08-02 00:48:32,859 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:32,863 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:32,864 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:32,865 : INFO : EPOCH - 3 : training on 54142 raw words (38457 effective words) took 0.1s, 270899 effective words/s
2020-08-02 00:48:32,865 : INFO : training on a 162426 raw words (115392 effective words) took 0.4s, 258185 effective words/s
2020-08-02 00:48:32,866 : INFO : collecting all words and their

2020-08-02 00:48:34,810 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:34,811 : INFO : EPOCH - 3 : training on 46027 raw words (32147 effective words) took 0.1s, 259717 effective words/s
2020-08-02 00:48:34,812 : INFO : training on a 138081 raw words (96311 effective words) took 0.4s, 248983 effective words/s
2020-08-02 00:48:34,813 : INFO : collecting all words and their counts
2020-08-02 00:48:34,814 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:34,820 : INFO : collected 4779 word types from a corpus of 46027 raw words and 100 sentences
2020-08-02 00:48:34,821 : INFO : Updating model with new vocabulary
2020-08-02 00:48:34,824 : INFO : New added 1241 unique words (20% of original 6020) and increased the count of 1241 pre-existing words (20% of original 6020)
2020-08-02 00:48:34,832 : INFO : deleting the raw counts dictionary of 4779 items
2020-08-02 00:48:34,833 : INFO : sample=0.001 downsamples 170 mo

2020-08-02 00:48:36,683 : INFO : collected 4042 word types from a corpus of 46786 raw words and 100 sentences
2020-08-02 00:48:36,683 : INFO : Updating model with new vocabulary
2020-08-02 00:48:36,686 : INFO : New added 1217 unique words (23% of original 5259) and increased the count of 1217 pre-existing words (23% of original 5259)
2020-08-02 00:48:36,694 : INFO : deleting the raw counts dictionary of 4042 items
2020-08-02 00:48:36,695 : INFO : sample=0.001 downsamples 150 most-common words
2020-08-02 00:48:36,695 : INFO : downsampling leaves estimated 61058 word corpus (145.6% of prior 41950)
2020-08-02 00:48:36,750 : INFO : estimated required memory for 4344 words, 34293 buckets and 200 dimensions: 37557912 bytes
2020-08-02 00:48:36,751 : INFO : updating layer weights
2020-08-02 00:48:36,827 : INFO : Number of new ngrams is 546
2020-08-02 00:48:36,838 : INFO : training model with 3 workers on 4344 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:48:38,648 : INFO : estimated required memory for 4377 words, 34503 buckets and 200 dimensions: 37803788 bytes
2020-08-02 00:48:38,650 : INFO : updating layer weights
2020-08-02 00:48:38,717 : INFO : Number of new ngrams is 210
2020-08-02 00:48:38,728 : INFO : training model with 3 workers on 4377 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:38,898 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:38,903 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:38,904 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:38,905 : INFO : EPOCH - 1 : training on 46520 raw words (32463 effective words) took 0.2s, 188852 effective words/s
2020-08-02 00:48:39,083 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:39,085 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:39,08

2020-08-02 00:48:40,623 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:40,624 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:40,625 : INFO : EPOCH - 1 : training on 46559 raw words (33296 effective words) took 0.2s, 196328 effective words/s
2020-08-02 00:48:40,805 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:40,806 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:40,807 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:40,808 : INFO : EPOCH - 2 : training on 46559 raw words (33312 effective words) took 0.2s, 184875 effective words/s
2020-08-02 00:48:40,983 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:40,985 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:40,986 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:48:42,571 : INFO : EPOCH - 2 : training on 47100 raw words (33353 effective words) took 0.2s, 186988 effective words/s
2020-08-02 00:48:42,759 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:42,764 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:42,766 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:42,767 : INFO : EPOCH - 3 : training on 47100 raw words (33526 effective words) took 0.2s, 174266 effective words/s
2020-08-02 00:48:42,768 : INFO : training on a 141300 raw words (100331 effective words) took 0.6s, 177607 effective words/s
2020-08-02 00:48:43,249 : INFO : Training batch #42 
2020-08-02 00:48:43,249 : INFO : collecting all words and their counts
2020-08-02 00:48:43,250 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:43,258 : INFO : collected 4425 word types from a corpus of 44783 raw words and 100 sentences
202

2020-08-02 00:48:44,811 : INFO : training on a 134349 raw words (93943 effective words) took 0.7s, 125319 effective words/s
2020-08-02 00:48:45,309 : INFO : Training batch #43 
2020-08-02 00:48:45,310 : INFO : collecting all words and their counts
2020-08-02 00:48:45,311 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:45,318 : INFO : collected 4112 word types from a corpus of 43027 raw words and 100 sentences
2020-08-02 00:48:45,319 : INFO : Updating model with new vocabulary
2020-08-02 00:48:45,322 : INFO : New added 1142 unique words (21% of original 5254) and increased the count of 1142 pre-existing words (21% of original 5254)
2020-08-02 00:48:45,329 : INFO : deleting the raw counts dictionary of 4112 items
2020-08-02 00:48:45,330 : INFO : sample=0.001 downsamples 198 most-common words
2020-08-02 00:48:45,331 : INFO : downsampling leaves estimated 54909 word corpus (143.8% of prior 38179)
2020-08-02 00:48:45,338 : INFO : estimated require

2020-08-02 00:48:47,198 : INFO : New added 1208 unique words (21% of original 5715) and increased the count of 1208 pre-existing words (21% of original 5715)
2020-08-02 00:48:47,206 : INFO : deleting the raw counts dictionary of 4507 items
2020-08-02 00:48:47,207 : INFO : sample=0.001 downsamples 144 most-common words
2020-08-02 00:48:47,208 : INFO : downsampling leaves estimated 60254 word corpus (144.8% of prior 41600)
2020-08-02 00:48:47,215 : INFO : estimated required memory for 2416 words and 200 dimensions: 5073600 bytes
2020-08-02 00:48:47,216 : INFO : updating layer weights
2020-08-02 00:48:47,224 : INFO : training model with 3 workers on 4546 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:47,259 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:47,260 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:47,261 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:48:48,954 : INFO : updating layer weights
2020-08-02 00:48:48,962 : INFO : training model with 3 workers on 4589 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:48,995 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:48,996 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:48,996 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:48,997 : INFO : EPOCH - 1 : training on 47485 raw words (33611 effective words) took 0.0s, 1137490 effective words/s
2020-08-02 00:48:49,032 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:49,034 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:49,035 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:49,036 : INFO : EPOCH - 2 : training on 47485 raw words (33668 effective words) took 0.0s, 995126 

2020-08-02 00:48:51,013 : INFO : EPOCH - 1 : training on 47730 raw words (33812 effective words) took 0.0s, 958036 effective words/s
2020-08-02 00:48:51,050 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:51,051 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:51,052 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:51,053 : INFO : EPOCH - 2 : training on 47730 raw words (33927 effective words) took 0.0s, 981145 effective words/s
2020-08-02 00:48:51,090 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:51,091 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:51,092 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:51,093 : INFO : EPOCH - 3 : training on 47730 raw words (33896 effective words) took 0.0s, 956344 effective words/s
2020-08-02 00:48:51,093 : INFO : training on a 143190 r

2020-08-02 00:48:53,006 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:53,007 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:53,008 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:53,009 : INFO : EPOCH - 3 : training on 43111 raw words (30328 effective words) took 0.0s, 721037 effective words/s
2020-08-02 00:48:53,010 : INFO : training on a 129333 raw words (90813 effective words) took 0.1s, 645629 effective words/s
2020-08-02 00:48:53,011 : INFO : collecting all words and their counts
2020-08-02 00:48:53,012 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:53,020 : INFO : collected 3923 word types from a corpus of 43111 raw words and 100 sentences
2020-08-02 00:48:53,021 : INFO : Updating model with new vocabulary
2020-08-02 00:48:53,024 : INFO : New added 1098 unique words (21% of original 5021) and increased the count of 1098 pre-ex

2020-08-02 00:48:54,863 : INFO : collecting all words and their counts
2020-08-02 00:48:54,864 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:48:54,880 : INFO : collected 4402 word types from a corpus of 49691 raw words and 100 sentences
2020-08-02 00:48:54,881 : INFO : Updating model with new vocabulary
2020-08-02 00:48:54,888 : INFO : New added 1323 unique words (23% of original 5725) and increased the count of 1323 pre-existing words (23% of original 5725)
2020-08-02 00:48:54,905 : INFO : deleting the raw counts dictionary of 4402 items
2020-08-02 00:48:54,908 : INFO : sample=0.001 downsamples 126 most-common words
2020-08-02 00:48:54,910 : INFO : downsampling leaves estimated 65370 word corpus (147.4% of prior 44346)
2020-08-02 00:48:54,919 : INFO : estimated required memory for 2646 words and 200 dimensions: 5556600 bytes
2020-08-02 00:48:54,920 : INFO : updating layer weights
2020-08-02 00:48:54,931 : INFO : training model with 3 workers 

2020-08-02 00:48:56,828 : INFO : deleting the raw counts dictionary of 4231 items
2020-08-02 00:48:56,830 : INFO : sample=0.001 downsamples 160 most-common words
2020-08-02 00:48:56,831 : INFO : downsampling leaves estimated 59253 word corpus (145.5% of prior 40732)
2020-08-02 00:48:56,841 : INFO : estimated required memory for 2348 words and 200 dimensions: 4930800 bytes
2020-08-02 00:48:56,842 : INFO : updating layer weights
2020-08-02 00:48:56,851 : INFO : training model with 3 workers on 4671 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:56,971 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:56,972 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:56,974 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:56,976 : INFO : EPOCH - 1 : training on 45871 raw words (32658 effective words) took 0.1s, 273843 effective words/s
2020-08-02 00:

2020-08-02 00:48:58,607 : INFO : training model with 3 workers on 4703 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:48:58,719 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:58,721 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:58,722 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:58,723 : INFO : EPOCH - 1 : training on 42757 raw words (29723 effective words) took 0.1s, 281112 effective words/s
2020-08-02 00:48:58,854 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:48:58,855 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:48:58,856 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:48:58,857 : INFO : EPOCH - 2 : training on 42757 raw words (29703 effective words) took 0.1s, 230095 effective words/s
2020-08-02 00:48:58,980 : INFO : worker

2020-08-02 00:49:00,685 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:00,688 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:00,688 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:00,689 : INFO : EPOCH - 2 : training on 48633 raw words (34819 effective words) took 0.1s, 279460 effective words/s
2020-08-02 00:49:00,808 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:00,811 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:00,813 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:00,814 : INFO : EPOCH - 3 : training on 48633 raw words (34725 effective words) took 0.1s, 286171 effective words/s
2020-08-02 00:49:00,814 : INFO : training on a 145899 raw words (104286 effective words) took 0.4s, 268199 effective words/s
2020-08-02 00:49:00,815 : INFO : collecting all words and their

2020-08-02 00:49:02,939 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:02,940 : INFO : EPOCH - 3 : training on 49314 raw words (35472 effective words) took 0.1s, 256620 effective words/s
2020-08-02 00:49:02,940 : INFO : training on a 147942 raw words (106518 effective words) took 0.5s, 231773 effective words/s
2020-08-02 00:49:02,942 : INFO : collecting all words and their counts
2020-08-02 00:49:02,943 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:02,951 : INFO : collected 4468 word types from a corpus of 49314 raw words and 100 sentences
2020-08-02 00:49:02,952 : INFO : Updating model with new vocabulary
2020-08-02 00:49:02,956 : INFO : New added 1360 unique words (23% of original 5828) and increased the count of 1360 pre-existing words (23% of original 5828)
2020-08-02 00:49:02,967 : INFO : deleting the raw counts dictionary of 4468 items
2020-08-02 00:49:02,969 : INFO : sample=0.001 downsamples 142 m

2020-08-02 00:49:04,923 : INFO : collected 4590 word types from a corpus of 44680 raw words and 100 sentences
2020-08-02 00:49:04,924 : INFO : Updating model with new vocabulary
2020-08-02 00:49:04,927 : INFO : New added 1206 unique words (20% of original 5796) and increased the count of 1206 pre-existing words (20% of original 5796)
2020-08-02 00:49:04,934 : INFO : deleting the raw counts dictionary of 4590 items
2020-08-02 00:49:04,935 : INFO : sample=0.001 downsamples 170 most-common words
2020-08-02 00:49:04,936 : INFO : downsampling leaves estimated 56125 word corpus (143.4% of prior 39129)
2020-08-02 00:49:04,993 : INFO : estimated required memory for 4839 words, 37302 buckets and 200 dimensions: 41121612 bytes
2020-08-02 00:49:04,995 : INFO : updating layer weights
2020-08-02 00:49:05,073 : INFO : Number of new ngrams is 202
2020-08-02 00:49:05,084 : INFO : training model with 3 workers on 4839 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:49:06,783 : INFO : estimated required memory for 4888 words, 37577 buckets and 200 dimensions: 41455552 bytes
2020-08-02 00:49:06,785 : INFO : updating layer weights
2020-08-02 00:49:06,871 : INFO : Number of new ngrams is 275
2020-08-02 00:49:06,881 : INFO : training model with 3 workers on 4888 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:07,067 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:07,073 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:07,077 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:07,077 : INFO : EPOCH - 1 : training on 50858 raw words (36307 effective words) took 0.2s, 188488 effective words/s
2020-08-02 00:49:07,262 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:07,267 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:07,27

2020-08-02 00:49:08,969 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:08,975 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:08,976 : INFO : EPOCH - 1 : training on 49956 raw words (35699 effective words) took 0.2s, 162417 effective words/s
2020-08-02 00:49:09,167 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:09,171 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:09,177 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:09,178 : INFO : EPOCH - 2 : training on 49956 raw words (35625 effective words) took 0.2s, 179187 effective words/s
2020-08-02 00:49:09,374 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:09,377 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:09,387 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:49:11,029 : INFO : EPOCH - 2 : training on 41669 raw words (28662 effective words) took 0.2s, 167092 effective words/s
2020-08-02 00:49:11,200 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:11,201 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:11,213 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:11,214 : INFO : EPOCH - 3 : training on 41669 raw words (28858 effective words) took 0.2s, 159469 effective words/s
2020-08-02 00:49:11,215 : INFO : training on a 125007 raw words (86279 effective words) took 0.5s, 162753 effective words/s
2020-08-02 00:49:11,800 : INFO : Training batch #57 
2020-08-02 00:49:11,801 : INFO : collecting all words and their counts
2020-08-02 00:49:11,802 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:11,813 : INFO : collected 4435 word types from a corpus of 61595 raw words and 100 sentences
2020

2020-08-02 00:49:13,616 : INFO : training on a 184785 raw words (129695 effective words) took 0.8s, 167366 effective words/s
2020-08-02 00:49:14,157 : INFO : Training batch #58 
2020-08-02 00:49:14,158 : INFO : collecting all words and their counts
2020-08-02 00:49:14,159 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:14,169 : INFO : collected 4426 word types from a corpus of 57482 raw words and 100 sentences
2020-08-02 00:49:14,170 : INFO : Updating model with new vocabulary
2020-08-02 00:49:14,173 : INFO : New added 1156 unique words (20% of original 5582) and increased the count of 1156 pre-existing words (20% of original 5582)
2020-08-02 00:49:14,181 : INFO : deleting the raw counts dictionary of 4426 items
2020-08-02 00:49:14,181 : INFO : sample=0.001 downsamples 112 most-common words
2020-08-02 00:49:14,182 : INFO : downsampling leaves estimated 74461 word corpus (142.3% of prior 52342)
2020-08-02 00:49:14,194 : INFO : estimated requir

2020-08-02 00:49:16,399 : INFO : New added 1211 unique words (21% of original 5554) and increased the count of 1211 pre-existing words (21% of original 5554)
2020-08-02 00:49:16,409 : INFO : deleting the raw counts dictionary of 4343 items
2020-08-02 00:49:16,410 : INFO : sample=0.001 downsamples 132 most-common words
2020-08-02 00:49:16,410 : INFO : downsampling leaves estimated 63380 word corpus (142.6% of prior 44453)
2020-08-02 00:49:16,419 : INFO : estimated required memory for 2422 words and 200 dimensions: 5086200 bytes
2020-08-02 00:49:16,420 : INFO : updating layer weights
2020-08-02 00:49:16,427 : INFO : training model with 3 workers on 5054 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:16,474 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:16,477 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:16,478 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:49:18,468 : INFO : updating layer weights
2020-08-02 00:49:18,477 : INFO : training model with 3 workers on 5081 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:18,525 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:18,528 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:18,528 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:18,530 : INFO : EPOCH - 1 : training on 42227 raw words (29154 effective words) took 0.0s, 594113 effective words/s
2020-08-02 00:49:18,569 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:18,570 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:18,571 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:18,572 : INFO : EPOCH - 2 : training on 42227 raw words (29155 effective words) took 0.0s, 802266 e

2020-08-02 00:49:20,209 : INFO : EPOCH - 1 : training on 44238 raw words (30835 effective words) took 0.0s, 791793 effective words/s
2020-08-02 00:49:20,245 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:20,246 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:20,247 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:20,248 : INFO : EPOCH - 2 : training on 44238 raw words (30806 effective words) took 0.0s, 894333 effective words/s
2020-08-02 00:49:20,287 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:20,288 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:20,289 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:20,290 : INFO : EPOCH - 3 : training on 44238 raw words (30903 effective words) took 0.0s, 800829 effective words/s
2020-08-02 00:49:20,291 : INFO : training on a 132714 r

2020-08-02 00:49:21,989 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:21,991 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:21,992 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:21,992 : INFO : EPOCH - 3 : training on 46203 raw words (32508 effective words) took 0.0s, 777764 effective words/s
2020-08-02 00:49:21,993 : INFO : training on a 138609 raw words (97438 effective words) took 0.1s, 753656 effective words/s
2020-08-02 00:49:21,994 : INFO : collecting all words and their counts
2020-08-02 00:49:21,994 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:22,001 : INFO : collected 4778 word types from a corpus of 46203 raw words and 100 sentences
2020-08-02 00:49:22,002 : INFO : Updating model with new vocabulary
2020-08-02 00:49:22,006 : INFO : New added 1228 unique words (20% of original 6006) and increased the count of 1228 pre-ex

2020-08-02 00:49:23,695 : INFO : collecting all words and their counts
2020-08-02 00:49:23,696 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:23,702 : INFO : collected 4427 word types from a corpus of 44701 raw words and 100 sentences
2020-08-02 00:49:23,702 : INFO : Updating model with new vocabulary
2020-08-02 00:49:23,705 : INFO : New added 1190 unique words (21% of original 5617) and increased the count of 1190 pre-existing words (21% of original 5617)
2020-08-02 00:49:23,713 : INFO : deleting the raw counts dictionary of 4427 items
2020-08-02 00:49:23,714 : INFO : sample=0.001 downsamples 184 most-common words
2020-08-02 00:49:23,717 : INFO : downsampling leaves estimated 55864 word corpus (142.1% of prior 39313)
2020-08-02 00:49:23,724 : INFO : estimated required memory for 2380 words and 200 dimensions: 4998000 bytes
2020-08-02 00:49:23,725 : INFO : updating layer weights
2020-08-02 00:49:23,733 : INFO : training model with 3 workers 

2020-08-02 00:49:25,323 : INFO : sample=0.001 downsamples 192 most-common words
2020-08-02 00:49:25,324 : INFO : downsampling leaves estimated 38998 word corpus (132.0% of prior 29535)
2020-08-02 00:49:25,332 : INFO : estimated required memory for 1718 words and 200 dimensions: 3607800 bytes
2020-08-02 00:49:25,333 : INFO : updating layer weights
2020-08-02 00:49:25,342 : INFO : training model with 3 workers on 5191 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:25,416 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:25,418 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:25,421 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:25,422 : INFO : EPOCH - 1 : training on 33824 raw words (22209 effective words) took 0.1s, 289659 effective words/s
2020-08-02 00:49:25,505 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-

2020-08-02 00:49:26,700 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:26,702 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:26,707 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:26,708 : INFO : EPOCH - 1 : training on 31796 raw words (20493 effective words) took 0.1s, 287709 effective words/s
2020-08-02 00:49:26,786 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:26,788 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:26,794 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:26,795 : INFO : EPOCH - 2 : training on 31796 raw words (20598 effective words) took 0.1s, 245772 effective words/s
2020-08-02 00:49:26,867 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:26,870 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-0

2020-08-02 00:49:28,093 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:28,094 : INFO : EPOCH - 2 : training on 32843 raw words (21229 effective words) took 0.1s, 259430 effective words/s
2020-08-02 00:49:28,174 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:28,176 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:28,177 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:28,178 : INFO : EPOCH - 3 : training on 32843 raw words (21182 effective words) took 0.1s, 263653 effective words/s
2020-08-02 00:49:28,179 : INFO : training on a 98529 raw words (63594 effective words) took 0.2s, 256729 effective words/s
2020-08-02 00:49:28,179 : INFO : collecting all words and their counts
2020-08-02 00:49:28,180 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:28,185 : INFO : collected 3328 word types from a corpus of 3

2020-08-02 00:49:29,527 : INFO : training on a 106932 raw words (71389 effective words) took 0.3s, 265660 effective words/s
2020-08-02 00:49:29,528 : INFO : collecting all words and their counts
2020-08-02 00:49:29,530 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:29,535 : INFO : collected 3696 word types from a corpus of 35644 raw words and 100 sentences
2020-08-02 00:49:29,536 : INFO : Updating model with new vocabulary
2020-08-02 00:49:29,539 : INFO : New added 913 unique words (19% of original 4609) and increased the count of 913 pre-existing words (19% of original 4609)
2020-08-02 00:49:29,545 : INFO : deleting the raw counts dictionary of 3696 items
2020-08-02 00:49:29,546 : INFO : sample=0.001 downsamples 194 most-common words
2020-08-02 00:49:29,547 : INFO : downsampling leaves estimated 41893 word corpus (134.5% of prior 31150)
2020-08-02 00:49:29,603 : INFO : estimated required memory for 5273 words, 40118 buckets and 200 dimensio

2020-08-02 00:49:30,994 : INFO : deleting the raw counts dictionary of 3502 items
2020-08-02 00:49:30,994 : INFO : sample=0.001 downsamples 190 most-common words
2020-08-02 00:49:30,995 : INFO : downsampling leaves estimated 39022 word corpus (130.6% of prior 29881)
2020-08-02 00:49:31,056 : INFO : estimated required memory for 5292 words, 40267 buckets and 200 dimensions: 44550056 bytes
2020-08-02 00:49:31,058 : INFO : updating layer weights
2020-08-02 00:49:31,140 : INFO : Number of new ngrams is 149
2020-08-02 00:49:31,157 : INFO : training model with 3 workers on 5292 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:31,299 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:31,302 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:31,304 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:31,305 : INFO : EPOCH - 1 : training on 34200 raw wor

2020-08-02 00:49:32,569 : INFO : training model with 3 workers on 5310 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:32,679 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:32,684 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:32,686 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:32,687 : INFO : EPOCH - 1 : training on 34912 raw words (23033 effective words) took 0.1s, 200465 effective words/s
2020-08-02 00:49:32,804 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:32,809 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:32,812 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:32,813 : INFO : EPOCH - 2 : training on 34912 raw words (22937 effective words) took 0.1s, 188426 effective words/s
2020-08-02 00:49:32,932 : INFO : worker

2020-08-02 00:49:34,109 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:34,113 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:34,116 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:34,116 : INFO : EPOCH - 2 : training on 31611 raw words (20300 effective words) took 0.1s, 190156 effective words/s
2020-08-02 00:49:34,223 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:34,228 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:34,230 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:34,231 : INFO : EPOCH - 3 : training on 31611 raw words (20414 effective words) took 0.1s, 183162 effective words/s
2020-08-02 00:49:34,232 : INFO : training on a 94833 raw words (60998 effective words) took 0.3s, 180765 effective words/s
2020-08-02 00:49:34,696 : INFO : Training batch #71 
2020-08-02 0

2020-08-02 00:49:35,896 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:35,897 : INFO : EPOCH - 3 : training on 46284 raw words (32361 effective words) took 0.2s, 196858 effective words/s
2020-08-02 00:49:35,898 : INFO : training on a 138852 raw words (97040 effective words) took 0.5s, 193985 effective words/s
2020-08-02 00:49:36,356 : INFO : Training batch #72 
2020-08-02 00:49:36,357 : INFO : collecting all words and their counts
2020-08-02 00:49:36,357 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:36,364 : INFO : collected 4644 word types from a corpus of 45946 raw words and 100 sentences
2020-08-02 00:49:36,365 : INFO : Updating model with new vocabulary
2020-08-02 00:49:36,369 : INFO : New added 1213 unique words (20% of original 5857) and increased the count of 1213 pre-existing words (20% of original 5857)
2020-08-02 00:49:36,376 : INFO : deleting the raw counts dictionary of 4644 items
2020-08-02 

2020-08-02 00:49:38,062 : INFO : collected 2328 word types from a corpus of 48026 raw words and 100 sentences
2020-08-02 00:49:38,062 : INFO : Updating model with new vocabulary
2020-08-02 00:49:38,064 : INFO : New added 705 unique words (23% of original 3033) and increased the count of 705 pre-existing words (23% of original 3033)
2020-08-02 00:49:38,069 : INFO : deleting the raw counts dictionary of 2328 items
2020-08-02 00:49:38,070 : INFO : sample=0.001 downsamples 152 most-common words
2020-08-02 00:49:38,072 : INFO : downsampling leaves estimated 65698 word corpus (144.7% of prior 45402)
2020-08-02 00:49:38,081 : INFO : estimated required memory for 1410 words and 200 dimensions: 2961000 bytes
2020-08-02 00:49:38,082 : INFO : updating layer weights
2020-08-02 00:49:38,085 : INFO : training model with 3 workers on 5382 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:38,119 : INFO : worker thread finished; awaiting finish of 2 more thr

2020-08-02 00:49:39,766 : INFO : estimated required memory for 1484 words and 200 dimensions: 3116400 bytes
2020-08-02 00:49:39,767 : INFO : updating layer weights
2020-08-02 00:49:39,770 : INFO : training model with 3 workers on 5389 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:39,803 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:39,805 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:39,805 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:39,806 : INFO : EPOCH - 1 : training on 47581 raw words (34558 effective words) took 0.0s, 1043493 effective words/s
2020-08-02 00:49:39,839 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:39,840 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:39,840 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08

2020-08-02 00:49:41,554 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:41,554 : INFO : EPOCH - 1 : training on 50835 raw words (37216 effective words) took 0.0s, 1168632 effective words/s
2020-08-02 00:49:41,588 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:41,589 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:41,591 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:41,591 : INFO : EPOCH - 2 : training on 50835 raw words (37183 effective words) took 0.0s, 1136926 effective words/s
2020-08-02 00:49:41,623 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:41,624 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:41,625 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:41,625 : INFO : EPOCH - 3 : training on 50835 raw words (37234 effective words

2020-08-02 00:49:43,354 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:43,355 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:43,356 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:43,357 : INFO : EPOCH - 3 : training on 45996 raw words (33226 effective words) took 0.0s, 1105145 effective words/s
2020-08-02 00:49:43,357 : INFO : training on a 137988 raw words (99574 effective words) took 0.1s, 970341 effective words/s
2020-08-02 00:49:43,358 : INFO : collecting all words and their counts
2020-08-02 00:49:43,359 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:43,364 : INFO : collected 3505 word types from a corpus of 45996 raw words and 100 sentences
2020-08-02 00:49:43,365 : INFO : Updating model with new vocabulary
2020-08-02 00:49:43,367 : INFO : New added 974 unique words (21% of original 4479) and increased the count of 974 pre-exi

2020-08-02 00:49:45,051 : INFO : collecting all words and their counts
2020-08-02 00:49:45,051 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:45,056 : INFO : collected 3608 word types from a corpus of 42800 raw words and 100 sentences
2020-08-02 00:49:45,057 : INFO : Updating model with new vocabulary
2020-08-02 00:49:45,060 : INFO : New added 990 unique words (21% of original 4598) and increased the count of 990 pre-existing words (21% of original 4598)
2020-08-02 00:49:45,066 : INFO : deleting the raw counts dictionary of 3608 items
2020-08-02 00:49:45,067 : INFO : sample=0.001 downsamples 188 most-common words
2020-08-02 00:49:45,068 : INFO : downsampling leaves estimated 54706 word corpus (142.3% of prior 38434)
2020-08-02 00:49:45,075 : INFO : estimated required memory for 1980 words and 200 dimensions: 4158000 bytes
2020-08-02 00:49:45,076 : INFO : updating layer weights
2020-08-02 00:49:45,080 : INFO : training model with 3 workers on

2020-08-02 00:49:46,663 : INFO : sample=0.001 downsamples 184 most-common words
2020-08-02 00:49:46,664 : INFO : downsampling leaves estimated 56316 word corpus (143.6% of prior 39215)
2020-08-02 00:49:46,671 : INFO : estimated required memory for 2260 words and 200 dimensions: 4746000 bytes
2020-08-02 00:49:46,675 : INFO : updating layer weights
2020-08-02 00:49:46,679 : INFO : training model with 3 workers on 5477 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:46,779 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:46,782 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:46,785 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:46,786 : INFO : EPOCH - 1 : training on 44568 raw words (31508 effective words) took 0.1s, 302657 effective words/s
2020-08-02 00:49:46,896 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-

2020-08-02 00:49:48,429 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:48,430 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:48,431 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:48,432 : INFO : EPOCH - 1 : training on 45065 raw words (31783 effective words) took 0.1s, 298843 effective words/s
2020-08-02 00:49:48,549 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:48,552 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:48,554 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:48,555 : INFO : EPOCH - 2 : training on 45065 raw words (31910 effective words) took 0.1s, 267963 effective words/s
2020-08-02 00:49:48,670 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:48,672 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-0

2020-08-02 00:49:50,308 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:50,310 : INFO : EPOCH - 2 : training on 50165 raw words (35602 effective words) took 0.1s, 265470 effective words/s
2020-08-02 00:49:50,434 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:50,437 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:50,442 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:50,443 : INFO : EPOCH - 3 : training on 50165 raw words (35609 effective words) took 0.1s, 274098 effective words/s
2020-08-02 00:49:50,443 : INFO : training on a 150495 raw words (106808 effective words) took 0.4s, 267958 effective words/s
2020-08-02 00:49:50,444 : INFO : collecting all words and their counts
2020-08-02 00:49:50,445 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:50,452 : INFO : collected 4927 word types from a corpus of

2020-08-02 00:49:52,304 : INFO : training on a 154941 raw words (109998 effective words) took 0.4s, 268225 effective words/s
2020-08-02 00:49:52,305 : INFO : collecting all words and their counts
2020-08-02 00:49:52,305 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:52,313 : INFO : collected 4711 word types from a corpus of 51647 raw words and 100 sentences
2020-08-02 00:49:52,314 : INFO : Updating model with new vocabulary
2020-08-02 00:49:52,317 : INFO : New added 1317 unique words (21% of original 6028) and increased the count of 1317 pre-existing words (21% of original 6028)
2020-08-02 00:49:52,325 : INFO : deleting the raw counts dictionary of 4711 items
2020-08-02 00:49:52,326 : INFO : sample=0.001 downsamples 132 most-common words
2020-08-02 00:49:52,326 : INFO : downsampling leaves estimated 66728 word corpus (144.7% of prior 46117)
2020-08-02 00:49:52,385 : INFO : estimated required memory for 5602 words, 42191 buckets and 200 dimen

2020-08-02 00:49:54,063 : INFO : deleting the raw counts dictionary of 4128 items
2020-08-02 00:49:54,064 : INFO : sample=0.001 downsamples 184 most-common words
2020-08-02 00:49:54,065 : INFO : downsampling leaves estimated 55328 word corpus (142.0% of prior 38961)
2020-08-02 00:49:54,127 : INFO : estimated required memory for 5639 words, 42392 buckets and 200 dimensions: 47053580 bytes
2020-08-02 00:49:54,129 : INFO : updating layer weights
2020-08-02 00:49:54,217 : INFO : Number of new ngrams is 201
2020-08-02 00:49:54,251 : INFO : training model with 3 workers on 5639 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:54,413 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:54,414 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:54,417 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:54,418 : INFO : EPOCH - 1 : training on 43909 raw wor

2020-08-02 00:49:55,828 : INFO : training model with 3 workers on 5652 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:49:55,963 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:55,966 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:55,971 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:55,971 : INFO : EPOCH - 1 : training on 38757 raw words (26879 effective words) took 0.1s, 192334 effective words/s
2020-08-02 00:49:56,100 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:56,104 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:56,108 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:56,108 : INFO : EPOCH - 2 : training on 38757 raw words (26855 effective words) took 0.1s, 203338 effective words/s
2020-08-02 00:49:56,243 : INFO : worker

2020-08-02 00:49:57,746 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:57,748 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:57,750 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:57,750 : INFO : EPOCH - 2 : training on 44074 raw words (31120 effective words) took 0.2s, 202025 effective words/s
2020-08-02 00:49:57,907 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:49:57,909 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:49:57,911 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:57,911 : INFO : EPOCH - 3 : training on 44074 raw words (31059 effective words) took 0.2s, 197100 effective words/s
2020-08-02 00:49:57,913 : INFO : training on a 132222 raw words (93148 effective words) took 0.5s, 196322 effective words/s
2020-08-02 00:49:58,359 : INFO : Training batch #85 
2020-08-02 

2020-08-02 00:49:59,466 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:49:59,466 : INFO : EPOCH - 3 : training on 40265 raw words (27671 effective words) took 0.1s, 193505 effective words/s
2020-08-02 00:49:59,467 : INFO : training on a 120795 raw words (82940 effective words) took 0.4s, 186835 effective words/s
2020-08-02 00:49:59,947 : INFO : Training batch #86 
2020-08-02 00:49:59,948 : INFO : collecting all words and their counts
2020-08-02 00:49:59,949 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:49:59,957 : INFO : collected 4474 word types from a corpus of 45278 raw words and 100 sentences
2020-08-02 00:49:59,958 : INFO : Updating model with new vocabulary
2020-08-02 00:49:59,961 : INFO : New added 1250 unique words (21% of original 5724) and increased the count of 1250 pre-existing words (21% of original 5724)
2020-08-02 00:49:59,969 : INFO : deleting the raw counts dictionary of 4474 items
2020-08-02 

2020-08-02 00:50:01,731 : INFO : collected 4853 word types from a corpus of 50067 raw words and 100 sentences
2020-08-02 00:50:01,732 : INFO : Updating model with new vocabulary
2020-08-02 00:50:01,735 : INFO : New added 1301 unique words (21% of original 6154) and increased the count of 1301 pre-existing words (21% of original 6154)
2020-08-02 00:50:01,742 : INFO : deleting the raw counts dictionary of 4853 items
2020-08-02 00:50:01,743 : INFO : sample=0.001 downsamples 156 most-common words
2020-08-02 00:50:01,744 : INFO : downsampling leaves estimated 63734 word corpus (144.2% of prior 44214)
2020-08-02 00:50:01,752 : INFO : estimated required memory for 2602 words and 200 dimensions: 5464200 bytes
2020-08-02 00:50:01,753 : INFO : updating layer weights
2020-08-02 00:50:01,759 : INFO : training model with 3 workers on 5723 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:01,793 : INFO : worker thread finished; awaiting finish of 2 more t

2020-08-02 00:50:03,559 : INFO : estimated required memory for 2414 words and 200 dimensions: 5069400 bytes
2020-08-02 00:50:03,560 : INFO : updating layer weights
2020-08-02 00:50:03,565 : INFO : training model with 3 workers on 5746 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:03,596 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:03,597 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:03,598 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:03,598 : INFO : EPOCH - 1 : training on 46010 raw words (32421 effective words) took 0.0s, 1052267 effective words/s
2020-08-02 00:50:03,630 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:03,631 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:03,632 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08

2020-08-02 00:50:05,390 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:05,390 : INFO : EPOCH - 1 : training on 42639 raw words (29520 effective words) took 0.0s, 1027618 effective words/s
2020-08-02 00:50:05,419 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:05,420 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:05,421 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:05,422 : INFO : EPOCH - 2 : training on 42639 raw words (29503 effective words) took 0.0s, 1069045 effective words/s
2020-08-02 00:50:05,451 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:05,452 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:05,453 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:05,453 : INFO : EPOCH - 3 : training on 42639 raw words (29596 effective words

2020-08-02 00:50:07,081 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:07,083 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:07,083 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:07,084 : INFO : EPOCH - 3 : training on 43917 raw words (30749 effective words) took 0.0s, 1066392 effective words/s
2020-08-02 00:50:07,084 : INFO : training on a 131751 raw words (92247 effective words) took 0.1s, 869731 effective words/s
2020-08-02 00:50:07,085 : INFO : collecting all words and their counts
2020-08-02 00:50:07,086 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:07,091 : INFO : collected 4354 word types from a corpus of 43917 raw words and 100 sentences
2020-08-02 00:50:07,092 : INFO : Updating model with new vocabulary
2020-08-02 00:50:07,095 : INFO : New added 1160 unique words (21% of original 5514) and increased the count of 1160 pre-e

2020-08-02 00:50:08,795 : INFO : collecting all words and their counts
2020-08-02 00:50:08,796 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:08,802 : INFO : collected 5134 word types from a corpus of 46655 raw words and 100 sentences
2020-08-02 00:50:08,802 : INFO : Updating model with new vocabulary
2020-08-02 00:50:08,805 : INFO : New added 1236 unique words (19% of original 6370) and increased the count of 1236 pre-existing words (19% of original 6370)
2020-08-02 00:50:08,813 : INFO : deleting the raw counts dictionary of 5134 items
2020-08-02 00:50:08,814 : INFO : sample=0.001 downsamples 172 most-common words
2020-08-02 00:50:08,814 : INFO : downsampling leaves estimated 58124 word corpus (144.3% of prior 40292)
2020-08-02 00:50:08,823 : INFO : estimated required memory for 2472 words and 200 dimensions: 5191200 bytes
2020-08-02 00:50:08,824 : INFO : updating layer weights
2020-08-02 00:50:08,828 : INFO : training model with 3 workers 

2020-08-02 00:50:10,548 : INFO : deleting the raw counts dictionary of 4249 items
2020-08-02 00:50:10,550 : INFO : sample=0.001 downsamples 154 most-common words
2020-08-02 00:50:10,551 : INFO : downsampling leaves estimated 59659 word corpus (146.1% of prior 40831)
2020-08-02 00:50:10,560 : INFO : estimated required memory for 2398 words and 200 dimensions: 5035800 bytes
2020-08-02 00:50:10,561 : INFO : updating layer weights
2020-08-02 00:50:10,567 : INFO : training model with 3 workers on 5844 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:10,682 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:10,684 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:10,685 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:10,686 : INFO : EPOCH - 1 : training on 45951 raw words (33112 effective words) took 0.1s, 286190 effective words/s
2020-08-02 00:

2020-08-02 00:50:12,332 : INFO : training model with 3 workers on 5863 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:12,437 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:12,438 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:12,441 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:12,442 : INFO : EPOCH - 1 : training on 44212 raw words (31349 effective words) took 0.1s, 296464 effective words/s
2020-08-02 00:50:12,556 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:12,557 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:12,560 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:12,561 : INFO : EPOCH - 2 : training on 44212 raw words (31512 effective words) took 0.1s, 272568 effective words/s
2020-08-02 00:50:12,674 : INFO : worker

2020-08-02 00:50:14,298 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:14,300 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:14,300 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:14,301 : INFO : EPOCH - 2 : training on 48704 raw words (34917 effective words) took 0.1s, 281758 effective words/s
2020-08-02 00:50:14,426 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:14,427 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:14,428 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:14,428 : INFO : EPOCH - 3 : training on 48704 raw words (34989 effective words) took 0.1s, 285529 effective words/s
2020-08-02 00:50:14,430 : INFO : training on a 146112 raw words (104820 effective words) took 0.4s, 279341 effective words/s
2020-08-02 00:50:14,431 : INFO : collecting all words and their

2020-08-02 00:50:16,311 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:16,312 : INFO : EPOCH - 3 : training on 53790 raw words (38748 effective words) took 0.1s, 276244 effective words/s
2020-08-02 00:50:16,312 : INFO : training on a 161370 raw words (116314 effective words) took 0.4s, 277897 effective words/s
2020-08-02 00:50:16,313 : INFO : collecting all words and their counts
2020-08-02 00:50:16,314 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:16,322 : INFO : collected 4630 word types from a corpus of 53790 raw words and 100 sentences
2020-08-02 00:50:16,323 : INFO : Updating model with new vocabulary
2020-08-02 00:50:16,326 : INFO : New added 1389 unique words (23% of original 6019) and increased the count of 1389 pre-existing words (23% of original 6019)
2020-08-02 00:50:16,334 : INFO : deleting the raw counts dictionary of 4630 items
2020-08-02 00:50:16,335 : INFO : sample=0.001 downsamples 134 m

2020-08-02 00:50:18,164 : INFO : collected 4689 word types from a corpus of 45443 raw words and 100 sentences
2020-08-02 00:50:18,166 : INFO : Updating model with new vocabulary
2020-08-02 00:50:18,169 : INFO : New added 1181 unique words (20% of original 5870) and increased the count of 1181 pre-existing words (20% of original 5870)
2020-08-02 00:50:18,175 : INFO : deleting the raw counts dictionary of 4689 items
2020-08-02 00:50:18,177 : INFO : sample=0.001 downsamples 174 most-common words
2020-08-02 00:50:18,180 : INFO : downsampling leaves estimated 55790 word corpus (141.1% of prior 39542)
2020-08-02 00:50:18,243 : INFO : estimated required memory for 5963 words, 44264 buckets and 200 dimensions: 49303260 bytes
2020-08-02 00:50:18,245 : INFO : updating layer weights
2020-08-02 00:50:18,331 : INFO : Number of new ngrams is 109
2020-08-02 00:50:18,366 : INFO : training model with 3 workers on 5963 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:50:20,068 : INFO : estimated required memory for 5995 words, 44467 buckets and 200 dimensions: 49539588 bytes
2020-08-02 00:50:20,069 : INFO : updating layer weights
2020-08-02 00:50:20,155 : INFO : Number of new ngrams is 203
2020-08-02 00:50:20,190 : INFO : training model with 3 workers on 5995 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:20,361 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:20,368 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:20,369 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:20,370 : INFO : EPOCH - 1 : training on 48832 raw words (35053 effective words) took 0.2s, 200059 effective words/s
2020-08-02 00:50:20,546 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:20,552 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:20,55

2020-08-02 00:50:22,288 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:22,294 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:22,295 : INFO : EPOCH - 1 : training on 51832 raw words (37129 effective words) took 0.2s, 199281 effective words/s
2020-08-02 00:50:22,481 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:22,482 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:22,488 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:22,489 : INFO : EPOCH - 2 : training on 51832 raw words (37236 effective words) took 0.2s, 195068 effective words/s
2020-08-02 00:50:22,671 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:22,672 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:22,678 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:50:24,705 : INFO : EPOCH - 2 : training on 63179 raw words (47432 effective words) took 0.2s, 195372 effective words/s
2020-08-02 00:50:24,937 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:24,943 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:24,951 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:24,951 : INFO : EPOCH - 3 : training on 63179 raw words (47461 effective words) took 0.2s, 196666 effective words/s
2020-08-02 00:50:24,952 : INFO : training on a 189537 raw words (142333 effective words) took 0.7s, 193071 effective words/s
2020-08-02 00:50:25,464 : INFO : Training batch #100 
2020-08-02 00:50:25,465 : INFO : collecting all words and their counts
2020-08-02 00:50:25,467 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:25,474 : INFO : collected 4898 word types from a corpus of 44272 raw words and 100 sentences
20

2020-08-02 00:50:26,680 : INFO : training on a 132816 raw words (92876 effective words) took 0.5s, 192626 effective words/s
2020-08-02 00:50:27,157 : INFO : Training batch #101 
2020-08-02 00:50:27,158 : INFO : collecting all words and their counts
2020-08-02 00:50:27,159 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:27,165 : INFO : collected 4370 word types from a corpus of 43907 raw words and 100 sentences
2020-08-02 00:50:27,166 : INFO : Updating model with new vocabulary
2020-08-02 00:50:27,170 : INFO : New added 1110 unique words (20% of original 5480) and increased the count of 1110 pre-existing words (20% of original 5480)
2020-08-02 00:50:27,176 : INFO : deleting the raw counts dictionary of 4370 items
2020-08-02 00:50:27,176 : INFO : sample=0.001 downsamples 188 most-common words
2020-08-02 00:50:27,177 : INFO : downsampling leaves estimated 54584 word corpus (141.9% of prior 38462)
2020-08-02 00:50:27,186 : INFO : estimated requir

2020-08-02 00:50:28,902 : INFO : New added 1217 unique words (20% of original 6065) and increased the count of 1217 pre-existing words (20% of original 6065)
2020-08-02 00:50:28,909 : INFO : deleting the raw counts dictionary of 4848 items
2020-08-02 00:50:28,911 : INFO : sample=0.001 downsamples 154 most-common words
2020-08-02 00:50:28,911 : INFO : downsampling leaves estimated 60740 word corpus (144.6% of prior 42011)
2020-08-02 00:50:28,921 : INFO : estimated required memory for 2434 words and 200 dimensions: 5111400 bytes
2020-08-02 00:50:28,922 : INFO : updating layer weights
2020-08-02 00:50:28,926 : INFO : training model with 3 workers on 6259 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:28,959 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:28,960 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:28,961 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:50:30,771 : INFO : updating layer weights
2020-08-02 00:50:30,776 : INFO : training model with 3 workers on 6291 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:30,809 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:30,809 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:30,810 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:30,811 : INFO : EPOCH - 1 : training on 47408 raw words (33093 effective words) took 0.0s, 1034669 effective words/s
2020-08-02 00:50:30,845 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:30,846 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:30,846 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:30,847 : INFO : EPOCH - 2 : training on 47408 raw words (33149 effective words) took 0.0s, 1183664

2020-08-02 00:50:32,587 : INFO : EPOCH - 1 : training on 43976 raw words (30745 effective words) took 0.0s, 1146922 effective words/s
2020-08-02 00:50:32,619 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:32,620 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:32,620 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:32,621 : INFO : EPOCH - 2 : training on 43976 raw words (30679 effective words) took 0.0s, 1111923 effective words/s
2020-08-02 00:50:32,652 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:32,654 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:32,655 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:32,656 : INFO : EPOCH - 3 : training on 43976 raw words (30618 effective words) took 0.0s, 967783 effective words/s
2020-08-02 00:50:32,656 : INFO : training on a 131928

2020-08-02 00:50:34,334 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:34,335 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:34,336 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:34,336 : INFO : EPOCH - 3 : training on 43848 raw words (30158 effective words) took 0.0s, 998353 effective words/s
2020-08-02 00:50:34,337 : INFO : training on a 131544 raw words (90390 effective words) took 0.1s, 890283 effective words/s
2020-08-02 00:50:34,338 : INFO : collecting all words and their counts
2020-08-02 00:50:34,338 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:34,344 : INFO : collected 4726 word types from a corpus of 43848 raw words and 100 sentences
2020-08-02 00:50:34,345 : INFO : Updating model with new vocabulary
2020-08-02 00:50:34,348 : INFO : New added 1109 unique words (19% of original 5835) and increased the count of 1109 pre-ex

2020-08-02 00:50:36,022 : INFO : collecting all words and their counts
2020-08-02 00:50:36,023 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:36,028 : INFO : collected 4652 word types from a corpus of 44246 raw words and 100 sentences
2020-08-02 00:50:36,029 : INFO : Updating model with new vocabulary
2020-08-02 00:50:36,032 : INFO : New added 1147 unique words (19% of original 5799) and increased the count of 1147 pre-existing words (19% of original 5799)
2020-08-02 00:50:36,042 : INFO : deleting the raw counts dictionary of 4652 items
2020-08-02 00:50:36,043 : INFO : sample=0.001 downsamples 188 most-common words
2020-08-02 00:50:36,044 : INFO : downsampling leaves estimated 54938 word corpus (142.5% of prior 38542)
2020-08-02 00:50:36,055 : INFO : estimated required memory for 2294 words and 200 dimensions: 4817400 bytes
2020-08-02 00:50:36,056 : INFO : updating layer weights
2020-08-02 00:50:36,063 : INFO : training model with 3 workers 

2020-08-02 00:50:37,769 : INFO : deleting the raw counts dictionary of 4654 items
2020-08-02 00:50:37,770 : INFO : sample=0.001 downsamples 202 most-common words
2020-08-02 00:50:37,771 : INFO : downsampling leaves estimated 48921 word corpus (138.9% of prior 35212)
2020-08-02 00:50:37,780 : INFO : estimated required memory for 2136 words and 200 dimensions: 4485600 bytes
2020-08-02 00:50:37,781 : INFO : updating layer weights
2020-08-02 00:50:37,785 : INFO : training model with 3 workers on 6412 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:37,877 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:37,881 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:37,882 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:37,883 : INFO : EPOCH - 1 : training on 40876 raw words (27959 effective words) took 0.1s, 306027 effective words/s
2020-08-02 00:

2020-08-02 00:50:39,485 : INFO : training model with 3 workers on 6469 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:39,606 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:39,607 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:39,612 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:39,612 : INFO : EPOCH - 1 : training on 51262 raw words (37222 effective words) took 0.1s, 299641 effective words/s
2020-08-02 00:50:39,742 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:39,744 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:39,749 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:39,749 : INFO : EPOCH - 2 : training on 51262 raw words (37242 effective words) took 0.1s, 278301 effective words/s
2020-08-02 00:50:39,877 : INFO : worker

2020-08-02 00:50:41,599 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:41,602 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:41,604 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:41,604 : INFO : EPOCH - 2 : training on 47540 raw words (33725 effective words) took 0.1s, 273129 effective words/s
2020-08-02 00:50:41,729 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:41,730 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:41,733 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:41,734 : INFO : EPOCH - 3 : training on 47540 raw words (33773 effective words) took 0.1s, 268986 effective words/s
2020-08-02 00:50:41,735 : INFO : training on a 142620 raw words (101196 effective words) took 0.4s, 270260 effective words/s
2020-08-02 00:50:41,736 : INFO : collecting all words and their

2020-08-02 00:50:43,555 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:43,555 : INFO : EPOCH - 3 : training on 48755 raw words (34625 effective words) took 0.1s, 280095 effective words/s
2020-08-02 00:50:43,557 : INFO : training on a 146265 raw words (103892 effective words) took 0.4s, 275423 effective words/s
2020-08-02 00:50:43,558 : INFO : collecting all words and their counts
2020-08-02 00:50:43,558 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:43,565 : INFO : collected 4736 word types from a corpus of 48755 raw words and 100 sentences
2020-08-02 00:50:43,567 : INFO : Updating model with new vocabulary
2020-08-02 00:50:43,571 : INFO : New added 1266 unique words (21% of original 6002) and increased the count of 1266 pre-existing words (21% of original 6002)
2020-08-02 00:50:43,579 : INFO : deleting the raw counts dictionary of 4736 items
2020-08-02 00:50:43,580 : INFO : sample=0.001 downsamples 146 m

2020-08-02 00:50:45,317 : INFO : collected 4513 word types from a corpus of 42210 raw words and 100 sentences
2020-08-02 00:50:45,318 : INFO : Updating model with new vocabulary
2020-08-02 00:50:45,321 : INFO : New added 1082 unique words (19% of original 5595) and increased the count of 1082 pre-existing words (19% of original 5595)
2020-08-02 00:50:45,329 : INFO : deleting the raw counts dictionary of 4513 items
2020-08-02 00:50:45,330 : INFO : sample=0.001 downsamples 206 most-common words
2020-08-02 00:50:45,331 : INFO : downsampling leaves estimated 51133 word corpus (139.2% of prior 36743)
2020-08-02 00:50:45,400 : INFO : estimated required memory for 6510 words, 47520 buckets and 200 dimensions: 53182160 bytes
2020-08-02 00:50:45,402 : INFO : updating layer weights
2020-08-02 00:50:45,497 : INFO : Number of new ngrams is 124
2020-08-02 00:50:45,534 : INFO : training model with 3 workers on 6510 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:50:47,163 : INFO : estimated required memory for 6530 words, 47610 buckets and 200 dimensions: 53300736 bytes
2020-08-02 00:50:47,165 : INFO : updating layer weights
2020-08-02 00:50:47,260 : INFO : Number of new ngrams is 90
2020-08-02 00:50:47,297 : INFO : training model with 3 workers on 6530 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:47,469 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:47,474 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:47,478 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:47,478 : INFO : EPOCH - 1 : training on 49645 raw words (35424 effective words) took 0.2s, 201238 effective words/s
2020-08-02 00:50:47,653 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:47,658 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:47,661

2020-08-02 00:50:49,290 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:49,294 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:49,294 : INFO : EPOCH - 1 : training on 46482 raw words (33305 effective words) took 0.2s, 194412 effective words/s
2020-08-02 00:50:49,463 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:49,465 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:49,469 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:49,469 : INFO : EPOCH - 2 : training on 46482 raw words (33092 effective words) took 0.2s, 192130 effective words/s
2020-08-02 00:50:49,637 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:49,638 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:49,642 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:50:51,265 : INFO : EPOCH - 2 : training on 46249 raw words (32501 effective words) took 0.2s, 191823 effective words/s
2020-08-02 00:50:51,432 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:51,434 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:51,437 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:51,438 : INFO : EPOCH - 3 : training on 46249 raw words (32514 effective words) took 0.2s, 194321 effective words/s
2020-08-02 00:50:51,438 : INFO : training on a 138747 raw words (97639 effective words) took 0.5s, 188056 effective words/s
2020-08-02 00:50:51,981 : INFO : Training batch #115 
2020-08-02 00:50:51,983 : INFO : collecting all words and their counts
2020-08-02 00:50:51,983 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:51,991 : INFO : collected 5236 word types from a corpus of 51487 raw words and 100 sentences
202

2020-08-02 00:50:53,381 : INFO : training on a 154461 raw words (110567 effective words) took 0.6s, 193587 effective words/s
2020-08-02 00:50:53,963 : INFO : Training batch #116 
2020-08-02 00:50:53,963 : INFO : collecting all words and their counts
2020-08-02 00:50:53,964 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:50:53,973 : INFO : collected 4726 word types from a corpus of 61399 raw words and 100 sentences
2020-08-02 00:50:53,974 : INFO : Updating model with new vocabulary
2020-08-02 00:50:53,976 : INFO : New added 1261 unique words (21% of original 5987) and increased the count of 1261 pre-existing words (21% of original 5987)
2020-08-02 00:50:53,985 : INFO : deleting the raw counts dictionary of 4726 items
2020-08-02 00:50:53,985 : INFO : sample=0.001 downsamples 110 most-common words
2020-08-02 00:50:53,986 : INFO : downsampling leaves estimated 80444 word corpus (144.0% of prior 55865)
2020-08-02 00:50:53,995 : INFO : estimated requi

2020-08-02 00:50:56,147 : INFO : New added 1157 unique words (21% of original 5440) and increased the count of 1157 pre-existing words (21% of original 5440)
2020-08-02 00:50:56,155 : INFO : deleting the raw counts dictionary of 4283 items
2020-08-02 00:50:56,156 : INFO : sample=0.001 downsamples 114 most-common words
2020-08-02 00:50:56,156 : INFO : downsampling leaves estimated 79542 word corpus (141.6% of prior 56162)
2020-08-02 00:50:56,166 : INFO : estimated required memory for 2314 words and 200 dimensions: 4859400 bytes
2020-08-02 00:50:56,166 : INFO : updating layer weights
2020-08-02 00:50:56,170 : INFO : training model with 3 workers on 6605 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:56,210 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:56,212 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:56,213 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:50:58,298 : INFO : updating layer weights
2020-08-02 00:50:58,304 : INFO : training model with 3 workers on 6611 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:50:58,342 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:58,344 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:58,344 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:58,345 : INFO : EPOCH - 1 : training on 58822 raw words (41326 effective words) took 0.0s, 1115220 effective words/s
2020-08-02 00:50:58,384 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:50:58,385 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:50:58,386 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:50:58,387 : INFO : EPOCH - 2 : training on 58822 raw words (41250 effective words) took 0.0s, 1076274

2020-08-02 00:51:00,396 : INFO : EPOCH - 1 : training on 52318 raw words (37514 effective words) took 0.0s, 1073498 effective words/s
2020-08-02 00:51:00,432 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:00,433 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:00,434 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:00,434 : INFO : EPOCH - 2 : training on 52318 raw words (37389 effective words) took 0.0s, 1109464 effective words/s
2020-08-02 00:51:00,472 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:00,473 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:00,474 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:00,474 : INFO : EPOCH - 3 : training on 52318 raw words (37538 effective words) took 0.0s, 1073429 effective words/s
2020-08-02 00:51:00,475 : INFO : training on a 15695

2020-08-02 00:51:02,411 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:02,412 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:02,413 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:02,413 : INFO : EPOCH - 3 : training on 46318 raw words (32884 effective words) took 0.0s, 1047136 effective words/s
2020-08-02 00:51:02,414 : INFO : training on a 138954 raw words (98584 effective words) took 0.1s, 942087 effective words/s
2020-08-02 00:51:02,415 : INFO : collecting all words and their counts
2020-08-02 00:51:02,416 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:02,423 : INFO : collected 4032 word types from a corpus of 46318 raw words and 100 sentences
2020-08-02 00:51:02,424 : INFO : Updating model with new vocabulary
2020-08-02 00:51:02,426 : INFO : New added 1234 unique words (23% of original 5266) and increased the count of 1234 pre-e

2020-08-02 00:51:04,229 : INFO : collecting all words and their counts
2020-08-02 00:51:04,229 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:04,235 : INFO : collected 4147 word types from a corpus of 47960 raw words and 100 sentences
2020-08-02 00:51:04,236 : INFO : Updating model with new vocabulary
2020-08-02 00:51:04,239 : INFO : New added 1248 unique words (23% of original 5395) and increased the count of 1248 pre-existing words (23% of original 5395)
2020-08-02 00:51:04,247 : INFO : deleting the raw counts dictionary of 4147 items
2020-08-02 00:51:04,247 : INFO : sample=0.001 downsamples 150 most-common words
2020-08-02 00:51:04,248 : INFO : downsampling leaves estimated 61586 word corpus (143.6% of prior 42895)
2020-08-02 00:51:04,259 : INFO : estimated required memory for 2496 words and 200 dimensions: 5241600 bytes
2020-08-02 00:51:04,260 : INFO : updating layer weights
2020-08-02 00:51:04,269 : INFO : training model with 3 workers 

2020-08-02 00:51:06,101 : INFO : deleting the raw counts dictionary of 4842 items
2020-08-02 00:51:06,103 : INFO : sample=0.001 downsamples 142 most-common words
2020-08-02 00:51:06,103 : INFO : downsampling leaves estimated 67912 word corpus (146.1% of prior 46482)
2020-08-02 00:51:06,113 : INFO : estimated required memory for 2652 words and 200 dimensions: 5569200 bytes
2020-08-02 00:51:06,114 : INFO : updating layer weights
2020-08-02 00:51:06,121 : INFO : training model with 3 workers on 6732 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:06,247 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:06,248 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:06,250 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:06,251 : INFO : EPOCH - 1 : training on 52361 raw words (37667 effective words) took 0.1s, 296102 effective words/s
2020-08-02 00:

2020-08-02 00:51:07,992 : INFO : training model with 3 workers on 6740 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:08,096 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:08,099 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:08,101 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:08,101 : INFO : EPOCH - 1 : training on 42838 raw words (30095 effective words) took 0.1s, 289905 effective words/s
2020-08-02 00:51:08,212 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:08,215 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:08,216 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:08,217 : INFO : EPOCH - 2 : training on 42838 raw words (30087 effective words) took 0.1s, 270229 effective words/s
2020-08-02 00:51:08,331 : INFO : worker

2020-08-02 00:51:10,027 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:10,028 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:10,028 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:10,030 : INFO : EPOCH - 2 : training on 49357 raw words (35502 effective words) took 0.1s, 281781 effective words/s
2020-08-02 00:51:10,159 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:10,160 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:10,161 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:10,162 : INFO : EPOCH - 3 : training on 49357 raw words (35723 effective words) took 0.1s, 279030 effective words/s
2020-08-02 00:51:10,163 : INFO : training on a 148071 raw words (106941 effective words) took 0.4s, 276790 effective words/s
2020-08-02 00:51:10,164 : INFO : collecting all words and their

2020-08-02 00:51:12,025 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:12,026 : INFO : EPOCH - 3 : training on 50066 raw words (36174 effective words) took 0.1s, 285866 effective words/s
2020-08-02 00:51:12,027 : INFO : training on a 150198 raw words (108360 effective words) took 0.4s, 282936 effective words/s
2020-08-02 00:51:12,028 : INFO : collecting all words and their counts
2020-08-02 00:51:12,029 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:12,037 : INFO : collected 4427 word types from a corpus of 50066 raw words and 100 sentences
2020-08-02 00:51:12,038 : INFO : Updating model with new vocabulary
2020-08-02 00:51:12,041 : INFO : New added 1248 unique words (21% of original 5675) and increased the count of 1248 pre-existing words (21% of original 5675)
2020-08-02 00:51:12,050 : INFO : deleting the raw counts dictionary of 4427 items
2020-08-02 00:51:12,051 : INFO : sample=0.001 downsamples 136 m

2020-08-02 00:51:13,948 : INFO : collected 3898 word types from a corpus of 51750 raw words and 100 sentences
2020-08-02 00:51:13,948 : INFO : Updating model with new vocabulary
2020-08-02 00:51:13,951 : INFO : New added 1247 unique words (24% of original 5145) and increased the count of 1247 pre-existing words (24% of original 5145)
2020-08-02 00:51:13,959 : INFO : deleting the raw counts dictionary of 3898 items
2020-08-02 00:51:13,960 : INFO : sample=0.001 downsamples 134 most-common words
2020-08-02 00:51:13,961 : INFO : downsampling leaves estimated 68701 word corpus (145.7% of prior 47158)
2020-08-02 00:51:14,032 : INFO : estimated required memory for 6788 words, 48988 buckets and 200 dimensions: 55001424 bytes
2020-08-02 00:51:14,034 : INFO : updating layer weights
2020-08-02 00:51:14,131 : INFO : Number of new ngrams is 54
2020-08-02 00:51:14,169 : INFO : training model with 3 workers on 6788 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-

2020-08-02 00:51:15,955 : INFO : estimated required memory for 6822 words, 49203 buckets and 200 dimensions: 55254200 bytes
2020-08-02 00:51:15,957 : INFO : updating layer weights
2020-08-02 00:51:16,056 : INFO : Number of new ngrams is 215
2020-08-02 00:51:16,095 : INFO : training model with 3 workers on 6822 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:16,271 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:16,275 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:16,282 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:16,282 : INFO : EPOCH - 1 : training on 50882 raw words (37084 effective words) took 0.2s, 200579 effective words/s
2020-08-02 00:51:16,464 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:16,467 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:16,47

2020-08-02 00:51:18,114 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:18,115 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:18,115 : INFO : EPOCH - 1 : training on 45961 raw words (33135 effective words) took 0.2s, 202449 effective words/s
2020-08-02 00:51:18,284 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:18,285 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:18,287 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:18,288 : INFO : EPOCH - 2 : training on 45961 raw words (33174 effective words) took 0.2s, 195855 effective words/s
2020-08-02 00:51:18,454 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:18,455 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:18,456 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:51:20,196 : INFO : EPOCH - 2 : training on 51026 raw words (36451 effective words) took 0.2s, 194474 effective words/s
2020-08-02 00:51:20,383 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:20,389 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:20,390 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:20,391 : INFO : EPOCH - 3 : training on 51026 raw words (36615 effective words) took 0.2s, 192008 effective words/s
2020-08-02 00:51:20,391 : INFO : training on a 153078 raw words (109580 effective words) took 0.6s, 190728 effective words/s
2020-08-02 00:51:20,951 : INFO : Training batch #130 
2020-08-02 00:51:20,951 : INFO : collecting all words and their counts
2020-08-02 00:51:20,952 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:20,960 : INFO : collected 4862 word types from a corpus of 50374 raw words and 100 sentences
20

2020-08-02 00:51:22,351 : INFO : training on a 151122 raw words (107875 effective words) took 0.6s, 185846 effective words/s
2020-08-02 00:51:22,850 : INFO : Training batch #131 
2020-08-02 00:51:22,851 : INFO : collecting all words and their counts
2020-08-02 00:51:22,851 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:22,858 : INFO : collected 4348 word types from a corpus of 45624 raw words and 100 sentences
2020-08-02 00:51:22,859 : INFO : Updating model with new vocabulary
2020-08-02 00:51:22,863 : INFO : New added 1105 unique words (20% of original 5453) and increased the count of 1105 pre-existing words (20% of original 5453)
2020-08-02 00:51:22,870 : INFO : deleting the raw counts dictionary of 4348 items
2020-08-02 00:51:22,871 : INFO : sample=0.001 downsamples 162 most-common words
2020-08-02 00:51:22,871 : INFO : downsampling leaves estimated 58180 word corpus (144.4% of prior 40289)
2020-08-02 00:51:22,881 : INFO : estimated requi

2020-08-02 00:51:24,732 : INFO : New added 1139 unique words (19% of original 5746) and increased the count of 1139 pre-existing words (19% of original 5746)
2020-08-02 00:51:24,740 : INFO : deleting the raw counts dictionary of 4607 items
2020-08-02 00:51:24,741 : INFO : sample=0.001 downsamples 172 most-common words
2020-08-02 00:51:24,741 : INFO : downsampling leaves estimated 55967 word corpus (143.0% of prior 39134)
2020-08-02 00:51:24,753 : INFO : estimated required memory for 2278 words and 200 dimensions: 4783800 bytes
2020-08-02 00:51:24,755 : INFO : updating layer weights
2020-08-02 00:51:24,761 : INFO : training model with 3 workers on 6939 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:24,796 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:24,797 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:24,798 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:51:26,774 : INFO : updating layer weights
2020-08-02 00:51:26,780 : INFO : training model with 3 workers on 6956 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:26,818 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:26,820 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:26,820 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:26,821 : INFO : EPOCH - 1 : training on 52199 raw words (37630 effective words) took 0.0s, 988981 effective words/s
2020-08-02 00:51:26,860 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:26,861 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:26,862 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:26,862 : INFO : EPOCH - 2 : training on 52199 raw words (37632 effective words) took 0.0s, 1014119 

2020-08-02 00:51:28,835 : INFO : EPOCH - 1 : training on 50641 raw words (36902 effective words) took 0.0s, 1056357 effective words/s
2020-08-02 00:51:28,871 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:28,872 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:28,873 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:28,874 : INFO : EPOCH - 2 : training on 50641 raw words (36877 effective words) took 0.0s, 1075897 effective words/s
2020-08-02 00:51:28,910 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:28,911 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:28,911 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:28,912 : INFO : EPOCH - 3 : training on 50641 raw words (37058 effective words) took 0.0s, 1038524 effective words/s
2020-08-02 00:51:28,913 : INFO : training on a 15192

2020-08-02 00:51:30,918 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:30,919 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:30,920 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:30,920 : INFO : EPOCH - 3 : training on 49301 raw words (35526 effective words) took 0.0s, 1181202 effective words/s
2020-08-02 00:51:30,921 : INFO : training on a 147903 raw words (106558 effective words) took 0.1s, 942784 effective words/s
2020-08-02 00:51:30,922 : INFO : collecting all words and their counts
2020-08-02 00:51:30,923 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:30,929 : INFO : collected 4891 word types from a corpus of 49301 raw words and 100 sentences
2020-08-02 00:51:30,930 : INFO : Updating model with new vocabulary
2020-08-02 00:51:30,933 : INFO : New added 1285 unique words (20% of original 6176) and increased the count of 1285 pre-

2020-08-02 00:51:32,831 : INFO : collecting all words and their counts
2020-08-02 00:51:32,832 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:32,838 : INFO : collected 4896 word types from a corpus of 51270 raw words and 100 sentences
2020-08-02 00:51:32,839 : INFO : Updating model with new vocabulary
2020-08-02 00:51:32,842 : INFO : New added 1379 unique words (21% of original 6275) and increased the count of 1379 pre-existing words (21% of original 6275)
2020-08-02 00:51:32,850 : INFO : deleting the raw counts dictionary of 4896 items
2020-08-02 00:51:32,851 : INFO : sample=0.001 downsamples 134 most-common words
2020-08-02 00:51:32,852 : INFO : downsampling leaves estimated 66366 word corpus (146.5% of prior 45316)
2020-08-02 00:51:32,862 : INFO : estimated required memory for 2758 words and 200 dimensions: 5791800 bytes
2020-08-02 00:51:32,863 : INFO : updating layer weights
2020-08-02 00:51:32,869 : INFO : training model with 3 workers 

2020-08-02 00:51:34,915 : INFO : deleting the raw counts dictionary of 4595 items
2020-08-02 00:51:34,916 : INFO : sample=0.001 downsamples 114 most-common words
2020-08-02 00:51:34,916 : INFO : downsampling leaves estimated 76925 word corpus (143.1% of prior 53762)
2020-08-02 00:51:34,927 : INFO : estimated required memory for 2558 words and 200 dimensions: 5371800 bytes
2020-08-02 00:51:34,928 : INFO : updating layer weights
2020-08-02 00:51:34,935 : INFO : training model with 3 workers on 7032 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:35,075 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:35,077 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:35,080 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:35,080 : INFO : EPOCH - 1 : training on 59209 raw words (42109 effective words) took 0.1s, 295177 effective words/s
2020-08-02 00:

2020-08-02 00:51:37,082 : INFO : training model with 3 workers on 7052 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:37,228 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:37,229 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:37,233 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:37,233 : INFO : EPOCH - 1 : training on 62653 raw words (44717 effective words) took 0.1s, 301993 effective words/s
2020-08-02 00:51:37,387 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:37,389 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:37,392 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:37,393 : INFO : EPOCH - 2 : training on 62653 raw words (44677 effective words) took 0.2s, 285068 effective words/s
2020-08-02 00:51:37,548 : INFO : worker

2020-08-02 00:51:39,579 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:39,580 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:39,583 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:39,584 : INFO : EPOCH - 2 : training on 60835 raw words (42991 effective words) took 0.2s, 283447 effective words/s
2020-08-02 00:51:39,734 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:39,737 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:39,739 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:39,740 : INFO : EPOCH - 3 : training on 60835 raw words (43206 effective words) took 0.2s, 283848 effective words/s
2020-08-02 00:51:39,741 : INFO : training on a 182505 raw words (129137 effective words) took 0.5s, 283878 effective words/s
2020-08-02 00:51:39,742 : INFO : collecting all words and their

2020-08-02 00:51:41,915 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:41,916 : INFO : EPOCH - 3 : training on 61610 raw words (43233 effective words) took 0.2s, 282261 effective words/s
2020-08-02 00:51:41,917 : INFO : training on a 184830 raw words (129473 effective words) took 0.5s, 285101 effective words/s
2020-08-02 00:51:41,917 : INFO : collecting all words and their counts
2020-08-02 00:51:41,918 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:41,926 : INFO : collected 4166 word types from a corpus of 61610 raw words and 100 sentences
2020-08-02 00:51:41,926 : INFO : Updating model with new vocabulary
2020-08-02 00:51:41,930 : INFO : New added 1095 unique words (20% of original 5261) and increased the count of 1095 pre-existing words (20% of original 5261)
2020-08-02 00:51:41,937 : INFO : deleting the raw counts dictionary of 4166 items
2020-08-02 00:51:41,939 : INFO : sample=0.001 downsamples 108 m

2020-08-02 00:51:44,111 : INFO : collected 4826 word types from a corpus of 60080 raw words and 100 sentences
2020-08-02 00:51:44,112 : INFO : Updating model with new vocabulary
2020-08-02 00:51:44,115 : INFO : New added 1304 unique words (21% of original 6130) and increased the count of 1304 pre-existing words (21% of original 6130)
2020-08-02 00:51:44,122 : INFO : deleting the raw counts dictionary of 4826 items
2020-08-02 00:51:44,123 : INFO : sample=0.001 downsamples 124 most-common words
2020-08-02 00:51:44,123 : INFO : downsampling leaves estimated 78049 word corpus (143.8% of prior 54288)
2020-08-02 00:51:44,198 : INFO : estimated required memory for 7106 words, 50977 buckets and 200 dimensions: 57333328 bytes
2020-08-02 00:51:44,200 : INFO : updating layer weights
2020-08-02 00:51:44,307 : INFO : Number of new ngrams is 141
2020-08-02 00:51:44,346 : INFO : training model with 3 workers on 7106 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:51:46,442 : INFO : estimated required memory for 7123 words, 51065 buckets and 200 dimensions: 57443252 bytes
2020-08-02 00:51:46,443 : INFO : updating layer weights
2020-08-02 00:51:46,545 : INFO : Number of new ngrams is 88
2020-08-02 00:51:46,585 : INFO : training model with 3 workers on 7123 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:46,805 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:46,811 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:46,814 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:46,815 : INFO : EPOCH - 1 : training on 64546 raw words (45642 effective words) took 0.2s, 202257 effective words/s
2020-08-02 00:51:47,053 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:47,060 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:47,064

2020-08-02 00:51:49,084 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:49,086 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:49,087 : INFO : EPOCH - 1 : training on 63655 raw words (45157 effective words) took 0.2s, 194119 effective words/s
2020-08-02 00:51:49,324 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:49,327 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:49,329 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:49,329 : INFO : EPOCH - 2 : training on 63655 raw words (45009 effective words) took 0.2s, 188132 effective words/s
2020-08-02 00:51:49,554 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:49,558 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:49,560 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:51:51,624 : INFO : EPOCH - 2 : training on 69210 raw words (48212 effective words) took 0.2s, 195924 effective words/s
2020-08-02 00:51:51,873 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:51,874 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:51,877 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:51,878 : INFO : EPOCH - 3 : training on 69210 raw words (48300 effective words) took 0.2s, 195007 effective words/s
2020-08-02 00:51:51,879 : INFO : training on a 207630 raw words (144784 effective words) took 0.7s, 195672 effective words/s
2020-08-02 00:51:52,473 : INFO : Training batch #145 
2020-08-02 00:51:52,474 : INFO : collecting all words and their counts
2020-08-02 00:51:52,474 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:52,484 : INFO : collected 4428 word types from a corpus of 62261 raw words and 100 sentences
20

2020-08-02 00:51:54,079 : INFO : training on a 186783 raw words (131977 effective words) took 0.7s, 190557 effective words/s
2020-08-02 00:51:54,698 : INFO : Training batch #146 
2020-08-02 00:51:54,698 : INFO : collecting all words and their counts
2020-08-02 00:51:54,699 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:51:54,708 : INFO : collected 4646 word types from a corpus of 61393 raw words and 100 sentences
2020-08-02 00:51:54,709 : INFO : Updating model with new vocabulary
2020-08-02 00:51:54,712 : INFO : New added 1296 unique words (21% of original 5942) and increased the count of 1296 pre-existing words (21% of original 5942)
2020-08-02 00:51:54,720 : INFO : deleting the raw counts dictionary of 4646 items
2020-08-02 00:51:54,720 : INFO : sample=0.001 downsamples 108 most-common words
2020-08-02 00:51:54,721 : INFO : downsampling leaves estimated 80360 word corpus (143.6% of prior 55953)
2020-08-02 00:51:54,732 : INFO : estimated requi

2020-08-02 00:51:56,931 : INFO : New added 1084 unique words (20% of original 5269) and increased the count of 1084 pre-existing words (20% of original 5269)
2020-08-02 00:51:56,938 : INFO : deleting the raw counts dictionary of 4185 items
2020-08-02 00:51:56,939 : INFO : sample=0.001 downsamples 114 most-common words
2020-08-02 00:51:56,940 : INFO : downsampling leaves estimated 78638 word corpus (140.3% of prior 56047)
2020-08-02 00:51:56,950 : INFO : estimated required memory for 2168 words and 200 dimensions: 4552800 bytes
2020-08-02 00:51:56,950 : INFO : updating layer weights
2020-08-02 00:51:56,954 : INFO : training model with 3 workers on 7194 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:56,995 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:56,996 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:56,997 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:51:59,123 : INFO : updating layer weights
2020-08-02 00:51:59,129 : INFO : training model with 3 workers on 7215 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:51:59,168 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:59,169 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:59,170 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:59,171 : INFO : EPOCH - 1 : training on 57599 raw words (40895 effective words) took 0.0s, 1051791 effective words/s
2020-08-02 00:51:59,212 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:51:59,214 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:51:59,214 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:51:59,215 : INFO : EPOCH - 2 : training on 57599 raw words (41008 effective words) took 0.0s, 1022111

2020-08-02 00:52:01,226 : INFO : EPOCH - 1 : training on 48267 raw words (34263 effective words) took 0.0s, 1038271 effective words/s
2020-08-02 00:52:01,261 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:01,262 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:01,263 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:01,264 : INFO : EPOCH - 2 : training on 48267 raw words (34362 effective words) took 0.0s, 1021544 effective words/s
2020-08-02 00:52:01,299 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:01,300 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:01,300 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:01,301 : INFO : EPOCH - 3 : training on 48267 raw words (34346 effective words) took 0.0s, 1003655 effective words/s
2020-08-02 00:52:01,301 : INFO : training on a 14480

2020-08-02 00:52:03,204 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:03,207 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:03,207 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:03,208 : INFO : EPOCH - 3 : training on 50333 raw words (35823 effective words) took 0.0s, 1021848 effective words/s
2020-08-02 00:52:03,208 : INFO : training on a 150999 raw words (107282 effective words) took 0.1s, 932344 effective words/s
2020-08-02 00:52:03,209 : INFO : collecting all words and their counts
2020-08-02 00:52:03,210 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:03,216 : INFO : collected 4968 word types from a corpus of 50333 raw words and 100 sentences
2020-08-02 00:52:03,217 : INFO : Updating model with new vocabulary
2020-08-02 00:52:03,220 : INFO : New added 1238 unique words (19% of original 6206) and increased the count of 1238 pre-

2020-08-02 00:52:05,214 : INFO : collecting all words and their counts
2020-08-02 00:52:05,215 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:05,221 : INFO : collected 4729 word types from a corpus of 49330 raw words and 100 sentences
2020-08-02 00:52:05,222 : INFO : Updating model with new vocabulary
2020-08-02 00:52:05,225 : INFO : New added 1236 unique words (20% of original 5965) and increased the count of 1236 pre-existing words (20% of original 5965)
2020-08-02 00:52:05,233 : INFO : deleting the raw counts dictionary of 4729 items
2020-08-02 00:52:05,234 : INFO : sample=0.001 downsamples 136 most-common words
2020-08-02 00:52:05,234 : INFO : downsampling leaves estimated 62818 word corpus (143.9% of prior 43641)
2020-08-02 00:52:05,245 : INFO : estimated required memory for 2472 words and 200 dimensions: 5191200 bytes
2020-08-02 00:52:05,246 : INFO : updating layer weights
2020-08-02 00:52:05,249 : INFO : training model with 3 workers 

2020-08-02 00:52:07,194 : INFO : deleting the raw counts dictionary of 4603 items
2020-08-02 00:52:07,196 : INFO : sample=0.001 downsamples 148 most-common words
2020-08-02 00:52:07,197 : INFO : downsampling leaves estimated 59432 word corpus (142.9% of prior 41600)
2020-08-02 00:52:07,208 : INFO : estimated required memory for 2392 words and 200 dimensions: 5023200 bytes
2020-08-02 00:52:07,208 : INFO : updating layer weights
2020-08-02 00:52:07,213 : INFO : training model with 3 workers on 7320 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:07,325 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:07,326 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:07,330 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:07,331 : INFO : EPOCH - 1 : training on 47140 raw words (33460 effective words) took 0.1s, 297037 effective words/s
2020-08-02 00:

2020-08-02 00:52:09,140 : INFO : training model with 3 workers on 7343 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:09,259 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:09,261 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:09,264 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:09,265 : INFO : EPOCH - 1 : training on 50290 raw words (36777 effective words) took 0.1s, 303108 effective words/s
2020-08-02 00:52:09,389 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:09,390 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:09,394 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:09,394 : INFO : EPOCH - 2 : training on 50290 raw words (36697 effective words) took 0.1s, 289883 effective words/s
2020-08-02 00:52:09,520 : INFO : worker

2020-08-02 00:52:11,283 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:11,284 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:11,289 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:11,290 : INFO : EPOCH - 2 : training on 45843 raw words (33196 effective words) took 0.1s, 273084 effective words/s
2020-08-02 00:52:11,403 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:11,406 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:11,410 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:11,411 : INFO : EPOCH - 3 : training on 45843 raw words (33209 effective words) took 0.1s, 282180 effective words/s
2020-08-02 00:52:11,411 : INFO : training on a 137529 raw words (99603 effective words) took 0.4s, 274396 effective words/s
2020-08-02 00:52:11,412 : INFO : collecting all words and their 

2020-08-02 00:52:13,219 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:13,220 : INFO : EPOCH - 3 : training on 45671 raw words (33337 effective words) took 0.1s, 288190 effective words/s
2020-08-02 00:52:13,220 : INFO : training on a 137013 raw words (99838 effective words) took 0.4s, 277720 effective words/s
2020-08-02 00:52:13,222 : INFO : collecting all words and their counts
2020-08-02 00:52:13,223 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:13,230 : INFO : collected 4336 word types from a corpus of 45671 raw words and 100 sentences
2020-08-02 00:52:13,231 : INFO : Updating model with new vocabulary
2020-08-02 00:52:13,234 : INFO : New added 1193 unique words (21% of original 5529) and increased the count of 1193 pre-existing words (21% of original 5529)
2020-08-02 00:52:13,241 : INFO : deleting the raw counts dictionary of 4336 items
2020-08-02 00:52:13,242 : INFO : sample=0.001 downsamples 164 mo

2020-08-02 00:52:15,153 : INFO : collected 4612 word types from a corpus of 45143 raw words and 100 sentences
2020-08-02 00:52:15,154 : INFO : Updating model with new vocabulary
2020-08-02 00:52:15,156 : INFO : New added 1209 unique words (20% of original 5821) and increased the count of 1209 pre-existing words (20% of original 5821)
2020-08-02 00:52:15,164 : INFO : deleting the raw counts dictionary of 4612 items
2020-08-02 00:52:15,165 : INFO : sample=0.001 downsamples 168 most-common words
2020-08-02 00:52:15,166 : INFO : downsampling leaves estimated 56447 word corpus (143.3% of prior 39395)
2020-08-02 00:52:15,255 : INFO : estimated required memory for 7403 words, 52684 buckets and 200 dimensions: 59387036 bytes
2020-08-02 00:52:15,257 : INFO : updating layer weights
2020-08-02 00:52:15,388 : INFO : Number of new ngrams is 66
2020-08-02 00:52:15,435 : INFO : training model with 3 workers on 7403 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-

2020-08-02 00:52:17,331 : INFO : estimated required memory for 7480 words, 53139 buckets and 200 dimensions: 59933784 bytes
2020-08-02 00:52:17,333 : INFO : updating layer weights
2020-08-02 00:52:17,439 : INFO : Number of new ngrams is 455
2020-08-02 00:52:17,482 : INFO : training model with 3 workers on 7480 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:17,681 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:17,685 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:17,687 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:17,688 : INFO : EPOCH - 1 : training on 54443 raw words (39949 effective words) took 0.2s, 197410 effective words/s
2020-08-02 00:52:17,889 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:17,892 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:17,89

2020-08-02 00:52:19,805 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:19,810 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:19,811 : INFO : EPOCH - 1 : training on 56012 raw words (41007 effective words) took 0.2s, 198958 effective words/s
2020-08-02 00:52:20,018 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:20,020 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:20,025 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:20,025 : INFO : EPOCH - 2 : training on 56012 raw words (40874 effective words) took 0.2s, 192772 effective words/s
2020-08-02 00:52:20,231 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:20,233 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:20,239 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:52:21,871 : INFO : EPOCH - 2 : training on 44504 raw words (31564 effective words) took 0.2s, 200596 effective words/s
2020-08-02 00:52:22,030 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:22,031 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:22,036 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:22,036 : INFO : EPOCH - 3 : training on 44504 raw words (31613 effective words) took 0.2s, 195719 effective words/s
2020-08-02 00:52:22,037 : INFO : training on a 133512 raw words (94785 effective words) took 0.5s, 193090 effective words/s
2020-08-02 00:52:22,580 : INFO : Training batch #160 
2020-08-02 00:52:22,581 : INFO : collecting all words and their counts
2020-08-02 00:52:22,581 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:22,588 : INFO : collected 3964 word types from a corpus of 41806 raw words and 100 sentences
202

2020-08-02 00:52:23,776 : INFO : training on a 125418 raw words (88020 effective words) took 0.4s, 197531 effective words/s
2020-08-02 00:52:24,342 : INFO : Training batch #161 
2020-08-02 00:52:24,343 : INFO : collecting all words and their counts
2020-08-02 00:52:24,343 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:24,352 : INFO : collected 4444 word types from a corpus of 46819 raw words and 100 sentences
2020-08-02 00:52:24,352 : INFO : Updating model with new vocabulary
2020-08-02 00:52:24,356 : INFO : New added 1266 unique words (22% of original 5710) and increased the count of 1266 pre-existing words (22% of original 5710)
2020-08-02 00:52:24,364 : INFO : deleting the raw counts dictionary of 4444 items
2020-08-02 00:52:24,364 : INFO : sample=0.001 downsamples 156 most-common words
2020-08-02 00:52:24,365 : INFO : downsampling leaves estimated 60978 word corpus (147.0% of prior 41488)
2020-08-02 00:52:24,377 : INFO : estimated requir

2020-08-02 00:52:26,356 : INFO : New added 1117 unique words (22% of original 4960) and increased the count of 1117 pre-existing words (22% of original 4960)
2020-08-02 00:52:26,362 : INFO : deleting the raw counts dictionary of 3843 items
2020-08-02 00:52:26,363 : INFO : sample=0.001 downsamples 108 most-common words
2020-08-02 00:52:26,364 : INFO : downsampling leaves estimated 86892 word corpus (140.4% of prior 61903)
2020-08-02 00:52:26,374 : INFO : estimated required memory for 2234 words and 200 dimensions: 4691400 bytes
2020-08-02 00:52:26,377 : INFO : updating layer weights
2020-08-02 00:52:26,381 : INFO : training model with 3 workers on 7577 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:26,422 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:26,423 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:26,424 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:52:28,672 : INFO : updating layer weights
2020-08-02 00:52:28,674 : INFO : training model with 3 workers on 7586 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:28,714 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:28,715 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:28,716 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:28,717 : INFO : EPOCH - 1 : training on 61338 raw words (43258 effective words) took 0.0s, 1093892 effective words/s
2020-08-02 00:52:28,757 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:28,758 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:28,759 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:28,759 : INFO : EPOCH - 2 : training on 61338 raw words (43140 effective words) took 0.0s, 1103686

2020-08-02 00:52:30,835 : INFO : EPOCH - 1 : training on 43082 raw words (31076 effective words) took 0.0s, 1067990 effective words/s
2020-08-02 00:52:30,867 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:30,868 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:30,869 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:30,870 : INFO : EPOCH - 2 : training on 43082 raw words (31091 effective words) took 0.0s, 1006986 effective words/s
2020-08-02 00:52:30,900 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:30,901 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:30,902 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:30,903 : INFO : EPOCH - 3 : training on 43082 raw words (31080 effective words) took 0.0s, 1080716 effective words/s
2020-08-02 00:52:30,903 : INFO : training on a 12924

2020-08-02 00:52:32,731 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:32,733 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:32,733 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:32,734 : INFO : EPOCH - 3 : training on 47136 raw words (34392 effective words) took 0.0s, 1074397 effective words/s
2020-08-02 00:52:32,735 : INFO : training on a 141408 raw words (103185 effective words) took 0.1s, 929536 effective words/s
2020-08-02 00:52:32,735 : INFO : collecting all words and their counts
2020-08-02 00:52:32,736 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:32,741 : INFO : collected 3736 word types from a corpus of 47136 raw words and 100 sentences
2020-08-02 00:52:32,742 : INFO : Updating model with new vocabulary
2020-08-02 00:52:32,745 : INFO : New added 1158 unique words (23% of original 4894) and increased the count of 1158 pre-

2020-08-02 00:52:34,640 : INFO : collecting all words and their counts
2020-08-02 00:52:34,641 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:34,647 : INFO : collected 4060 word types from a corpus of 49383 raw words and 100 sentences
2020-08-02 00:52:34,648 : INFO : Updating model with new vocabulary
2020-08-02 00:52:34,650 : INFO : New added 1200 unique words (22% of original 5260) and increased the count of 1200 pre-existing words (22% of original 5260)
2020-08-02 00:52:34,657 : INFO : deleting the raw counts dictionary of 4060 items
2020-08-02 00:52:34,658 : INFO : sample=0.001 downsamples 124 most-common words
2020-08-02 00:52:34,658 : INFO : downsampling leaves estimated 65789 word corpus (147.0% of prior 44747)
2020-08-02 00:52:34,670 : INFO : estimated required memory for 2400 words and 200 dimensions: 5040000 bytes
2020-08-02 00:52:34,671 : INFO : updating layer weights
2020-08-02 00:52:34,674 : INFO : training model with 3 workers 

2020-08-02 00:52:36,611 : INFO : deleting the raw counts dictionary of 3832 items
2020-08-02 00:52:36,612 : INFO : sample=0.001 downsamples 130 most-common words
2020-08-02 00:52:36,613 : INFO : downsampling leaves estimated 64759 word corpus (146.1% of prior 44337)
2020-08-02 00:52:36,624 : INFO : estimated required memory for 2434 words and 200 dimensions: 5111400 bytes
2020-08-02 00:52:36,625 : INFO : updating layer weights
2020-08-02 00:52:36,634 : INFO : training model with 3 workers on 7666 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:36,748 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:36,749 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:36,753 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:36,754 : INFO : EPOCH - 1 : training on 48788 raw words (35567 effective words) took 0.1s, 306766 effective words/s
2020-08-02 00:

2020-08-02 00:52:38,602 : INFO : training model with 3 workers on 7681 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:38,720 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:38,722 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:38,726 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:38,727 : INFO : EPOCH - 1 : training on 48671 raw words (35554 effective words) took 0.1s, 296492 effective words/s
2020-08-02 00:52:38,852 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:38,854 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:38,859 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:38,860 : INFO : EPOCH - 2 : training on 48671 raw words (35623 effective words) took 0.1s, 273594 effective words/s
2020-08-02 00:52:38,982 : INFO : worker

2020-08-02 00:52:40,868 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:40,870 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:40,870 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:40,873 : INFO : EPOCH - 2 : training on 53359 raw words (39038 effective words) took 0.1s, 278707 effective words/s
2020-08-02 00:52:41,006 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:41,009 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:41,010 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:41,011 : INFO : EPOCH - 3 : training on 53359 raw words (39131 effective words) took 0.1s, 292876 effective words/s
2020-08-02 00:52:41,012 : INFO : training on a 160077 raw words (117380 effective words) took 0.4s, 281890 effective words/s
2020-08-02 00:52:41,013 : INFO : collecting all words and their

2020-08-02 00:52:43,100 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:43,101 : INFO : EPOCH - 3 : training on 54896 raw words (40618 effective words) took 0.1s, 278353 effective words/s
2020-08-02 00:52:43,102 : INFO : training on a 164688 raw words (121778 effective words) took 0.4s, 275005 effective words/s
2020-08-02 00:52:43,102 : INFO : collecting all words and their counts
2020-08-02 00:52:43,103 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:43,110 : INFO : collected 4660 word types from a corpus of 54896 raw words and 100 sentences
2020-08-02 00:52:43,112 : INFO : Updating model with new vocabulary
2020-08-02 00:52:43,115 : INFO : New added 1460 unique words (23% of original 6120) and increased the count of 1460 pre-existing words (23% of original 6120)
2020-08-02 00:52:43,123 : INFO : deleting the raw counts dictionary of 4660 items
2020-08-02 00:52:43,124 : INFO : sample=0.001 downsamples 122 m

2020-08-02 00:52:45,418 : INFO : collected 3843 word types from a corpus of 65663 raw words and 100 sentences
2020-08-02 00:52:45,418 : INFO : Updating model with new vocabulary
2020-08-02 00:52:45,421 : INFO : New added 1624 unique words (29% of original 5467) and increased the count of 1624 pre-existing words (29% of original 5467)
2020-08-02 00:52:45,431 : INFO : deleting the raw counts dictionary of 3843 items
2020-08-02 00:52:45,431 : INFO : sample=0.001 downsamples 110 most-common words
2020-08-02 00:52:45,432 : INFO : downsampling leaves estimated 93160 word corpus (151.2% of prior 61614)
2020-08-02 00:52:45,513 : INFO : estimated required memory for 7777 words, 54839 buckets and 200 dimensions: 61985044 bytes
2020-08-02 00:52:45,515 : INFO : updating layer weights
2020-08-02 00:52:45,627 : INFO : Number of new ngrams is 358
2020-08-02 00:52:45,669 : INFO : training model with 3 workers on 7777 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:52:47,838 : INFO : downsampling leaves estimated 85978 word corpus (149.7% of prior 57427)
2020-08-02 00:52:47,921 : INFO : estimated required memory for 7797 words, 54959 buckets and 200 dimensions: 62127084 bytes
2020-08-02 00:52:47,924 : INFO : updating layer weights
2020-08-02 00:52:48,037 : INFO : Number of new ngrams is 120
2020-08-02 00:52:48,079 : INFO : training model with 3 workers on 7797 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:48,307 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:48,310 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:48,317 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:48,318 : INFO : EPOCH - 1 : training on 62532 raw words (46662 effective words) took 0.2s, 198283 effective words/s
2020-08-02 00:52:48,550 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-

2020-08-02 00:52:50,278 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:50,286 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:50,287 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:50,288 : INFO : EPOCH - 1 : training on 43080 raw words (30512 effective words) took 0.2s, 193623 effective words/s
2020-08-02 00:52:50,437 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:50,446 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:50,447 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:50,449 : INFO : EPOCH - 2 : training on 43080 raw words (30470 effective words) took 0.2s, 197322 effective words/s
2020-08-02 00:52:50,603 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:50,613 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-0

2020-08-02 00:52:52,228 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:52,229 : INFO : EPOCH - 2 : training on 41766 raw words (29628 effective words) took 0.2s, 194930 effective words/s
2020-08-02 00:52:52,377 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:52,381 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:52,386 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:52,387 : INFO : EPOCH - 3 : training on 41766 raw words (29570 effective words) took 0.2s, 195713 effective words/s
2020-08-02 00:52:52,387 : INFO : training on a 125298 raw words (88843 effective words) took 0.5s, 188664 effective words/s
2020-08-02 00:52:52,957 : INFO : Training batch #175 
2020-08-02 00:52:52,958 : INFO : collecting all words and their counts
2020-08-02 00:52:52,959 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:52,96

2020-08-02 00:52:54,465 : INFO : training on a 159177 raw words (119204 effective words) took 0.6s, 192507 effective words/s
2020-08-02 00:52:55,076 : INFO : Training batch #176 
2020-08-02 00:52:55,076 : INFO : collecting all words and their counts
2020-08-02 00:52:55,077 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:52:55,086 : INFO : collected 3703 word types from a corpus of 59514 raw words and 100 sentences
2020-08-02 00:52:55,086 : INFO : Updating model with new vocabulary
2020-08-02 00:52:55,089 : INFO : New added 1266 unique words (25% of original 4969) and increased the count of 1266 pre-existing words (25% of original 4969)
2020-08-02 00:52:55,097 : INFO : deleting the raw counts dictionary of 3703 items
2020-08-02 00:52:55,098 : INFO : sample=0.001 downsamples 110 most-common words
2020-08-02 00:52:55,098 : INFO : downsampling leaves estimated 82583 word corpus (149.6% of prior 55216)
2020-08-02 00:52:55,110 : INFO : estimated requi

2020-08-02 00:52:57,361 : INFO : New added 1382 unique words (25% of original 5326) and increased the count of 1382 pre-existing words (25% of original 5326)
2020-08-02 00:52:57,369 : INFO : deleting the raw counts dictionary of 3944 items
2020-08-02 00:52:57,370 : INFO : sample=0.001 downsamples 108 most-common words
2020-08-02 00:52:57,370 : INFO : downsampling leaves estimated 81367 word corpus (150.2% of prior 54188)
2020-08-02 00:52:57,383 : INFO : estimated required memory for 2764 words and 200 dimensions: 5804400 bytes
2020-08-02 00:52:57,384 : INFO : updating layer weights
2020-08-02 00:52:57,391 : INFO : training model with 3 workers on 7890 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:57,432 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:57,434 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:57,435 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:52:59,618 : INFO : updating layer weights
2020-08-02 00:52:59,622 : INFO : training model with 3 workers on 7894 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:52:59,658 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:59,660 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:59,660 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:59,661 : INFO : EPOCH - 1 : training on 53287 raw words (39651 effective words) took 0.0s, 1108546 effective words/s
2020-08-02 00:52:59,699 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:52:59,700 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:52:59,701 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:52:59,702 : INFO : EPOCH - 2 : training on 53287 raw words (39630 effective words) took 0.0s, 1094866

2020-08-02 00:53:01,712 : INFO : EPOCH - 1 : training on 45047 raw words (32233 effective words) took 0.0s, 1027199 effective words/s
2020-08-02 00:53:01,745 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:01,746 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:01,747 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:01,747 : INFO : EPOCH - 2 : training on 45047 raw words (32206 effective words) took 0.0s, 1032800 effective words/s
2020-08-02 00:53:01,780 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:01,781 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:01,782 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:01,783 : INFO : EPOCH - 3 : training on 45047 raw words (32220 effective words) took 0.0s, 1039822 effective words/s
2020-08-02 00:53:01,783 : INFO : training on a 13514

2020-08-02 00:53:03,649 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:03,650 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:03,651 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:03,652 : INFO : EPOCH - 3 : training on 49820 raw words (35637 effective words) took 0.0s, 1108438 effective words/s
2020-08-02 00:53:03,652 : INFO : training on a 149460 raw words (106914 effective words) took 0.1s, 946063 effective words/s
2020-08-02 00:53:03,653 : INFO : collecting all words and their counts
2020-08-02 00:53:03,655 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:03,661 : INFO : collected 4470 word types from a corpus of 49820 raw words and 100 sentences
2020-08-02 00:53:03,662 : INFO : Updating model with new vocabulary
2020-08-02 00:53:03,665 : INFO : New added 1235 unique words (21% of original 5705) and increased the count of 1235 pre-

2020-08-02 00:53:05,666 : INFO : collecting all words and their counts
2020-08-02 00:53:05,667 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:05,673 : INFO : collected 4195 word types from a corpus of 51578 raw words and 100 sentences
2020-08-02 00:53:05,674 : INFO : Updating model with new vocabulary
2020-08-02 00:53:05,677 : INFO : New added 1148 unique words (21% of original 5343) and increased the count of 1148 pre-existing words (21% of original 5343)
2020-08-02 00:53:05,685 : INFO : deleting the raw counts dictionary of 4195 items
2020-08-02 00:53:05,685 : INFO : sample=0.001 downsamples 150 most-common words
2020-08-02 00:53:05,686 : INFO : downsampling leaves estimated 66045 word corpus (141.9% of prior 46545)
2020-08-02 00:53:05,699 : INFO : estimated required memory for 2296 words and 200 dimensions: 4821600 bytes
2020-08-02 00:53:05,700 : INFO : updating layer weights
2020-08-02 00:53:05,708 : INFO : training model with 3 workers 

2020-08-02 00:53:07,672 : INFO : deleting the raw counts dictionary of 4590 items
2020-08-02 00:53:07,673 : INFO : sample=0.001 downsamples 144 most-common words
2020-08-02 00:53:07,674 : INFO : downsampling leaves estimated 61592 word corpus (143.5% of prior 42929)
2020-08-02 00:53:07,685 : INFO : estimated required memory for 2578 words and 200 dimensions: 5413800 bytes
2020-08-02 00:53:07,686 : INFO : updating layer weights
2020-08-02 00:53:07,690 : INFO : training model with 3 workers on 7964 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:07,812 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:07,815 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:07,818 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:07,819 : INFO : EPOCH - 1 : training on 48519 raw words (34795 effective words) took 0.1s, 288151 effective words/s
2020-08-02 00:

2020-08-02 00:53:09,621 : INFO : training model with 3 workers on 7976 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:09,725 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:09,727 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:09,728 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:09,729 : INFO : EPOCH - 1 : training on 43046 raw words (30212 effective words) took 0.1s, 295393 effective words/s
2020-08-02 00:53:09,838 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:09,839 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:09,841 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:09,842 : INFO : EPOCH - 2 : training on 43046 raw words (30245 effective words) took 0.1s, 277622 effective words/s
2020-08-02 00:53:09,951 : INFO : worker

2020-08-02 00:53:11,611 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:11,613 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:11,617 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:11,617 : INFO : EPOCH - 2 : training on 42175 raw words (29312 effective words) took 0.1s, 280088 effective words/s
2020-08-02 00:53:11,720 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:11,722 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:11,725 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:11,726 : INFO : EPOCH - 3 : training on 42175 raw words (29249 effective words) took 0.1s, 276054 effective words/s
2020-08-02 00:53:11,727 : INFO : training on a 126525 raw words (87757 effective words) took 0.3s, 274382 effective words/s
2020-08-02 00:53:11,728 : INFO : collecting all words and their 

2020-08-02 00:53:13,610 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:13,611 : INFO : EPOCH - 3 : training on 45066 raw words (31863 effective words) took 0.1s, 270989 effective words/s
2020-08-02 00:53:13,612 : INFO : training on a 135198 raw words (95538 effective words) took 0.4s, 265618 effective words/s
2020-08-02 00:53:13,612 : INFO : collecting all words and their counts
2020-08-02 00:53:13,613 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:13,620 : INFO : collected 4762 word types from a corpus of 45066 raw words and 100 sentences
2020-08-02 00:53:13,621 : INFO : Updating model with new vocabulary
2020-08-02 00:53:13,625 : INFO : New added 1206 unique words (20% of original 5968) and increased the count of 1206 pre-existing words (20% of original 5968)
2020-08-02 00:53:13,637 : INFO : deleting the raw counts dictionary of 4762 items
2020-08-02 00:53:13,638 : INFO : sample=0.001 downsamples 186 mo

2020-08-02 00:53:15,461 : INFO : collected 4680 word types from a corpus of 44035 raw words and 100 sentences
2020-08-02 00:53:15,462 : INFO : Updating model with new vocabulary
2020-08-02 00:53:15,466 : INFO : New added 1172 unique words (20% of original 5852) and increased the count of 1172 pre-existing words (20% of original 5852)
2020-08-02 00:53:15,474 : INFO : deleting the raw counts dictionary of 4680 items
2020-08-02 00:53:15,474 : INFO : sample=0.001 downsamples 192 most-common words
2020-08-02 00:53:15,475 : INFO : downsampling leaves estimated 53907 word corpus (140.6% of prior 38343)
2020-08-02 00:53:15,557 : INFO : estimated required memory for 8010 words, 56178 buckets and 200 dimensions: 63602472 bytes
2020-08-02 00:53:15,559 : INFO : updating layer weights
2020-08-02 00:53:15,673 : INFO : Number of new ngrams is 35
2020-08-02 00:53:15,716 : INFO : training model with 3 workers on 8010 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-

2020-08-02 00:53:17,339 : INFO : estimated required memory for 8019 words, 56238 buckets and 200 dimensions: 63671036 bytes
2020-08-02 00:53:17,341 : INFO : updating layer weights
2020-08-02 00:53:17,454 : INFO : Number of new ngrams is 60
2020-08-02 00:53:17,498 : INFO : training model with 3 workers on 8019 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:17,636 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:17,639 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:17,641 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:17,642 : INFO : EPOCH - 1 : training on 41382 raw words (28846 effective words) took 0.1s, 203240 effective words/s
2020-08-02 00:53:17,785 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:17,788 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:17,789

2020-08-02 00:53:19,445 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:19,446 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:19,447 : INFO : EPOCH - 1 : training on 43640 raw words (30617 effective words) took 0.1s, 205591 effective words/s
2020-08-02 00:53:19,599 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:19,604 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:19,605 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:19,606 : INFO : EPOCH - 2 : training on 43640 raw words (30575 effective words) took 0.2s, 196498 effective words/s
2020-08-02 00:53:19,755 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:19,759 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:19,760 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:53:21,332 : INFO : EPOCH - 2 : training on 40432 raw words (27849 effective words) took 0.1s, 198230 effective words/s
2020-08-02 00:53:21,470 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:21,471 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:21,477 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:21,477 : INFO : EPOCH - 3 : training on 40432 raw words (27887 effective words) took 0.1s, 196151 effective words/s
2020-08-02 00:53:21,478 : INFO : training on a 121296 raw words (83615 effective words) took 0.4s, 192193 effective words/s
2020-08-02 00:53:22,094 : INFO : Training batch #190 
2020-08-02 00:53:22,094 : INFO : collecting all words and their counts
2020-08-02 00:53:22,095 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:22,103 : INFO : collected 5061 word types from a corpus of 49143 raw words and 100 sentences
202

2020-08-02 00:53:23,491 : INFO : training on a 147429 raw words (105666 effective words) took 0.6s, 190457 effective words/s
2020-08-02 00:53:24,054 : INFO : Training batch #191 
2020-08-02 00:53:24,055 : INFO : collecting all words and their counts
2020-08-02 00:53:24,055 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:24,064 : INFO : collected 4668 word types from a corpus of 46248 raw words and 100 sentences
2020-08-02 00:53:24,064 : INFO : Updating model with new vocabulary
2020-08-02 00:53:24,068 : INFO : New added 1253 unique words (21% of original 5921) and increased the count of 1253 pre-existing words (21% of original 5921)
2020-08-02 00:53:24,077 : INFO : deleting the raw counts dictionary of 4668 items
2020-08-02 00:53:24,077 : INFO : sample=0.001 downsamples 170 most-common words
2020-08-02 00:53:24,079 : INFO : downsampling leaves estimated 58093 word corpus (143.5% of prior 40488)
2020-08-02 00:53:24,091 : INFO : estimated requi

2020-08-02 00:53:25,945 : INFO : New added 1184 unique words (19% of original 6208) and increased the count of 1184 pre-existing words (19% of original 6208)
2020-08-02 00:53:25,953 : INFO : deleting the raw counts dictionary of 5024 items
2020-08-02 00:53:25,954 : INFO : sample=0.001 downsamples 170 most-common words
2020-08-02 00:53:25,955 : INFO : downsampling leaves estimated 56194 word corpus (142.2% of prior 39511)
2020-08-02 00:53:25,968 : INFO : estimated required memory for 2368 words and 200 dimensions: 4972800 bytes
2020-08-02 00:53:25,969 : INFO : updating layer weights
2020-08-02 00:53:25,973 : INFO : training model with 3 workers on 8069 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:26,007 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:26,008 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:26,009 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:53:27,821 : INFO : updating layer weights
2020-08-02 00:53:27,824 : INFO : training model with 3 workers on 8074 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:27,855 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:27,856 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:27,857 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:27,858 : INFO : EPOCH - 1 : training on 43561 raw words (30550 effective words) took 0.0s, 1017139 effective words/s
2020-08-02 00:53:27,890 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:27,891 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:27,892 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:27,892 : INFO : EPOCH - 2 : training on 43561 raw words (30527 effective words) took 0.0s, 997384 

2020-08-02 00:53:29,719 : INFO : EPOCH - 1 : training on 42444 raw words (30335 effective words) took 0.0s, 1006763 effective words/s
2020-08-02 00:53:29,749 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:29,750 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:29,750 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:29,751 : INFO : EPOCH - 2 : training on 42444 raw words (30326 effective words) took 0.0s, 1093950 effective words/s
2020-08-02 00:53:29,783 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:29,784 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:29,785 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:29,786 : INFO : EPOCH - 3 : training on 42444 raw words (30293 effective words) took 0.0s, 954275 effective words/s
2020-08-02 00:53:29,787 : INFO : training on a 127332

2020-08-02 00:53:31,622 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:31,623 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:31,624 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:31,625 : INFO : EPOCH - 3 : training on 46854 raw words (34149 effective words) took 0.0s, 1045599 effective words/s
2020-08-02 00:53:31,625 : INFO : training on a 140562 raw words (102458 effective words) took 0.1s, 947941 effective words/s
2020-08-02 00:53:31,626 : INFO : collecting all words and their counts
2020-08-02 00:53:31,627 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:31,633 : INFO : collected 4287 word types from a corpus of 46854 raw words and 100 sentences
2020-08-02 00:53:31,634 : INFO : Updating model with new vocabulary
2020-08-02 00:53:31,636 : INFO : New added 1272 unique words (22% of original 5559) and increased the count of 1272 pre-

2020-08-02 00:53:33,553 : INFO : collecting all words and their counts
2020-08-02 00:53:33,553 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:33,559 : INFO : collected 4723 word types from a corpus of 47133 raw words and 100 sentences
2020-08-02 00:53:33,560 : INFO : Updating model with new vocabulary
2020-08-02 00:53:33,563 : INFO : New added 1259 unique words (21% of original 5982) and increased the count of 1259 pre-existing words (21% of original 5982)
2020-08-02 00:53:33,570 : INFO : deleting the raw counts dictionary of 4723 items
2020-08-02 00:53:33,572 : INFO : sample=0.001 downsamples 146 most-common words
2020-08-02 00:53:33,573 : INFO : downsampling leaves estimated 60728 word corpus (146.6% of prior 41431)
2020-08-02 00:53:33,584 : INFO : estimated required memory for 2518 words and 200 dimensions: 5287800 bytes
2020-08-02 00:53:33,585 : INFO : updating layer weights
2020-08-02 00:53:33,590 : INFO : training model with 3 workers 

2020-08-02 00:53:35,526 : INFO : deleting the raw counts dictionary of 4089 items
2020-08-02 00:53:35,527 : INFO : sample=0.001 downsamples 140 most-common words
2020-08-02 00:53:35,528 : INFO : downsampling leaves estimated 62644 word corpus (146.3% of prior 42805)
2020-08-02 00:53:35,540 : INFO : estimated required memory for 2394 words and 200 dimensions: 5027400 bytes
2020-08-02 00:53:35,541 : INFO : updating layer weights
2020-08-02 00:53:35,548 : INFO : training model with 3 workers on 8149 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:35,663 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:35,665 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:35,667 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:35,668 : INFO : EPOCH - 1 : training on 47569 raw words (34513 effective words) took 0.1s, 296715 effective words/s
2020-08-02 00:

2020-08-02 00:53:37,513 : INFO : training model with 3 workers on 8160 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:37,626 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:37,630 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:37,632 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:37,632 : INFO : EPOCH - 1 : training on 46790 raw words (33820 effective words) took 0.1s, 289367 effective words/s
2020-08-02 00:53:37,757 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:37,761 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:37,762 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:37,763 : INFO : EPOCH - 2 : training on 46790 raw words (33859 effective words) took 0.1s, 270353 effective words/s
2020-08-02 00:53:37,885 : INFO : worker

2020-08-02 00:53:39,665 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:39,667 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:39,669 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:39,670 : INFO : EPOCH - 2 : training on 44248 raw words (31596 effective words) took 0.1s, 276121 effective words/s
2020-08-02 00:53:39,781 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:39,783 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:39,785 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:39,786 : INFO : EPOCH - 3 : training on 44248 raw words (31602 effective words) took 0.1s, 284346 effective words/s
2020-08-02 00:53:39,787 : INFO : training on a 132744 raw words (94833 effective words) took 0.3s, 271986 effective words/s
2020-08-02 00:53:39,788 : INFO : collecting all words and their 

2020-08-02 00:53:41,708 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:41,708 : INFO : EPOCH - 3 : training on 46588 raw words (33711 effective words) took 0.1s, 282147 effective words/s
2020-08-02 00:53:41,709 : INFO : training on a 139764 raw words (101127 effective words) took 0.4s, 280044 effective words/s
2020-08-02 00:53:41,710 : INFO : collecting all words and their counts
2020-08-02 00:53:41,711 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:41,720 : INFO : collected 4672 word types from a corpus of 46588 raw words and 100 sentences
2020-08-02 00:53:41,721 : INFO : Updating model with new vocabulary
2020-08-02 00:53:41,724 : INFO : New added 1256 unique words (21% of original 5928) and increased the count of 1256 pre-existing words (21% of original 5928)
2020-08-02 00:53:41,731 : INFO : deleting the raw counts dictionary of 4672 items
2020-08-02 00:53:41,732 : INFO : sample=0.001 downsamples 158 m

2020-08-02 00:53:43,719 : INFO : collected 4509 word types from a corpus of 50722 raw words and 100 sentences
2020-08-02 00:53:43,720 : INFO : Updating model with new vocabulary
2020-08-02 00:53:43,724 : INFO : New added 1330 unique words (22% of original 5839) and increased the count of 1330 pre-existing words (22% of original 5839)
2020-08-02 00:53:43,732 : INFO : deleting the raw counts dictionary of 4509 items
2020-08-02 00:53:43,732 : INFO : sample=0.001 downsamples 132 most-common words
2020-08-02 00:53:43,733 : INFO : downsampling leaves estimated 66404 word corpus (146.1% of prior 45441)
2020-08-02 00:53:43,819 : INFO : estimated required memory for 8244 words, 57455 buckets and 200 dimensions: 65166792 bytes
2020-08-02 00:53:43,821 : INFO : updating layer weights
2020-08-02 00:53:43,943 : INFO : Number of new ngrams is 190
2020-08-02 00:53:43,988 : INFO : training model with 3 workers on 8244 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:53:45,733 : INFO : estimated required memory for 8252 words, 57497 buckets and 200 dimensions: 65218824 bytes
2020-08-02 00:53:45,735 : INFO : updating layer weights
2020-08-02 00:53:45,856 : INFO : Number of new ngrams is 42
2020-08-02 00:53:45,900 : INFO : training model with 3 workers on 8252 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:46,053 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:46,055 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:46,059 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:46,059 : INFO : EPOCH - 1 : training on 42705 raw words (30284 effective words) took 0.2s, 192989 effective words/s
2020-08-02 00:53:46,209 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:46,210 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:46,214

2020-08-02 00:53:47,842 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:47,846 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:47,846 : INFO : EPOCH - 1 : training on 42646 raw words (30290 effective words) took 0.2s, 196931 effective words/s
2020-08-02 00:53:47,998 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:48,000 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:48,004 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:48,004 : INFO : EPOCH - 2 : training on 42646 raw words (30366 effective words) took 0.2s, 197460 effective words/s
2020-08-02 00:53:48,156 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:48,157 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:48,162 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:53:49,940 : INFO : EPOCH - 2 : training on 47182 raw words (34238 effective words) took 0.2s, 195637 effective words/s
2020-08-02 00:53:50,113 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:50,114 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:50,119 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:50,120 : INFO : EPOCH - 3 : training on 47182 raw words (34248 effective words) took 0.2s, 192784 effective words/s
2020-08-02 00:53:50,121 : INFO : training on a 141546 raw words (102680 effective words) took 0.5s, 192781 effective words/s
2020-08-02 00:53:50,672 : INFO : Training batch #205 
2020-08-02 00:53:50,673 : INFO : collecting all words and their counts
2020-08-02 00:53:50,674 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:50,681 : INFO : collected 4443 word types from a corpus of 44574 raw words and 100 sentences
20

2020-08-02 00:53:51,966 : INFO : training on a 133722 raw words (95781 effective words) took 0.5s, 192229 effective words/s
2020-08-02 00:53:52,547 : INFO : Training batch #206 
2020-08-02 00:53:52,548 : INFO : collecting all words and their counts
2020-08-02 00:53:52,548 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:53:52,556 : INFO : collected 4473 word types from a corpus of 43418 raw words and 100 sentences
2020-08-02 00:53:52,556 : INFO : Updating model with new vocabulary
2020-08-02 00:53:52,559 : INFO : New added 1103 unique words (19% of original 5576) and increased the count of 1103 pre-existing words (19% of original 5576)
2020-08-02 00:53:52,567 : INFO : deleting the raw counts dictionary of 4473 items
2020-08-02 00:53:52,568 : INFO : sample=0.001 downsamples 208 most-common words
2020-08-02 00:53:52,569 : INFO : downsampling leaves estimated 53913 word corpus (142.9% of prior 37718)
2020-08-02 00:53:52,582 : INFO : estimated requir

2020-08-02 00:53:54,438 : INFO : New added 1176 unique words (21% of original 5583) and increased the count of 1176 pre-existing words (21% of original 5583)
2020-08-02 00:53:54,447 : INFO : deleting the raw counts dictionary of 4407 items
2020-08-02 00:53:54,448 : INFO : sample=0.001 downsamples 172 most-common words
2020-08-02 00:53:54,449 : INFO : downsampling leaves estimated 57236 word corpus (144.6% of prior 39596)
2020-08-02 00:53:54,461 : INFO : estimated required memory for 2352 words and 200 dimensions: 4939200 bytes
2020-08-02 00:53:54,461 : INFO : updating layer weights
2020-08-02 00:53:54,468 : INFO : training model with 3 workers on 8325 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:54,501 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:54,503 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:54,503 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:53:56,394 : INFO : updating layer weights
2020-08-02 00:53:56,398 : INFO : training model with 3 workers on 8339 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:53:56,432 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:56,433 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:56,434 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:56,435 : INFO : EPOCH - 1 : training on 49026 raw words (35601 effective words) took 0.0s, 1035660 effective words/s
2020-08-02 00:53:56,470 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:56,472 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:56,472 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:56,473 : INFO : EPOCH - 2 : training on 49026 raw words (35591 effective words) took 0.0s, 1093373

2020-08-02 00:53:58,435 : INFO : EPOCH - 1 : training on 41543 raw words (29331 effective words) took 0.0s, 1064500 effective words/s
2020-08-02 00:53:58,464 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:58,466 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:58,466 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:58,467 : INFO : EPOCH - 2 : training on 41543 raw words (29363 effective words) took 0.0s, 1027970 effective words/s
2020-08-02 00:53:58,499 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:53:58,500 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:53:58,500 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:53:58,501 : INFO : EPOCH - 3 : training on 41543 raw words (29358 effective words) took 0.0s, 987206 effective words/s
2020-08-02 00:53:58,502 : INFO : training on a 124629

2020-08-02 00:54:00,312 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:00,313 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:00,314 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:00,314 : INFO : EPOCH - 3 : training on 43100 raw words (30530 effective words) took 0.0s, 989144 effective words/s
2020-08-02 00:54:00,315 : INFO : training on a 129300 raw words (91582 effective words) took 0.1s, 877415 effective words/s
2020-08-02 00:54:00,316 : INFO : collecting all words and their counts
2020-08-02 00:54:00,316 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:54:00,322 : INFO : collected 4573 word types from a corpus of 43100 raw words and 100 sentences
2020-08-02 00:54:00,323 : INFO : Updating model with new vocabulary
2020-08-02 00:54:00,326 : INFO : New added 1083 unique words (19% of original 5656) and increased the count of 1083 pre-ex

2020-08-02 00:54:02,127 : INFO : collecting all words and their counts
2020-08-02 00:54:02,128 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:54:02,134 : INFO : collected 4494 word types from a corpus of 43989 raw words and 100 sentences
2020-08-02 00:54:02,134 : INFO : Updating model with new vocabulary
2020-08-02 00:54:02,137 : INFO : New added 1206 unique words (21% of original 5700) and increased the count of 1206 pre-existing words (21% of original 5700)
2020-08-02 00:54:02,145 : INFO : deleting the raw counts dictionary of 4494 items
2020-08-02 00:54:02,145 : INFO : sample=0.001 downsamples 182 most-common words
2020-08-02 00:54:02,146 : INFO : downsampling leaves estimated 55794 word corpus (145.5% of prior 38351)
2020-08-02 00:54:02,158 : INFO : estimated required memory for 2412 words and 200 dimensions: 5065200 bytes
2020-08-02 00:54:02,159 : INFO : updating layer weights
2020-08-02 00:54:02,163 : INFO : training model with 3 workers 

2020-08-02 00:54:04,131 : INFO : deleting the raw counts dictionary of 4803 items
2020-08-02 00:54:04,132 : INFO : sample=0.001 downsamples 136 most-common words
2020-08-02 00:54:04,133 : INFO : downsampling leaves estimated 68707 word corpus (147.1% of prior 46721)
2020-08-02 00:54:04,148 : INFO : estimated required memory for 2744 words and 200 dimensions: 5762400 bytes
2020-08-02 00:54:04,148 : INFO : updating layer weights
2020-08-02 00:54:04,152 : INFO : training model with 3 workers on 8386 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:54:04,290 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:04,291 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:04,293 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:04,294 : INFO : EPOCH - 1 : training on 52416 raw words (38409 effective words) took 0.1s, 278994 effective words/s
2020-08-02 00:

2020-08-02 00:54:06,265 : INFO : training model with 3 workers on 8388 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:54:06,386 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:06,391 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:06,393 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:06,394 : INFO : EPOCH - 1 : training on 51650 raw words (38049 effective words) took 0.1s, 304030 effective words/s
2020-08-02 00:54:06,524 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:06,528 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:06,531 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:06,532 : INFO : EPOCH - 2 : training on 51650 raw words (38052 effective words) took 0.1s, 283139 effective words/s
2020-08-02 00:54:06,662 : INFO : worker

2020-08-02 00:54:08,607 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:08,608 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:08,610 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:08,611 : INFO : EPOCH - 2 : training on 46206 raw words (33659 effective words) took 0.1s, 286764 effective words/s
2020-08-02 00:54:08,728 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:08,729 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:08,731 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:08,733 : INFO : EPOCH - 3 : training on 46206 raw words (33742 effective words) took 0.1s, 287572 effective words/s
2020-08-02 00:54:08,733 : INFO : training on a 138618 raw words (101018 effective words) took 0.4s, 283036 effective words/s
2020-08-02 00:54:08,734 : INFO : collecting all words and their

2020-08-02 00:54:10,774 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:10,775 : INFO : EPOCH - 3 : training on 53900 raw words (39779 effective words) took 0.1s, 288201 effective words/s
2020-08-02 00:54:10,776 : INFO : training on a 161700 raw words (119089 effective words) took 0.4s, 277434 effective words/s
2020-08-02 00:54:10,778 : INFO : collecting all words and their counts
2020-08-02 00:54:10,778 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:54:10,786 : INFO : collected 4811 word types from a corpus of 53900 raw words and 100 sentences
2020-08-02 00:54:10,786 : INFO : Updating model with new vocabulary
2020-08-02 00:54:10,790 : INFO : New added 1427 unique words (22% of original 6238) and increased the count of 1427 pre-existing words (22% of original 6238)
2020-08-02 00:54:10,799 : INFO : deleting the raw counts dictionary of 4811 items
2020-08-02 00:54:10,800 : INFO : sample=0.001 downsamples 126 m

2020-08-02 00:54:12,884 : INFO : collected 4930 word types from a corpus of 51303 raw words and 100 sentences
2020-08-02 00:54:12,885 : INFO : Updating model with new vocabulary
2020-08-02 00:54:12,888 : INFO : New added 1313 unique words (21% of original 6243) and increased the count of 1313 pre-existing words (21% of original 6243)
2020-08-02 00:54:12,897 : INFO : deleting the raw counts dictionary of 4930 items
2020-08-02 00:54:12,898 : INFO : sample=0.001 downsamples 142 most-common words
2020-08-02 00:54:12,899 : INFO : downsampling leaves estimated 65566 word corpus (144.9% of prior 45260)
2020-08-02 00:54:12,990 : INFO : estimated required memory for 8444 words, 58533 buckets and 200 dimensions: 66492792 bytes
2020-08-02 00:54:12,991 : INFO : updating layer weights
2020-08-02 00:54:13,114 : INFO : Number of new ngrams is 135
2020-08-02 00:54:13,158 : INFO : training model with 3 workers on 8444 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08

2020-08-02 00:54:15,149 : INFO : estimated required memory for 8450 words, 58579 buckets and 200 dimensions: 66543728 bytes
2020-08-02 00:54:15,150 : INFO : updating layer weights
2020-08-02 00:54:15,270 : INFO : Number of new ngrams is 46
2020-08-02 00:54:15,315 : INFO : training model with 3 workers on 8450 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:54:15,513 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:15,518 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:15,519 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:15,520 : INFO : EPOCH - 1 : training on 55574 raw words (41172 effective words) took 0.2s, 206410 effective words/s
2020-08-02 00:54:15,724 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:15,729 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:15,730

2020-08-02 00:54:17,688 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:17,688 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:17,689 : INFO : EPOCH - 1 : training on 55921 raw words (40908 effective words) took 0.2s, 204357 effective words/s
2020-08-02 00:54:17,890 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:17,894 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:17,896 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:17,896 : INFO : EPOCH - 2 : training on 55921 raw words (41031 effective words) took 0.2s, 202285 effective words/s
2020-08-02 00:54:18,100 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:18,103 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:18,104 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-0

2020-08-02 00:54:19,738 : INFO : EPOCH - 2 : training on 41936 raw words (29906 effective words) took 0.2s, 197261 effective words/s
2020-08-02 00:54:19,881 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:19,888 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:19,889 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:19,889 : INFO : EPOCH - 3 : training on 41936 raw words (29855 effective words) took 0.1s, 200742 effective words/s
2020-08-02 00:54:19,891 : INFO : training on a 125808 raw words (89530 effective words) took 0.5s, 195966 effective words/s
2020-08-02 00:54:20,445 : INFO : Training batch #220 
2020-08-02 00:54:20,446 : INFO : collecting all words and their counts
2020-08-02 00:54:20,446 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:54:20,453 : INFO : collected 3645 word types from a corpus of 38923 raw words and 100 sentences
202

2020-08-02 00:54:21,611 : INFO : training on a 116769 raw words (81546 effective words) took 0.4s, 189837 effective words/s
2020-08-02 00:54:22,158 : INFO : Training batch #221 
2020-08-02 00:54:22,158 : INFO : collecting all words and their counts
2020-08-02 00:54:22,159 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:54:22,165 : INFO : collected 3459 word types from a corpus of 39753 raw words and 100 sentences
2020-08-02 00:54:22,166 : INFO : Updating model with new vocabulary
2020-08-02 00:54:22,169 : INFO : New added 1054 unique words (23% of original 4513) and increased the count of 1054 pre-existing words (23% of original 4513)
2020-08-02 00:54:22,176 : INFO : deleting the raw counts dictionary of 3459 items
2020-08-02 00:54:22,177 : INFO : sample=0.001 downsamples 200 most-common words
2020-08-02 00:54:22,177 : INFO : downsampling leaves estimated 50268 word corpus (140.8% of prior 35706)
2020-08-02 00:54:22,190 : INFO : estimated requir

2020-08-02 00:54:23,934 : INFO : New added 1150 unique words (21% of original 5462) and increased the count of 1150 pre-existing words (21% of original 5462)
2020-08-02 00:54:23,941 : INFO : deleting the raw counts dictionary of 4312 items
2020-08-02 00:54:23,942 : INFO : sample=0.001 downsamples 198 most-common words
2020-08-02 00:54:23,942 : INFO : downsampling leaves estimated 53535 word corpus (140.9% of prior 38000)
2020-08-02 00:54:23,955 : INFO : estimated required memory for 2300 words and 200 dimensions: 4830000 bytes
2020-08-02 00:54:23,956 : INFO : updating layer weights
2020-08-02 00:54:23,960 : INFO : training model with 3 workers on 8530 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:54:23,991 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:23,992 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:23,993 : INFO : worker thread finished; awaiting finish of 0 mo

2020-08-02 00:54:25,865 : INFO : updating layer weights
2020-08-02 00:54:25,871 : INFO : training model with 3 workers on 8552 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:54:25,902 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:25,904 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:25,904 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:25,905 : INFO : EPOCH - 1 : training on 44074 raw words (31372 effective words) took 0.0s, 1005611 effective words/s
2020-08-02 00:54:25,937 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:25,938 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:25,939 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:25,939 : INFO : EPOCH - 2 : training on 44074 raw words (31304 effective words) took 0.0s, 1025763

2020-08-02 00:54:27,827 : INFO : EPOCH - 1 : training on 44809 raw words (31797 effective words) took 0.0s, 927585 effective words/s
2020-08-02 00:54:27,860 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:27,861 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:27,862 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:27,862 : INFO : EPOCH - 2 : training on 44809 raw words (31942 effective words) took 0.0s, 1078759 effective words/s
2020-08-02 00:54:27,895 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:27,896 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:27,897 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:27,898 : INFO : EPOCH - 3 : training on 44809 raw words (31788 effective words) took 0.0s, 984649 effective words/s
2020-08-02 00:54:27,899 : INFO : training on a 134427 

2020-08-02 00:54:29,812 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:29,813 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:29,814 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:29,815 : INFO : EPOCH - 3 : training on 47473 raw words (33952 effective words) took 0.0s, 948255 effective words/s
2020-08-02 00:54:29,816 : INFO : training on a 142419 raw words (101782 effective words) took 0.1s, 897367 effective words/s
2020-08-02 00:54:29,816 : INFO : collecting all words and their counts
2020-08-02 00:54:29,817 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:54:29,823 : INFO : collected 4498 word types from a corpus of 47473 raw words and 100 sentences
2020-08-02 00:54:29,824 : INFO : Updating model with new vocabulary
2020-08-02 00:54:29,828 : INFO : New added 1200 unique words (21% of original 5698) and increased the count of 1200 pre-e

2020-08-02 00:54:31,804 : INFO : collecting all words and their counts
2020-08-02 00:54:31,805 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:54:31,811 : INFO : collected 4354 word types from a corpus of 45218 raw words and 100 sentences
2020-08-02 00:54:31,812 : INFO : Updating model with new vocabulary
2020-08-02 00:54:31,815 : INFO : New added 1148 unique words (20% of original 5502) and increased the count of 1148 pre-existing words (20% of original 5502)
2020-08-02 00:54:31,822 : INFO : deleting the raw counts dictionary of 4354 items
2020-08-02 00:54:31,823 : INFO : sample=0.001 downsamples 172 most-common words
2020-08-02 00:54:31,823 : INFO : downsampling leaves estimated 56801 word corpus (142.2% of prior 39938)
2020-08-02 00:54:31,836 : INFO : estimated required memory for 2296 words and 200 dimensions: 4821600 bytes
2020-08-02 00:54:31,837 : INFO : updating layer weights
2020-08-02 00:54:31,840 : INFO : training model with 3 workers 

2020-08-02 00:54:33,728 : INFO : deleting the raw counts dictionary of 4812 items
2020-08-02 00:54:33,729 : INFO : sample=0.001 downsamples 152 most-common words
2020-08-02 00:54:33,730 : INFO : downsampling leaves estimated 61861 word corpus (143.6% of prior 43090)
2020-08-02 00:54:33,743 : INFO : estimated required memory for 2548 words and 200 dimensions: 5350800 bytes
2020-08-02 00:54:33,745 : INFO : updating layer weights
2020-08-02 00:54:33,752 : INFO : training model with 3 workers on 8589 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:54:33,870 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:33,872 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:33,873 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:33,874 : INFO : EPOCH - 1 : training on 48820 raw words (35145 effective words) took 0.1s, 297270 effective words/s
2020-08-02 00:

2020-08-02 00:54:35,708 : INFO : training model with 3 workers on 8596 vocabulary and 200 features, using sg=1 hs=0 sample=0.001 negative=5 window=7
2020-08-02 00:54:35,807 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:35,812 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:35,814 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:35,816 : INFO : EPOCH - 1 : training on 43031 raw words (30139 effective words) took 0.1s, 289383 effective words/s
2020-08-02 00:54:35,922 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:35,927 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:35,928 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:35,929 : INFO : EPOCH - 2 : training on 43031 raw words (30317 effective words) took 0.1s, 274211 effective words/s
2020-08-02 00:54:36,033 : INFO : worker

2020-08-02 00:54:37,816 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:37,818 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:37,820 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:37,820 : INFO : EPOCH - 2 : training on 44458 raw words (31364 effective words) took 0.1s, 277857 effective words/s
2020-08-02 00:54:37,935 : INFO : worker thread finished; awaiting finish of 2 more threads
2020-08-02 00:54:37,937 : INFO : worker thread finished; awaiting finish of 1 more threads
2020-08-02 00:54:37,939 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:37,940 : INFO : EPOCH - 3 : training on 44458 raw words (31528 effective words) took 0.1s, 273433 effective words/s
2020-08-02 00:54:37,941 : INFO : training on a 133374 raw words (94450 effective words) took 0.4s, 269582 effective words/s
2020-08-02 00:54:37,942 : INFO : collecting all words and their 

2020-08-02 00:54:39,827 : INFO : worker thread finished; awaiting finish of 0 more threads
2020-08-02 00:54:39,827 : INFO : EPOCH - 3 : training on 45580 raw words (32638 effective words) took 0.1s, 284042 effective words/s
2020-08-02 00:54:39,828 : INFO : training on a 136740 raw words (98169 effective words) took 0.4s, 273537 effective words/s
2020-08-02 00:54:39,829 : INFO : collecting all words and their counts
2020-08-02 00:54:39,830 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2020-08-02 00:54:39,837 : INFO : collected 4266 word types from a corpus of 45580 raw words and 100 sentences
2020-08-02 00:54:39,838 : INFO : Updating model with new vocabulary
2020-08-02 00:54:39,842 : INFO : New added 1174 unique words (21% of original 5440) and increased the count of 1174 pre-existing words (21% of original 5440)
2020-08-02 00:54:39,849 : INFO : deleting the raw counts dictionary of 4266 items
2020-08-02 00:54:39,849 : INFO : sample=0.001 downsamples 170 mo

2020-08-02 00:54:41,458 : INFO : collected 3518 word types from a corpus of 27315 raw words and 60 sentences
2020-08-02 00:54:41,459 : INFO : Updating model with new vocabulary
2020-08-02 00:54:41,462 : INFO : New added 819 unique words (18% of original 4337) and increased the count of 819 pre-existing words (18% of original 4337)
2020-08-02 00:54:41,466 : INFO : deleting the raw counts dictionary of 3518 items
2020-08-02 00:54:41,467 : INFO : sample=0.001 downsamples 192 most-common words
2020-08-02 00:54:41,468 : INFO : downsampling leaves estimated 31499 word corpus (137.4% of prior 22918)
2020-08-02 00:54:41,557 : INFO : estimated required memory for 8609 words, 59531 buckets and 200 dimensions: 67673868 bytes
2020-08-02 00:54:41,559 : INFO : updating layer weights
2020-08-02 00:54:41,682 : INFO : Number of new ngrams is 17
2020-08-02 00:54:41,727 : INFO : training model with 3 workers on 8609 vocabulary and 200 features, using sg=0 hs=0 sample=0.001 negative=5 window=7
2020-08-02 

In [29]:
for c, s in zip(cbow.wv.most_similar(['engineer']), skip_gram.wv.most_similar(['engineer'])):
    print(c, s)

('tester', 0.6862378120422363) ('devops', 0.5889719724655151)
('developer', 0.6647790670394897) ('tester', 0.5448418259620667)
('technician', 0.6424199342727661) ('fpga', 0.5426498651504517)
('programmer', 0.6294628381729126) ('opto', 0.5265384912490845)
('sr', 0.6243979334831238) ('atg', 0.5201519727706909)
('administrator', 0.6040023565292358) ('sqa', 0.5168964862823486)
('designer', 0.6038491725921631) ('murata', 0.5127512216567993)
('architect', 0.5957659482955933) ('mulesoft', 0.5110695958137512)
('scientist', 0.5900628566741943) ('routercim', 0.5072110891342163)
('analyst', 0.5836350917816162) ('algorithm', 0.5070369839668274)


  if np.issubdtype(vec.dtype, np.int):


# Train occupation classifier

In [14]:
onet = Onet()
onet.print_summary_stats()

2020-08-02 16:41:28,196 : INFO : Manual build specified. Building O*NET CompetencyOntology via direct querying from O*NET site, or local cache.
2020-08-02 16:41:28,197 : INFO : Processing Content Model Reference
2020-08-02 16:41:29,778 : INFO : Processing occupation data
2020-08-02 16:41:31,138 : INFO : Processing Knowledge, Skills, Abilities
2020-08-02 16:42:03,946 : INFO : Processing tools and technology


Ontology summary statistics for onet
Num competencies: 32030
Num occupations: 1133
Num competency-occupation edges: 107305
Median occupations per competency: 1
Median competencies per occupation: 89
Mean occupations per competency: 3.350245090386837
Mean competencies per occupation: 94.70873786407768


In [38]:
class trainJobPostingParser(object):
    def __init__(self):
#         fname = '800_sample.json'
        fname = '8k_sample.json'
        f = open(fname, 'r')
        self.lines = f.read().split('\n')
        self.transformer = VirginiaTransformer(partner_id = 'VA')
        
    def __iter__(self) -> JobPostingGeneratorType:
        for line in self.lines:
            if line:
                yield self.transformer._transform(json.loads(line))

job_postings_train = trainJobPostingParser()

In [39]:
# target variable
full_soc = FullSOC(onet_cache=onet)

# design matrix
document_schema_fields = ['description','experienceRequirements', 'qualifications', 'skills']

dataset = itershuffle(job_postings_train)
train = islice(dataset, 0, 8000)
test =islice(dataset, 8000)

pipe_x = IterablePipeline(
    partial(nlp.fields_join, document_schema_fields=document_schema_fields),
    nlp.clean_str,
    nlp.word_tokenize,
    partial(nlp.vectorize, embedding_model=fasttext)
)

pipe_y = IterablePipeline(
    full_soc.transformer
)        


matrix = DesignMatrix(
    train, 
    full_soc,
    pipe_x,
    pipe_y,
)

# occupation classifier trainer
grid_config = {
                 'sklearn.ensemble.ExtraTreesClassifier': {
                     'n_estimators': [50, 100],
                     'criterion': ['entropy', 'gini'],
                     'max_depth': [20],
                     'max_features': ['log2'],
                     'min_samples_split': [10]
                      },
                 'sklearn.neural_network.MLPClassifier': {
                    'hidden_layer_sizes': [100, 500],
                     'activation': ['logistic', 'relu'],
                     'solver': ['adam']
                     },
                 }

cls_trainer = OccupationClassifierTrainer(
    matrix=matrix,
    k_folds=2,
    grid_config=grid_config,
    storage=FSStore('tmp/model_cache/soc_classifiers/monster'),
    n_jobs=14)


In [40]:
cls_trainer.train(save=False)



2020-08-02 11:19:07,879 : INFO : Start training 2020-08-02T11:18:59.715516
2020-08-02 11:19:07,881 : INFO : Building matrix
2020-08-02 11:19:33,997 : INFO : total jobpostings: 8000
2020-08-02 11:19:33,998 : INFO : filtered jobpostings: 8000
2020-08-02 11:19:33,998 : INFO : dropped jobposting: 0.0
2020-08-02 11:19:34,010 : INFO : training ExtraTreesClassifier
2020-08-02 11:19:34,011 : INFO : Creating model hash from unique data {'className': 'ExtraTreesClassifier', 'parameters': {'n_estimators': [50, 100], 'criterion': ['entropy', 'gini'], 'max_depth': [20], 'max_features': ['log2'], 'min_samples_split': [10]}, 'project_path': 'tmp/model_cache/soc_classifiers/monster', 'training_metadata': {'pipe_X': ['partial(func, *args, **keywords) - new function with partial application\n    of the given arguments and keywords.\n', '\n    Args:\n        text: A unicode string\n    Returns:\n        str: lowercased, sans punctuation, non-English letters\n    ', '\n    Args:\n        text (str): a uni

In [41]:
cls_trainer.best_estimators



[<ProxyObjectWithStorage at 0x7fe867213830 for GridSearchCV at 0x7fe880c3d198>,
 <ProxyObjectWithStorage at 0x7fe8672137d8 for GridSearchCV at 0x7fe880b0d208>]

## Evaluations

In [42]:
steps = [
    partial(nlp.fields_join, document_schema_fields=document_schema_fields),
    nlp.normalize,
    nlp.clean_str,
    nlp.word_tokenize,
]

evaluators = []
test_data = list(test)
for cls in cls_trainer.best_estimators:
    tester = OccupationClassifierTester(
        test_data_generator=job_postings_train, 
        preprocessing=steps, 
        classifier=CombinedClassifier(fasttext, cls)
    )
    evaluators.append(OnetOccupationClassificationEvaluator(tester))

  if __name__ == '__main__':


In [43]:
for e, c in zip(evaluators, cls_trainer.best_estimators):
    print(c.best_estimator_)
    print('accuracy: ', e.accuracy)
    print('precision: ', e.precision)
    print('f1: ', e.f1)
    print('major group: ', e.accuracy_major_group)
    print('macro precision: ', e.macro_precision)
    print('micro precision: ', e.micro_precision)
    print('recall: ', e.recall)
    print('macro recall: ', e.macro_recall)
    print('micro recall: ', e.micro_recall)
    print('macro f1: ', e.macro_f1)
    print('micro f1: ', e.micro_f1)
    print('\n')

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='entropy',
           max_depth=20, max_features='log2', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=10,
           min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=14,
           oob_score=False, random_state=None, verbose=0, warm_start=False)
accuracy:  1.0
precision:  [1. 1. 1. ... 1. 1. 1.]
f1:  [1. 1. 1. ... 1. 1. 1.]
major group:  1.0
macro precision:  1.0
micro precision:  1.0
recall:  [1. 1. 1. ... 1. 1. 1.]
macro recall:  1.0
micro recall:  1.0
macro f1:  1.0
micro f1:  1.0


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=500, learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shu

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


# Generate Job posting corpus for soc prediction

In [44]:
monster_220k.describe()

Unnamed: 0,country,country_code,date_added,has_expired,job_board,job_description,job_title,job_type,location,organization,page_url,salary,sector,uniq_id
count,22000,22000,122,22000,22000,22000,22000,20372,22000,15133,22000,3446,16806,22000
unique,1,1,77,1,1,18744,18749,39,8423,738,22000,1737,163,22000
top,United States of America,US,9/22/16,No,jobs.monster.com,12N Horizontal Construction Engineers Job Desc...,Monster,Full Time,"Dallas, TX",Healthcare Services,http://jobview.monster.com/Registered-Nurse-1-...,"40,000.00 - 100,000.00 $ /year",Experienced (Non-Manager),89706a078c37b0f70cec1dc93cff86db
freq,22000,22000,6,22000,22000,104,318,6757,646,1919,1,50,4594,1


In [45]:
# job_postings_generator = JobPostingCollectionSample()
corpus = CorpusCreator(job_postings)
corpus_creator = SimpleCorpusCreator()
common_classifier = CombinedClassifier(fasttext, cls)
# predicted_soc = common_classifier.predict_soc(corpus_creator._transform(sample_600))

soc_list = []
for job_posting in job_postings:
    predicted_soc = common_classifier.predict_soc(corpus_creator._transform(job_posting))
    soc_list.append(predicted_soc[0][0])

In [47]:
len(soc_list[1160:])

22000

# Merge with dataframe

In [48]:
print(monster_220k.columns)
monster_220k.head()

Index(['country', 'country_code', 'date_added', 'has_expired', 'job_board',
       'job_description', 'job_title', 'job_type', 'location', 'organization',
       'page_url', 'salary', 'sector', 'uniq_id'],
      dtype='object')


Unnamed: 0,country,country_code,date_added,has_expired,job_board,job_description,job_title,job_type,location,organization,page_url,salary,sector,uniq_id
0,United States of America,US,,No,jobs.monster.com,TeamSoft is seeing an IT Support Specialist to...,IT Support Technician Job in Madison,Full Time Employee,"Madison, WI 53702",,http://jobview.monster.com/it-support-technici...,,IT/Software Development,11d599f229a80023d2f40e7c52cd941e
1,United States of America,US,,No,jobs.monster.com,The Wisconsin State Journal is seeking a flexi...,Business Reporter/Editor Job in Madison,Full Time,"Madison, WI 53708",Printing and Publishing,http://jobview.monster.com/business-reporter-e...,,,e4cbb126dabf22159aff90223243ff2a
2,United States of America,US,,No,jobs.monster.com,Report this job About the Job DePuy Synthes Co...,Johnson & Johnson Family of Companies Job Appl...,"Full Time, Employee",DePuy Synthes Companies is a member of Johnson...,Personal and Household Services,http://jobview.monster.com/senior-training-lea...,,,839106b353877fa3d896ffb9c1fe01c0
3,United States of America,US,,No,jobs.monster.com,Why Join Altec? If you’re considering a career...,Engineer - Quality Job in Dixon,Full Time,"Dixon, CA",Altec Industries,http://jobview.monster.com/engineer-quality-jo...,,Experienced (Non-Manager),58435fcab804439efdcaa7ecca0fd783
4,United States of America,US,,No,jobs.monster.com,Position ID# 76162 # Positions 1 State CT C...,Shift Supervisor - Part-Time Job in Camphill,Full Time Employee,"Camphill, PA",Retail,http://jobview.monster.com/shift-supervisor-pa...,,Project/Program Management,64d0272dc8496abfd9523a8df63c184c


In [49]:
col_toDrop = ['country', 'date_added', 'has_expired', 'job_board', 'salary', 'uniq_id', 'page_url']
monster_220k.drop(col_toDrop, axis=1, inplace=True)

In [50]:
monster_220k['soc_code'] = soc_list[1160:]

In [51]:
import pickle

dirPData = ''
f_name = dirPData + 'df_output.pickle'
with open(f_name, "wb") as f:
    pickle.dump(monster_220k, f)

In [52]:
len(list(set(soc_list)))

98

# Skills extraction

## Reopen and parse job postings data

In [9]:
dirPData = '../output/'
f_name = dirPData + 'df_output.pickle'

with open(f_name, "rb") as f:
    dict_ = pickle.load(f)

monster_220k = dict_
del f_name, dict_


In [12]:
monster_220k.head()

Unnamed: 0,country_code,job_description,job_title,job_type,location,organization,sector,soc_code,pattern,exact_match,soc_match,skills
0,US,TeamSoft is seeing an IT Support Specialist to...,IT Support Technician Job in Madison,Full Time Employee,"Madison, WI 53702",,IT/Software Development,19-1099.00,,,,
1,US,The Wisconsin State Journal is seeking a flexi...,Business Reporter/Editor Job in Madison,Full Time,"Madison, WI 53708",Printing and Publishing,,19-1099.00,,,,
2,US,Report this job About the Job DePuy Synthes Co...,Johnson & Johnson Family of Companies Job Appl...,"Full Time, Employee",DePuy Synthes Companies is a member of Johnson...,Personal and Household Services,,17-2199.05,,,,
3,US,Why Join Altec? If you’re considering a career...,Engineer - Quality Job in Dixon,Full Time,"Dixon, CA",Altec Industries,Experienced (Non-Manager),11-9039.00,,,,
4,US,Position ID# 76162 # Positions 1 State CT C...,Shift Supervisor - Part-Time Job in Camphill,Full Time Employee,"Camphill, PA",Retail,Project/Program Management,15-1199.00,,,,


In [19]:
fname = '../data/50_sample.json'
f = open(fname, 'r')
data = f.read().split('\n')[0]
schema = json.loads(data)
del f, data, fname

In [20]:
df_json = []
for i in range(monster_220k.shape[0]):
    occupation = copy.deepcopy(schema)
    occupation['hiringOrganization']['location'] = monster_220k['location'][i]
    occupation['title'] = monster_220k['job_title'][i]
    occupation['jobDescription'] = monster_220k['job_description'][i].split('\n')
    occupation['normalizedTitle']['onetCode'] = monster_220k['soc_code'][i]
    df_json.append(occupation)
    
with open("../output/monster_soc_output.json", "w") as write_file:
    for i in df_json:
        json.dump(i, write_file)
        write_file.write('\n')

In [21]:
## reads json data file

JobPostingType = Dict[Text, Any]
JobPostingGeneratorType = Generator[JobPostingType, None, None]
MetadataType = Dict[Text, Dict[Text, Any]]

class monsterSOCOutputParser(object):
    def __init__(self):
        fname = '../output/monster_soc_output.json'
        f = open(fname, 'r')
        self.lines = f.read().split('\n')
        self.transformer = VirginiaTransformer(partner_id = 'VA')
        
    def __iter__(self) -> JobPostingGeneratorType:
        for line in self.lines:
            if line:
                yield self.transformer._transform(json.loads(line))
    
    @property
    def metadata(self) -> MetadataType:
        return {'job postings': {
            'downloaded_from': 'jobs.monster.com',
            'month': '2020-08',
            'purpose': 'monster_analysis'
        }}
    
job_postings = monsterSOCOutputParser()

In [22]:
monster_220k['pattern'] = None
monster_220k['exact_match'] = None
monster_220k['soc_match'] = None
monster_220k['skills'] = None

In [23]:
skill_extractor_p = SkillEndingPatternExtractor(only_bulleted_lines=False)
skill_extractor_e = ExactMatchSkillExtractor(onet.competency_framework)
skill_extractor_s = SocScopedExactMatchSkillExtractor(onet)

counter = 0

for job_posting in job_postings:
    
    pattern_dict = {}   
    em_dict = {}
    soc_dict = {}
    
    skills = []
    
    for candidate_skill in skill_extractor_p.candidate_skills(job_posting):
        pattern_dict[candidate_skill.skill_name] = candidate_skill.context
        skills.append(candidate_skill.skill_name)
    monster_220k['pattern'][counter] = pattern_dict
    
    for candidate_skill in skill_extractor_e.candidate_skills(job_posting):
        em_dict[candidate_skill.skill_name] = candidate_skill.context
        skills.append(candidate_skill.skill_name)
    monster_220k['exact_match'][counter] = em_dict
    
    for candidate_skill in skill_extractor_s.candidate_skills(job_posting):
        soc_dict[candidate_skill.skill_name] = candidate_skill.context
        skills.append(candidate_skill.skill_name)
    monster_220k['soc_match'][counter] = em_dict
    
    monster_220k['skills'][counter] = list(set(skills))
    counter += 1

2020-08-02 16:51:30,379 : INFO : Found 32029 entries for lookup
2020-08-02 16:51:31,582 : INFO : Found 32029 entries for lookup
2020-08-02 16:51:32,727 : INFO : Yielding candidate skill communication skills in context College degree preferred, 2 - 5 years experience in print and / or online advertising sales and be able to show consistent sales results in previous positions, Knowledge of the IT industry is preferred, Track record of creativity in sales approaches and solutions, Track record of successfully meeting and exceeding sales goals in media sales relevant to 1105 Medias line of business, Excellent client presentation and communication skills as well as strong customer service and organizational skills, The ideal candidate is energetic, self - motivated, team - oriented, and customer - centric, Understanding of how to research potential customers and use online analytics from a sales perspective, Weekly local travel to meet with clients / prospects is required, Minimal non local

2020-08-02 16:51:32,995 : INFO : Yielding candidate skill communication skills in context Inc. ( 6029) Job FunctionQuality ( Generalist) Report Previous Next {" Caption ":""," Images ":[{" Src ":" http :// images. monster. com / mm / xjjsonx / cjt1 / Carousel - 5. png "," Title ": null ," Text ": null ," Caption ": null },{" Src ":" http :// images. monster. com / mm / xjjsonx / cjt1 / Carousel - 1. png "," Title ": null ," Text ": null ," Caption ": null },{" Src ":" http :// images. monster. com / mm / xjjsonx / cjt1 / Carousel - 2. png "," Title ": null ," Text ": null ," Caption ": null },{" Src ":" http :// images. monster. com / mm / xjjsonx / cjt1 / Carousel - 3. png "," Title ": null ," Text ": null ," Caption ": null },{" Src ":" http :// images. monster. com / mm / xjjsonx / cjt1 / Carousel - 4. png "," Title ": null ," Text ": null ," Caption ": null}]} ‹ › × College degree preferred, 2 - 5 years experience in print and / or online advertising sales and be able to show consi

# Save dataframe

In [24]:
monster_220k.head()

Unnamed: 0,country_code,job_description,job_title,job_type,location,organization,sector,soc_code,pattern,exact_match,soc_match,skills
0,US,TeamSoft is seeing an IT Support Specialist to...,IT Support Technician Job in Madison,Full Time Employee,"Madison, WI 53702",,IT/Software Development,19-1099.00,{'communication skills': 'College degree prefe...,{'call tracking software': 'Required Skills:• ...,{'call tracking software': 'Required Skills:• ...,"[microsoft office, organizational skills, call..."
1,US,The Wisconsin State Journal is seeking a flexi...,Business Reporter/Editor Job in Madison,Full Time,"Madison, WI 53708",Printing and Publishing,,19-1099.00,{'communication skills': 'As part of your onli...,{'platforms': 'Candidates must have strong new...,{'platforms': 'Candidates must have strong new...,"[platforms, self, organizational skills, commu..."
2,US,Report this job About the Job DePuy Synthes Co...,Johnson & Johnson Family of Companies Job Appl...,"Full Time, Employee",DePuy Synthes Companies is a member of Johnson...,Personal and Household Services,,17-2199.05,{'coaching skills': '• Strong leadership and c...,{'design': 'Directs staff in the design and ad...,{'design': 'Directs staff in the design and ad...,"[.• excellent communication skills, levels, co..."
3,US,Why Join Altec? If you’re considering a career...,Engineer - Quality Job in Dixon,Full Time,"Dixon, CA",Altec Industries,Experienced (Non-Manager),11-9039.00,{'verbal communication skills': 'MAJOR RESPONS...,{'design': 'MAJOR RESPONSIBILITIES:• Learns Al...,{'design': 'MAJOR RESPONSIBILITIES:• Learns Al...,"[rules, telecommunications, persuasion, skill,..."
4,US,Position ID# 76162 # Positions 1 State CT C...,Shift Supervisor - Part-Time Job in Camphill,Full Time Employee,"Camphill, PA",Retail,Project/Program Management,15-1199.00,{'communication skills': 'College degree prefe...,"{'self': 'College degree preferred, 2-5 years ...","{'self': 'College degree preferred, 2-5 years ...","[self, organizational skills, communication sk..."


In [25]:
dirPData = '../output/'
f_name = dirPData + 'df_output_skills.pickle'
with open(f_name, "wb") as f:
    pickle.dump(monster_220k, f)

In [26]:
# import pickle

# dirPData = ''
# f_name = dirPData + 'df_output.pickle'
# with open(f_name, "wb") as f:
#     pickle.dump(monster_220k, f)

dirPData = '../output/'
f_name = dirPData + 'df_output_skills.pickle'

with open(f_name, "rb") as f:
    dict_ = pickle.load(f)

df_output_skills = dict_
del f_name, dict_
    

In [29]:
df_output_skills.tail()

Unnamed: 0,country_code,job_description,job_title,job_type,location,organization,sector,soc_code,pattern,exact_match,soc_match,skills
21995,US,This is a major premier Cincinnati based finan...,Assistant Vice President - Controller Job in C...,Full Time,"Cincinnati, OH",,,19-1099.00,{'strong presentation skills': 'Proven strong ...,"{'monitors': 'Monitors costs and expenses, and...","{'monitors': 'Monitors costs and expenses, and...","[rules, monitoring, levels, organizational ski..."
21996,US,Luxury homebuilder in Cincinnati seeking multi...,Accountant Job in Cincinnati,Full Time,"Cincinnati, OH 45236",Construction - Residential & Commercial/Office,Manager (Manager/Supervisor of Staff),43-3099.00,{'communication skills': 'College degree prefe...,"{'self': 'College degree preferred, 2-5 years ...","{'self': 'College degree preferred, 2-5 years ...","[self, organizational skills, communication sk..."
21997,US,RE: Adobe AEM- Client - Loca...,AEM/CQ developer Job in Chicago,Full Time,"Chicago, IL 60602",,,17-2199.05,{'communication skills': 'Please send me resum...,{'self': 'Please send me resumes to vishakha@p...,{'self': 'Please send me resumes to vishakha@p...,"[self, organizational skills, communication sk..."
21998,US,Jernberg Industries was established in 1937 an...,Electrician - Experienced Forging Electrician ...,Full Time Employee,"Chicago, IL 60609","Jernberg Industries, Inc.",Installation/Maintenance/Repair,19-1029.00,{'diagnostic skills': 'We are seeking qualifie...,{'repairing': 'We are seeking qualified Candid...,{'repairing': 'We are seeking qualified Candid...,"[monitoring, troubleshooting, repairing, press..."
21999,US,Contract AdministratorCan you be the point per...,Contract Administrator Job in Cincinnati,Full Time,"Cincinnati, OH",,Experienced (Non-Manager),19-1029.00,{'communication skills': 'College degree prefe...,"{'self': 'College degree preferred, 2-5 years ...","{'self': 'College degree preferred, 2-5 years ...","[self, organizational skills, communication sk..."
