# Notes 2 Vect

The goal of this Notebook is to figure out if we can create an embedding for each specific note.

In [54]:
#Preamble
import sys
import numpy as np
import gensim
import logging as log
import csv
import pickle as pkl
from nltk.tokenize import word_tokenize

# Logging config
log.basicConfig(format='%(levelname)s : %(message)s', level=log.INFO)

Read in the .csv notes data:

In [35]:
def load_notes():
    "Load in the notes from the .csv file"
    global notes

    file_name = 'data/disch_full.csv'
    
    with open(file_name) as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader) # skip the header
        notes = [row[3] for row in csv_reader]
    
    log.info("Done loading notes with length {}".format(len(notes)))

In [56]:
def load_pickle_file(name):
    "Get the value from the pickle file and set it as the variable value"
    with open("data/{}.pkl".format(name), "rb") as pickle_file:        
        globals()[name] = np.array(pkl.load(pickle_file))

In [57]:
load_pickle_file("tagged_notes")

In [36]:
#load_notes()

2018-07-23 10:29:35,786 : INFO : Done loading notes with length 59652


##  Vectors

Now that all the notes are in the `notes` variable, we can begin training for our vectors. Reference material: [Article 1](https://medium.com/@mishra.thedeepak/doc2vec-simple-implementation-example-df2afbbfbad5) and [article 2](https://radimrehurek.com/gensim/models/doc2vec.html#gensim.models.doc2vec.TaggedDocument).

In [41]:
# tagged_notes = [gensim.models.doc2vec.TaggedDocument(words=word_tokenize(_d.lower()), tags=[str(i)]) for i, _d in enumerate(notes)]

In [52]:
def save_pickle_file(name, data):
    "Save out the data as a pickle file"
    with open("data/{}.pkl".format(name), "wb") as pickle_file:
        pkl.dump(data, pickle_file)

In [55]:
#save_pickle_file("tagged_notes", tagged_notes)

In [50]:
def make_model(max_epochs = 5, vec_size = 100, alpha = 0.025):
    "Make the model for notes 2 vect"
    global model
    
    model = gensim.models.doc2vec.Doc2Vec(vector_size=vec_size,
                                            alpha=alpha, 
                                            min_alpha=0.00025,
                                            min_count=1,
                                            dm=1,
                                            workers=2
                                         )
    model.build_vocab(tagged_notes)
    
    for epoch in range(max_epochs):
        log.info('iteration {0}'.format(epoch))
        model.train(tagged_notes,
                    total_examples=model.corpus_count,
                    epochs=model.iter)
        # decrease the learning rate
        model.alpha -= 0.0002
        # fix the learning rate, no decay
        model.min_alpha = model.alpha

    model.save("models/{}_{}_notes2vect.w2v".format(vec_size, max_epochs))
    log.info("Model Saved")

In [51]:
make_model(vec_size = 100)

2018-07-23 11:09:15,541 : INFO : collecting all words and their counts
2018-07-23 11:09:15,542 : INFO : PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
2018-07-23 11:09:18,015 : INFO : PROGRESS: at example #10000, processed 14397083 words (5821759/s), 61962 word types, 10000 tags
2018-07-23 11:09:20,158 : INFO : PROGRESS: at example #20000, processed 26410472 words (5609931/s), 82262 word types, 20000 tags
2018-07-23 11:09:22,300 : INFO : PROGRESS: at example #30000, processed 39530140 words (6130853/s), 102398 word types, 30000 tags
2018-07-23 11:09:24,861 : INFO : PROGRESS: at example #40000, processed 54252106 words (5756337/s), 122660 word types, 40000 tags
2018-07-23 11:09:30,102 : INFO : PROGRESS: at example #50000, processed 70128191 words (3029775/s), 141259 word types, 50000 tags
2018-07-23 11:09:31,802 : INFO : collected 150852 word types and 59652 unique tags from a corpus of 59652 examples and 79816095 words
2018-07-23 11:09:31,802 : INFO : Loading a 

iteration 0


2018-07-23 11:09:38,823 : INFO : EPOCH 1 - PROGRESS: at 1.18% examples, 868615 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:09:39,834 : INFO : EPOCH 1 - PROGRESS: at 2.29% examples, 827667 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:09:40,840 : INFO : EPOCH 1 - PROGRESS: at 3.41% examples, 827861 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:09:41,848 : INFO : EPOCH 1 - PROGRESS: at 4.52% examples, 822974 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:09:42,851 : INFO : EPOCH 1 - PROGRESS: at 5.77% examples, 843262 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:09:43,857 : INFO : EPOCH 1 - PROGRESS: at 6.89% examples, 847358 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:09:44,859 : INFO : EPOCH 1 - PROGRESS: at 8.17% examples, 859393 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:09:45,868 : INFO : EPOCH 1 - PROGRESS: at 9.47% examples, 871067 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:09:46,868 : INFO : EPOCH 1 - PROGRESS: at 10.88% examples, 879719 words/s, in_qsize 3, out

2018-07-23 11:10:49,691 : INFO : EPOCH 2 - PROGRESS: at 3.87% examples, 947446 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:10:50,700 : INFO : EPOCH 2 - PROGRESS: at 5.28% examples, 962428 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:10:51,702 : INFO : EPOCH 2 - PROGRESS: at 6.61% examples, 973997 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:10:52,703 : INFO : EPOCH 2 - PROGRESS: at 7.98% examples, 980163 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:10:53,710 : INFO : EPOCH 2 - PROGRESS: at 9.31% examples, 981785 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:10:54,711 : INFO : EPOCH 2 - PROGRESS: at 10.77% examples, 982120 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:10:55,713 : INFO : EPOCH 2 - PROGRESS: at 12.24% examples, 981159 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:10:56,718 : INFO : EPOCH 2 - PROGRESS: at 13.68% examples, 978291 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:10:57,718 : INFO : EPOCH 2 - PROGRESS: at 15.10% examples, 977948 words/s, in_qsize 4, 

2018-07-23 11:12:00,393 : INFO : EPOCH 3 - PROGRESS: at 4.72% examples, 861093 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:12:01,406 : INFO : EPOCH 3 - PROGRESS: at 5.87% examples, 860782 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:12:02,407 : INFO : EPOCH 3 - PROGRESS: at 6.90% examples, 852194 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:12:03,420 : INFO : EPOCH 3 - PROGRESS: at 8.18% examples, 861268 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:12:04,420 : INFO : EPOCH 3 - PROGRESS: at 9.36% examples, 862972 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:12:05,425 : INFO : EPOCH 3 - PROGRESS: at 10.69% examples, 866329 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:12:06,430 : INFO : EPOCH 3 - PROGRESS: at 11.96% examples, 863185 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:12:07,434 : INFO : EPOCH 3 - PROGRESS: at 13.41% examples, 871863 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:12:08,437 : INFO : EPOCH 3 - PROGRESS: at 14.59% examples, 868214 words/s, in_qsize 3, 

2018-07-23 11:13:11,352 : INFO : EPOCH 4 - PROGRESS: at 1.07% examples, 782743 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:13:12,360 : INFO : EPOCH 4 - PROGRESS: at 2.26% examples, 820102 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:13:13,361 : INFO : EPOCH 4 - PROGRESS: at 3.54% examples, 863303 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:13:14,366 : INFO : EPOCH 4 - PROGRESS: at 4.70% examples, 854715 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:13:15,371 : INFO : EPOCH 4 - PROGRESS: at 5.84% examples, 855245 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:13:16,375 : INFO : EPOCH 4 - PROGRESS: at 6.89% examples, 847175 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:13:17,377 : INFO : EPOCH 4 - PROGRESS: at 8.20% examples, 863637 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:13:18,384 : INFO : EPOCH 4 - PROGRESS: at 9.33% examples, 859903 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:13:19,388 : INFO : EPOCH 4 - PROGRESS: at 10.76% examples, 871063 words/s, in_qsize 3, out

2018-07-23 11:14:23,209 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 11:14:23,209 : INFO : EPOCH - 4 : training on 79816095 raw words (66491709 effective words) took 72.9s, 912500 effective words/s
2018-07-23 11:14:24,214 : INFO : EPOCH 5 - PROGRESS: at 1.05% examples, 782142 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:14:25,218 : INFO : EPOCH 5 - PROGRESS: at 2.31% examples, 841011 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:14:26,220 : INFO : EPOCH 5 - PROGRESS: at 3.58% examples, 877205 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:14:27,227 : INFO : EPOCH 5 - PROGRESS: at 4.94% examples, 901527 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:14:28,236 : INFO : EPOCH 5 - PROGRESS: at 6.25% examples, 920403 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:14:29,250 : INFO : EPOCH 5 - PROGRESS: at 7.14% examples, 878080 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:14:30,255 : INFO : EPOCH 5 - PROGRESS: at 8.26% examples, 869440 words/s, in_qsize 

2018-07-23 11:15:33,000 : INFO : EPOCH - 5 : training on 79816095 raw words (66492702 effective words) took 69.8s, 952779 effective words/s
2018-07-23 11:15:33,001 : INFO : training on a 399080475 raw words (332473471 effective words) took 355.2s, 936052 effective words/s
2018-07-23 11:15:33,002 : INFO : training model with 2 workers on 150852 vocabulary and 100 features, using sg=0 hs=0 sample=0.001 negative=5 window=5


iteration 1


2018-07-23 11:15:34,016 : INFO : EPOCH 1 - PROGRESS: at 1.28% examples, 931687 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:15:35,021 : INFO : EPOCH 1 - PROGRESS: at 2.42% examples, 874618 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:15:36,028 : INFO : EPOCH 1 - PROGRESS: at 3.06% examples, 742745 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:15:37,038 : INFO : EPOCH 1 - PROGRESS: at 4.41% examples, 802666 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:15:38,047 : INFO : EPOCH 1 - PROGRESS: at 5.81% examples, 849886 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:15:39,048 : INFO : EPOCH 1 - PROGRESS: at 7.13% examples, 876102 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:15:40,059 : INFO : EPOCH 1 - PROGRESS: at 8.50% examples, 892789 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:15:41,068 : INFO : EPOCH 1 - PROGRESS: at 9.91% examples, 906663 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:15:42,100 : INFO : EPOCH 1 - PROGRESS: at 11.26% examples, 903510 words/s, in_qsize 3, out

2018-07-23 11:16:45,174 : INFO : EPOCH 2 - PROGRESS: at 1.65% examples, 596246 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:16:46,183 : INFO : EPOCH 2 - PROGRESS: at 2.78% examples, 672441 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:16:47,184 : INFO : EPOCH 2 - PROGRESS: at 3.89% examples, 711125 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:16:48,192 : INFO : EPOCH 2 - PROGRESS: at 5.07% examples, 736948 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:16:49,193 : INFO : EPOCH 2 - PROGRESS: at 6.27% examples, 768975 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:16:50,195 : INFO : EPOCH 2 - PROGRESS: at 7.67% examples, 806908 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:16:51,201 : INFO : EPOCH 2 - PROGRESS: at 9.01% examples, 830151 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:16:52,203 : INFO : EPOCH 2 - PROGRESS: at 10.39% examples, 843694 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:16:53,204 : INFO : EPOCH 2 - PROGRESS: at 11.87% examples, 856747 words/s, in_qsize 3, ou

2018-07-23 11:17:56,193 : INFO : EPOCH - 2 : training on 79816095 raw words (66495366 effective words) took 73.0s, 910390 effective words/s
2018-07-23 11:17:57,196 : INFO : EPOCH 3 - PROGRESS: at 1.18% examples, 873540 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:17:58,210 : INFO : EPOCH 3 - PROGRESS: at 2.46% examples, 891938 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:17:59,214 : INFO : EPOCH 3 - PROGRESS: at 3.66% examples, 895107 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:18:00,224 : INFO : EPOCH 3 - PROGRESS: at 4.91% examples, 890813 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:18:01,234 : INFO : EPOCH 3 - PROGRESS: at 6.14% examples, 898857 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:18:02,241 : INFO : EPOCH 3 - PROGRESS: at 7.47% examples, 917478 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:18:03,251 : INFO : EPOCH 3 - PROGRESS: at 8.85% examples, 929112 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:18:04,254 : INFO : EPOCH 3 - PROGRESS: at 10.20% examples, 931

2018-07-23 11:19:06,691 : INFO : EPOCH 4 - PROGRESS: at 2.53% examples, 919695 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:19:07,697 : INFO : EPOCH 4 - PROGRESS: at 3.89% examples, 948121 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:19:08,707 : INFO : EPOCH 4 - PROGRESS: at 5.26% examples, 957326 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:19:09,718 : INFO : EPOCH 4 - PROGRESS: at 6.40% examples, 938659 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:19:10,720 : INFO : EPOCH 4 - PROGRESS: at 7.75% examples, 949580 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:19:11,720 : INFO : EPOCH 4 - PROGRESS: at 9.10% examples, 958610 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:19:12,723 : INFO : EPOCH 4 - PROGRESS: at 10.42% examples, 950377 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:19:13,724 : INFO : EPOCH 4 - PROGRESS: at 11.89% examples, 952261 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:19:14,730 : INFO : EPOCH 4 - PROGRESS: at 13.29% examples, 950837 words/s, in_qsize 3, o

2018-07-23 11:20:17,975 : INFO : EPOCH 5 - PROGRESS: at 6.64% examples, 978499 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:20:18,978 : INFO : EPOCH 5 - PROGRESS: at 8.00% examples, 982698 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:20:19,981 : INFO : EPOCH 5 - PROGRESS: at 9.39% examples, 989701 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:20:20,986 : INFO : EPOCH 5 - PROGRESS: at 10.87% examples, 989876 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:20:21,990 : INFO : EPOCH 5 - PROGRESS: at 12.41% examples, 992664 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:20:23,000 : INFO : EPOCH 5 - PROGRESS: at 13.92% examples, 994783 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:20:24,001 : INFO : EPOCH 5 - PROGRESS: at 15.42% examples, 996914 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:20:25,015 : INFO : EPOCH 5 - PROGRESS: at 16.84% examples, 995992 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:20:26,017 : INFO : EPOCH 5 - PROGRESS: at 18.28% examples, 997765 words/s, in_qsize 4

iteration 2


2018-07-23 11:21:24,902 : INFO : EPOCH 1 - PROGRESS: at 1.19% examples, 878758 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:21:25,903 : INFO : EPOCH 1 - PROGRESS: at 2.55% examples, 934089 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:21:26,905 : INFO : EPOCH 1 - PROGRESS: at 3.89% examples, 954200 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:21:27,907 : INFO : EPOCH 1 - PROGRESS: at 5.31% examples, 971367 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:21:28,915 : INFO : EPOCH 1 - PROGRESS: at 6.61% examples, 975151 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:21:29,925 : INFO : EPOCH 1 - PROGRESS: at 7.98% examples, 980885 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:21:30,932 : INFO : EPOCH 1 - PROGRESS: at 9.26% examples, 976026 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:21:31,935 : INFO : EPOCH 1 - PROGRESS: at 10.65% examples, 972414 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:21:32,936 : INFO : EPOCH 1 - PROGRESS: at 11.93% examples, 957290 words/s, in_qsize 4, ou

2018-07-23 11:22:38,288 : INFO : EPOCH 1 - PROGRESS: at 90.50% examples, 854152 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:22:39,301 : INFO : EPOCH 1 - PROGRESS: at 91.75% examples, 855452 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:22:40,305 : INFO : EPOCH 1 - PROGRESS: at 93.15% examples, 853047 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:22:41,322 : INFO : EPOCH 1 - PROGRESS: at 97.17% examples, 853892 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:22:42,003 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-07-23 11:22:42,010 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 11:22:42,011 : INFO : EPOCH - 1 : training on 79816095 raw words (66494807 effective words) took 78.1s, 851279 effective words/s
2018-07-23 11:22:43,016 : INFO : EPOCH 2 - PROGRESS: at 1.04% examples, 766992 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:22:44,029 : INFO : EPOCH 2 - PROGRESS: at 2.17% examples, 790721 words/s, in_qsize 3, out_qsize 0
20

2018-07-23 11:23:49,489 : INFO : EPOCH 2 - PROGRESS: at 78.88% examples, 804820 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:23:50,495 : INFO : EPOCH 2 - PROGRESS: at 80.04% examples, 806671 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:23:51,501 : INFO : EPOCH 2 - PROGRESS: at 81.02% examples, 806974 words/s, in_qsize 3, out_qsize 1
2018-07-23 11:23:52,519 : INFO : EPOCH 2 - PROGRESS: at 82.08% examples, 807876 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:23:53,528 : INFO : EPOCH 2 - PROGRESS: at 83.09% examples, 808303 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:23:54,538 : INFO : EPOCH 2 - PROGRESS: at 84.19% examples, 809559 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:23:55,539 : INFO : EPOCH 2 - PROGRESS: at 85.09% examples, 808376 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:23:56,553 : INFO : EPOCH 2 - PROGRESS: at 86.30% examples, 809992 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:23:57,555 : INFO : EPOCH 2 - PROGRESS: at 87.25% examples, 808804 words/s, in_qsiz

2018-07-23 11:25:00,709 : INFO : EPOCH 3 - PROGRESS: at 70.62% examples, 849624 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:25:01,718 : INFO : EPOCH 3 - PROGRESS: at 71.86% examples, 852394 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:25:02,732 : INFO : EPOCH 3 - PROGRESS: at 72.87% examples, 851165 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:25:03,734 : INFO : EPOCH 3 - PROGRESS: at 74.12% examples, 852847 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:25:04,740 : INFO : EPOCH 3 - PROGRESS: at 75.13% examples, 852034 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:25:05,741 : INFO : EPOCH 3 - PROGRESS: at 76.40% examples, 853606 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:25:06,756 : INFO : EPOCH 3 - PROGRESS: at 77.47% examples, 852780 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:25:07,758 : INFO : EPOCH 3 - PROGRESS: at 78.64% examples, 853029 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:25:08,766 : INFO : EPOCH 3 - PROGRESS: at 79.70% examples, 852889 words/s, in_qsiz

2018-07-23 11:26:11,746 : INFO : EPOCH 4 - PROGRESS: at 64.08% examples, 868866 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:26:12,748 : INFO : EPOCH 4 - PROGRESS: at 65.30% examples, 870230 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:26:13,753 : INFO : EPOCH 4 - PROGRESS: at 66.55% examples, 871597 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:26:14,763 : INFO : EPOCH 4 - PROGRESS: at 67.88% examples, 874399 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:26:15,766 : INFO : EPOCH 4 - PROGRESS: at 69.04% examples, 875670 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:26:16,768 : INFO : EPOCH 4 - PROGRESS: at 70.31% examples, 877976 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:26:17,771 : INFO : EPOCH 4 - PROGRESS: at 71.44% examples, 878860 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:26:18,779 : INFO : EPOCH 4 - PROGRESS: at 72.74% examples, 881358 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:26:19,789 : INFO : EPOCH 4 - PROGRESS: at 74.05% examples, 883172 words/s, in_qsiz

2018-07-23 11:27:22,585 : INFO : EPOCH 5 - PROGRESS: at 67.09% examples, 975211 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:23,593 : INFO : EPOCH 5 - PROGRESS: at 68.35% examples, 975326 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:27:24,593 : INFO : EPOCH 5 - PROGRESS: at 69.63% examples, 976722 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:25,595 : INFO : EPOCH 5 - PROGRESS: at 70.84% examples, 976507 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:26,602 : INFO : EPOCH 5 - PROGRESS: at 72.14% examples, 977675 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:27,614 : INFO : EPOCH 5 - PROGRESS: at 73.45% examples, 978265 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:28,619 : INFO : EPOCH 5 - PROGRESS: at 74.80% examples, 979508 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:27:29,619 : INFO : EPOCH 5 - PROGRESS: at 76.16% examples, 980237 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:30,627 : INFO : EPOCH 5 - PROGRESS: at 77.49% examples, 980657 words/s, in_qsiz

iteration 3


2018-07-23 11:27:45,044 : INFO : EPOCH 1 - PROGRESS: at 1.21% examples, 887487 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:46,058 : INFO : EPOCH 1 - PROGRESS: at 2.64% examples, 959019 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:47,064 : INFO : EPOCH 1 - PROGRESS: at 3.80% examples, 925988 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:48,066 : INFO : EPOCH 1 - PROGRESS: at 5.14% examples, 934317 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:27:49,071 : INFO : EPOCH 1 - PROGRESS: at 6.49% examples, 954978 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:27:50,080 : INFO : EPOCH 1 - PROGRESS: at 7.90% examples, 967418 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:27:51,083 : INFO : EPOCH 1 - PROGRESS: at 9.03% examples, 950972 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:27:52,086 : INFO : EPOCH 1 - PROGRESS: at 10.24% examples, 936274 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:27:53,088 : INFO : EPOCH 1 - PROGRESS: at 11.53% examples, 925398 words/s, in_qsize 4, ou

2018-07-23 11:28:57,950 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-07-23 11:28:57,958 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 11:28:57,958 : INFO : EPOCH - 1 : training on 79816095 raw words (66492378 effective words) took 73.9s, 899471 effective words/s
2018-07-23 11:28:58,962 : INFO : EPOCH 2 - PROGRESS: at 1.22% examples, 902837 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:28:59,968 : INFO : EPOCH 2 - PROGRESS: at 2.56% examples, 936403 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:29:00,969 : INFO : EPOCH 2 - PROGRESS: at 3.97% examples, 970816 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:29:01,972 : INFO : EPOCH 2 - PROGRESS: at 5.34% examples, 975909 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:29:02,977 : INFO : EPOCH 2 - PROGRESS: at 6.65% examples, 982201 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:29:03,981 : INFO : EPOCH 2 - PROGRESS: at 7.99% examples, 983101 words/s, in_qsize 3, out_qsize 0
2018-0

2018-07-23 11:30:07,266 : INFO : EPOCH 3 - PROGRESS: at 4.08% examples, 995950 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:30:08,277 : INFO : EPOCH 3 - PROGRESS: at 5.49% examples, 1005337 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:30:09,278 : INFO : EPOCH 3 - PROGRESS: at 6.84% examples, 1011157 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:30:10,284 : INFO : EPOCH 3 - PROGRESS: at 8.23% examples, 1012121 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:30:11,288 : INFO : EPOCH 3 - PROGRESS: at 9.70% examples, 1018984 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:30:12,295 : INFO : EPOCH 3 - PROGRESS: at 11.21% examples, 1018658 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:30:13,305 : INFO : EPOCH 3 - PROGRESS: at 12.77% examples, 1019615 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:30:14,316 : INFO : EPOCH 3 - PROGRESS: at 14.31% examples, 1019442 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:30:15,321 : INFO : EPOCH 3 - PROGRESS: at 15.81% examples, 1020953 words/s, in_q

2018-07-23 11:31:18,490 : INFO : EPOCH 4 - PROGRESS: at 9.60% examples, 1009261 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:31:19,493 : INFO : EPOCH 4 - PROGRESS: at 11.09% examples, 1008246 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:31:20,496 : INFO : EPOCH 4 - PROGRESS: at 12.57% examples, 1006485 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:31:21,509 : INFO : EPOCH 4 - PROGRESS: at 14.09% examples, 1005439 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:31:22,510 : INFO : EPOCH 4 - PROGRESS: at 15.55% examples, 1004626 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:31:23,517 : INFO : EPOCH 4 - PROGRESS: at 17.00% examples, 1006250 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:31:24,523 : INFO : EPOCH 4 - PROGRESS: at 18.40% examples, 1004568 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:31:25,533 : INFO : EPOCH 4 - PROGRESS: at 20.03% examples, 1005666 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:31:26,538 : INFO : EPOCH 4 - PROGRESS: at 21.82% examples, 1005066 words/s,

2018-07-23 11:32:29,048 : INFO : EPOCH 5 - PROGRESS: at 8.65% examples, 795678 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:32:30,058 : INFO : EPOCH 5 - PROGRESS: at 9.69% examples, 789504 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:32:31,065 : INFO : EPOCH 5 - PROGRESS: at 10.85% examples, 788320 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:32:32,070 : INFO : EPOCH 5 - PROGRESS: at 12.11% examples, 792525 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:32:33,080 : INFO : EPOCH 5 - PROGRESS: at 13.05% examples, 778380 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:32:34,095 : INFO : EPOCH 5 - PROGRESS: at 14.31% examples, 782645 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:32:35,104 : INFO : EPOCH 5 - PROGRESS: at 15.43% examples, 781357 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:32:36,104 : INFO : EPOCH 5 - PROGRESS: at 16.45% examples, 776391 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:32:37,109 : INFO : EPOCH 5 - PROGRESS: at 17.65% examples, 781225 words/s, in_qsize 

2018-07-23 11:33:41,941 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-07-23 11:33:41,948 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 11:33:41,949 : INFO : EPOCH - 5 : training on 79816095 raw words (66489970 effective words) took 81.0s, 821315 effective words/s
2018-07-23 11:33:41,949 : INFO : training on a 399080475 raw words (332458496 effective words) took 357.9s, 928873 effective words/s
2018-07-23 11:33:41,950 : INFO : training model with 2 workers on 150852 vocabulary and 100 features, using sg=0 hs=0 sample=0.001 negative=5 window=5


iteration 4


2018-07-23 11:33:42,953 : INFO : EPOCH 1 - PROGRESS: at 1.09% examples, 811929 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:33:43,956 : INFO : EPOCH 1 - PROGRESS: at 2.25% examples, 821297 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:33:44,957 : INFO : EPOCH 1 - PROGRESS: at 3.38% examples, 828222 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:33:45,958 : INFO : EPOCH 1 - PROGRESS: at 4.34% examples, 796240 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:33:46,961 : INFO : EPOCH 1 - PROGRESS: at 5.68% examples, 834060 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:33:47,989 : INFO : EPOCH 1 - PROGRESS: at 6.73% examples, 824966 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:33:48,992 : INFO : EPOCH 1 - PROGRESS: at 7.86% examples, 827460 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:33:49,996 : INFO : EPOCH 1 - PROGRESS: at 9.03% examples, 832526 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:33:50,998 : INFO : EPOCH 1 - PROGRESS: at 10.17% examples, 827533 words/s, in_qsize 3, out

2018-07-23 11:34:56,421 : INFO : EPOCH 1 - PROGRESS: at 89.19% examples, 840280 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:34:57,431 : INFO : EPOCH 1 - PROGRESS: at 90.18% examples, 838709 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:34:58,436 : INFO : EPOCH 1 - PROGRESS: at 91.24% examples, 838352 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:34:59,436 : INFO : EPOCH 1 - PROGRESS: at 92.88% examples, 839074 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:35:00,437 : INFO : EPOCH 1 - PROGRESS: at 95.24% examples, 838722 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:35:01,205 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-07-23 11:35:01,206 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 11:35:01,207 : INFO : EPOCH - 1 : training on 79816095 raw words (66496798 effective words) took 79.3s, 839022 effective words/s
2018-07-23 11:35:02,227 : INFO : EPOCH 2 - PROGRESS: at 1.02% examples, 733683 words/s, in_qsize 3, out_qsize 0
2

2018-07-23 11:36:07,603 : INFO : EPOCH 2 - PROGRESS: at 81.30% examples, 848084 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:36:08,611 : INFO : EPOCH 2 - PROGRESS: at 82.58% examples, 851334 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:36:09,617 : INFO : EPOCH 2 - PROGRESS: at 83.84% examples, 854042 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:36:10,627 : INFO : EPOCH 2 - PROGRESS: at 85.02% examples, 855394 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:36:11,628 : INFO : EPOCH 2 - PROGRESS: at 86.30% examples, 857325 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:36:12,642 : INFO : EPOCH 2 - PROGRESS: at 87.62% examples, 859383 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:36:13,648 : INFO : EPOCH 2 - PROGRESS: at 88.83% examples, 860045 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:36:14,649 : INFO : EPOCH 2 - PROGRESS: at 90.19% examples, 862011 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:36:15,657 : INFO : EPOCH 2 - PROGRESS: at 91.46% examples, 863442 words/s, in_qsiz

2018-07-23 11:37:18,316 : INFO : EPOCH 3 - PROGRESS: at 78.43% examples, 894220 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:37:19,316 : INFO : EPOCH 3 - PROGRESS: at 79.60% examples, 894589 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:37:20,329 : INFO : EPOCH 3 - PROGRESS: at 80.56% examples, 892761 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:37:21,336 : INFO : EPOCH 3 - PROGRESS: at 81.78% examples, 894887 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:37:22,342 : INFO : EPOCH 3 - PROGRESS: at 82.77% examples, 893610 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:37:23,345 : INFO : EPOCH 3 - PROGRESS: at 84.02% examples, 895594 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:37:24,349 : INFO : EPOCH 3 - PROGRESS: at 84.89% examples, 892942 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:37:25,357 : INFO : EPOCH 3 - PROGRESS: at 86.13% examples, 894012 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:37:26,359 : INFO : EPOCH 3 - PROGRESS: at 87.21% examples, 892768 words/s, in_qsiz

2018-07-23 11:38:29,154 : INFO : EPOCH 4 - PROGRESS: at 71.27% examples, 860296 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:38:30,157 : INFO : EPOCH 4 - PROGRESS: at 72.57% examples, 863116 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:38:31,160 : INFO : EPOCH 4 - PROGRESS: at 73.53% examples, 861144 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:38:32,173 : INFO : EPOCH 4 - PROGRESS: at 74.85% examples, 863695 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:38:33,186 : INFO : EPOCH 4 - PROGRESS: at 75.77% examples, 860536 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:38:34,191 : INFO : EPOCH 4 - PROGRESS: at 77.15% examples, 863393 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:38:35,209 : INFO : EPOCH 4 - PROGRESS: at 78.12% examples, 861190 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:38:36,210 : INFO : EPOCH 4 - PROGRESS: at 79.42% examples, 863172 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:38:37,213 : INFO : EPOCH 4 - PROGRESS: at 80.36% examples, 861874 words/s, in_qsiz

2018-07-23 11:39:40,237 : INFO : EPOCH 5 - PROGRESS: at 64.36% examples, 855767 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:39:41,251 : INFO : EPOCH 5 - PROGRESS: at 65.43% examples, 854920 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:39:42,260 : INFO : EPOCH 5 - PROGRESS: at 66.70% examples, 856977 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:39:43,267 : INFO : EPOCH 5 - PROGRESS: at 67.75% examples, 855845 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:39:44,270 : INFO : EPOCH 5 - PROGRESS: at 68.96% examples, 858313 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:39:45,271 : INFO : EPOCH 5 - PROGRESS: at 69.88% examples, 855727 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:39:46,287 : INFO : EPOCH 5 - PROGRESS: at 71.14% examples, 858448 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:39:47,298 : INFO : EPOCH 5 - PROGRESS: at 71.88% examples, 853626 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:39:48,298 : INFO : EPOCH 5 - PROGRESS: at 73.21% examples, 856989 words/s, in_qsiz

iteration 5


2018-07-23 11:40:08,191 : INFO : EPOCH 1 - PROGRESS: at 1.16% examples, 863907 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:40:09,198 : INFO : EPOCH 1 - PROGRESS: at 2.07% examples, 755007 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:40:10,200 : INFO : EPOCH 1 - PROGRESS: at 3.49% examples, 850899 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:40:11,204 : INFO : EPOCH 1 - PROGRESS: at 4.32% examples, 790125 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:40:12,212 : INFO : EPOCH 1 - PROGRESS: at 5.53% examples, 810322 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:40:13,230 : INFO : EPOCH 1 - PROGRESS: at 6.47% examples, 793305 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:40:14,237 : INFO : EPOCH 1 - PROGRESS: at 7.65% examples, 804869 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:40:15,247 : INFO : EPOCH 1 - PROGRESS: at 8.74% examples, 803939 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:40:16,249 : INFO : EPOCH 1 - PROGRESS: at 9.96% examples, 810632 words/s, in_qsize 3, out_

2018-07-23 11:41:21,189 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-07-23 11:41:21,201 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 11:41:21,202 : INFO : EPOCH - 1 : training on 79816095 raw words (66491423 effective words) took 74.0s, 898362 effective words/s
2018-07-23 11:41:22,218 : INFO : EPOCH 2 - PROGRESS: at 1.16% examples, 855133 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:41:23,222 : INFO : EPOCH 2 - PROGRESS: at 2.32% examples, 839411 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:41:24,230 : INFO : EPOCH 2 - PROGRESS: at 3.69% examples, 900330 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:41:25,241 : INFO : EPOCH 2 - PROGRESS: at 5.06% examples, 918367 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:41:26,243 : INFO : EPOCH 2 - PROGRESS: at 6.37% examples, 935573 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:41:27,253 : INFO : EPOCH 2 - PROGRESS: at 7.72% examples, 945522 words/s, in_qsize 3, out_qsize 0
2018-0

2018-07-23 11:42:31,365 : INFO : EPOCH 3 - PROGRESS: at 1.26% examples, 919828 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:42:32,365 : INFO : EPOCH 3 - PROGRESS: at 2.47% examples, 899321 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:42:33,366 : INFO : EPOCH 3 - PROGRESS: at 3.77% examples, 926699 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:42:34,378 : INFO : EPOCH 3 - PROGRESS: at 5.11% examples, 930891 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:42:35,379 : INFO : EPOCH 3 - PROGRESS: at 6.33% examples, 932476 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:42:36,382 : INFO : EPOCH 3 - PROGRESS: at 7.62% examples, 938149 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:42:37,392 : INFO : EPOCH 3 - PROGRESS: at 8.88% examples, 935903 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:42:38,394 : INFO : EPOCH 3 - PROGRESS: at 10.27% examples, 940796 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:42:39,403 : INFO : EPOCH 3 - PROGRESS: at 11.73% examples, 940051 words/s, in_qsize 3, ou

2018-07-23 11:43:42,574 : INFO : EPOCH 4 - PROGRESS: at 4.08% examples, 993983 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:43:43,582 : INFO : EPOCH 4 - PROGRESS: at 5.50% examples, 1006336 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:43:44,584 : INFO : EPOCH 4 - PROGRESS: at 6.83% examples, 1008841 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:43:45,590 : INFO : EPOCH 4 - PROGRESS: at 8.14% examples, 999845 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:43:46,602 : INFO : EPOCH 4 - PROGRESS: at 9.40% examples, 989282 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:43:47,607 : INFO : EPOCH 4 - PROGRESS: at 10.61% examples, 968032 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:43:48,616 : INFO : EPOCH 4 - PROGRESS: at 12.18% examples, 974220 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:43:49,617 : INFO : EPOCH 4 - PROGRESS: at 13.53% examples, 967410 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:43:50,618 : INFO : EPOCH 4 - PROGRESS: at 14.94% examples, 966866 words/s, in_qsize 4

2018-07-23 11:44:53,456 : INFO : EPOCH 5 - PROGRESS: at 3.89% examples, 948131 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:44:54,464 : INFO : EPOCH 5 - PROGRESS: at 5.23% examples, 949861 words/s, in_qsize 4, out_qsize 1
2018-07-23 11:44:55,468 : INFO : EPOCH 5 - PROGRESS: at 6.51% examples, 955220 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:44:56,478 : INFO : EPOCH 5 - PROGRESS: at 7.91% examples, 967399 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:44:57,481 : INFO : EPOCH 5 - PROGRESS: at 9.25% examples, 971747 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:44:58,484 : INFO : EPOCH 5 - PROGRESS: at 10.49% examples, 956096 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:44:59,491 : INFO : EPOCH 5 - PROGRESS: at 11.90% examples, 951624 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:45:00,500 : INFO : EPOCH 5 - PROGRESS: at 13.47% examples, 961424 words/s, in_qsize 4, out_qsize 1
2018-07-23 11:45:01,500 : INFO : EPOCH 5 - PROGRESS: at 14.91% examples, 963399 words/s, in_qsize 3, 

2018-07-23 11:46:02,771 : INFO : training model with 2 workers on 150852 vocabulary and 100 features, using sg=0 hs=0 sample=0.001 negative=5 window=5


iteration 6


2018-07-23 11:46:03,774 : INFO : EPOCH 1 - PROGRESS: at 1.46% examples, 1062978 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:46:04,781 : INFO : EPOCH 1 - PROGRESS: at 2.90% examples, 1062535 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:46:05,789 : INFO : EPOCH 1 - PROGRESS: at 4.23% examples, 1029643 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:46:06,790 : INFO : EPOCH 1 - PROGRESS: at 5.48% examples, 1004431 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:46:07,792 : INFO : EPOCH 1 - PROGRESS: at 6.65% examples, 981653 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:46:08,800 : INFO : EPOCH 1 - PROGRESS: at 7.96% examples, 978129 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:46:09,811 : INFO : EPOCH 1 - PROGRESS: at 9.12% examples, 961889 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:46:10,817 : INFO : EPOCH 1 - PROGRESS: at 10.49% examples, 957601 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:46:11,819 : INFO : EPOCH 1 - PROGRESS: at 11.94% examples, 957086 words/s, in_qsize 3

2018-07-23 11:47:16,475 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-07-23 11:47:16,487 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 11:47:16,487 : INFO : EPOCH - 1 : training on 79816095 raw words (66496004 effective words) took 73.7s, 902077 effective words/s
2018-07-23 11:47:17,496 : INFO : EPOCH 2 - PROGRESS: at 1.33% examples, 972643 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:47:18,505 : INFO : EPOCH 2 - PROGRESS: at 2.62% examples, 953466 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:47:19,507 : INFO : EPOCH 2 - PROGRESS: at 3.93% examples, 959297 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:47:20,510 : INFO : EPOCH 2 - PROGRESS: at 5.33% examples, 971471 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:47:21,511 : INFO : EPOCH 2 - PROGRESS: at 6.53% examples, 962237 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:47:22,519 : INFO : EPOCH 2 - PROGRESS: at 7.91% examples, 970862 words/s, in_qsize 3, out_qsize 0
2018-0

2018-07-23 11:48:26,772 : INFO : EPOCH 3 - PROGRESS: at 1.19% examples, 881160 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:48:27,782 : INFO : EPOCH 3 - PROGRESS: at 2.41% examples, 873667 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:48:28,791 : INFO : EPOCH 3 - PROGRESS: at 3.50% examples, 851621 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:48:29,792 : INFO : EPOCH 3 - PROGRESS: at 4.83% examples, 879628 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:48:30,793 : INFO : EPOCH 3 - PROGRESS: at 5.95% examples, 873661 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:48:31,796 : INFO : EPOCH 3 - PROGRESS: at 7.21% examples, 889871 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:48:32,796 : INFO : EPOCH 3 - PROGRESS: at 8.47% examples, 893709 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:48:33,805 : INFO : EPOCH 3 - PROGRESS: at 9.73% examples, 894543 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:48:34,807 : INFO : EPOCH 3 - PROGRESS: at 11.06% examples, 894069 words/s, in_qsize 3, out

2018-07-23 11:49:37,961 : INFO : EPOCH 4 - PROGRESS: at 1.32% examples, 965574 words/s, in_qsize 4, out_qsize 1
2018-07-23 11:49:38,964 : INFO : EPOCH 4 - PROGRESS: at 2.48% examples, 902264 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:49:39,969 : INFO : EPOCH 4 - PROGRESS: at 3.76% examples, 922120 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:49:40,985 : INFO : EPOCH 4 - PROGRESS: at 5.05% examples, 917523 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:49:41,989 : INFO : EPOCH 4 - PROGRESS: at 6.15% examples, 901032 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:49:42,990 : INFO : EPOCH 4 - PROGRESS: at 7.37% examples, 907466 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:49:43,991 : INFO : EPOCH 4 - PROGRESS: at 8.52% examples, 897061 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:49:44,995 : INFO : EPOCH 4 - PROGRESS: at 9.67% examples, 888203 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:49:45,999 : INFO : EPOCH 4 - PROGRESS: at 11.09% examples, 895445 words/s, in_qsize 4, out

2018-07-23 11:50:50,186 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 11:50:50,187 : INFO : EPOCH - 4 : training on 79816095 raw words (66493827 effective words) took 73.2s, 907972 effective words/s
2018-07-23 11:50:51,196 : INFO : EPOCH 5 - PROGRESS: at 1.32% examples, 966213 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:50:52,197 : INFO : EPOCH 5 - PROGRESS: at 2.59% examples, 950242 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:50:53,202 : INFO : EPOCH 5 - PROGRESS: at 3.95% examples, 966781 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:50:54,206 : INFO : EPOCH 5 - PROGRESS: at 5.35% examples, 977881 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:50:55,210 : INFO : EPOCH 5 - PROGRESS: at 6.56% examples, 967143 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:50:56,216 : INFO : EPOCH 5 - PROGRESS: at 7.90% examples, 970502 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:50:57,217 : INFO : EPOCH 5 - PROGRESS: at 9.04% examples, 955081 words/s, in_qsize 

2018-07-23 11:52:00,426 : INFO : EPOCH - 5 : training on 79816095 raw words (66494164 effective words) took 70.2s, 946719 effective words/s
2018-07-23 11:52:00,427 : INFO : training on a 399080475 raw words (332470063 effective words) took 357.7s, 929582 effective words/s
2018-07-23 11:52:00,428 : INFO : training model with 2 workers on 150852 vocabulary and 100 features, using sg=0 hs=0 sample=0.001 negative=5 window=5


iteration 7


2018-07-23 11:52:01,441 : INFO : EPOCH 1 - PROGRESS: at 1.32% examples, 961544 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:52:02,444 : INFO : EPOCH 1 - PROGRESS: at 2.50% examples, 908022 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:52:03,450 : INFO : EPOCH 1 - PROGRESS: at 3.81% examples, 930786 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:52:04,452 : INFO : EPOCH 1 - PROGRESS: at 5.21% examples, 949689 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:52:05,454 : INFO : EPOCH 1 - PROGRESS: at 6.52% examples, 960133 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:52:06,455 : INFO : EPOCH 1 - PROGRESS: at 7.86% examples, 966551 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:52:07,455 : INFO : EPOCH 1 - PROGRESS: at 9.21% examples, 972293 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:52:08,457 : INFO : EPOCH 1 - PROGRESS: at 10.69% examples, 975929 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:52:09,460 : INFO : EPOCH 1 - PROGRESS: at 12.19% examples, 977818 words/s, in_qsize 3, ou

2018-07-23 11:53:12,128 : INFO : EPOCH 2 - PROGRESS: at 1.33% examples, 979846 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:53:13,130 : INFO : EPOCH 2 - PROGRESS: at 2.71% examples, 991286 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:53:14,133 : INFO : EPOCH 2 - PROGRESS: at 3.97% examples, 971122 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:53:15,135 : INFO : EPOCH 2 - PROGRESS: at 5.22% examples, 955391 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:53:16,139 : INFO : EPOCH 2 - PROGRESS: at 6.28% examples, 928331 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:53:17,149 : INFO : EPOCH 2 - PROGRESS: at 7.61% examples, 937506 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:53:18,153 : INFO : EPOCH 2 - PROGRESS: at 8.77% examples, 925789 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:53:19,160 : INFO : EPOCH 2 - PROGRESS: at 10.06% examples, 923782 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:53:20,170 : INFO : EPOCH 2 - PROGRESS: at 11.47% examples, 922237 words/s, in_qsize 4, ou

2018-07-23 11:54:23,264 : INFO : EPOCH 3 - PROGRESS: at 3.67% examples, 898024 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:54:24,265 : INFO : EPOCH 3 - PROGRESS: at 5.02% examples, 914894 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:54:25,267 : INFO : EPOCH 3 - PROGRESS: at 6.35% examples, 935962 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:54:26,269 : INFO : EPOCH 3 - PROGRESS: at 7.39% examples, 912002 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:54:27,270 : INFO : EPOCH 3 - PROGRESS: at 8.67% examples, 915064 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:54:28,273 : INFO : EPOCH 3 - PROGRESS: at 9.84% examples, 904246 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:54:29,274 : INFO : EPOCH 3 - PROGRESS: at 11.35% examples, 916353 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:54:30,278 : INFO : EPOCH 3 - PROGRESS: at 12.72% examples, 916849 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:54:31,280 : INFO : EPOCH 3 - PROGRESS: at 14.13% examples, 918525 words/s, in_qsize 3, o

2018-07-23 11:55:34,461 : INFO : EPOCH 4 - PROGRESS: at 9.38% examples, 989626 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:55:35,471 : INFO : EPOCH 4 - PROGRESS: at 10.89% examples, 992000 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:55:36,480 : INFO : EPOCH 4 - PROGRESS: at 12.43% examples, 993957 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:55:37,481 : INFO : EPOCH 4 - PROGRESS: at 13.89% examples, 992954 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:55:38,482 : INFO : EPOCH 4 - PROGRESS: at 15.32% examples, 991343 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:55:39,495 : INFO : EPOCH 4 - PROGRESS: at 16.78% examples, 992280 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:55:40,495 : INFO : EPOCH 4 - PROGRESS: at 18.23% examples, 995568 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:55:41,496 : INFO : EPOCH 4 - PROGRESS: at 19.76% examples, 996942 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:55:42,501 : INFO : EPOCH 4 - PROGRESS: at 21.68% examples, 999322 words/s, in_qsize

2018-07-23 11:56:45,339 : INFO : EPOCH 5 - PROGRESS: at 13.97% examples, 997914 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:56:46,340 : INFO : EPOCH 5 - PROGRESS: at 15.42% examples, 997094 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:56:47,347 : INFO : EPOCH 5 - PROGRESS: at 16.90% examples, 1000052 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:56:48,349 : INFO : EPOCH 5 - PROGRESS: at 18.37% examples, 1003776 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:56:49,349 : INFO : EPOCH 5 - PROGRESS: at 19.93% examples, 1002902 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:56:50,351 : INFO : EPOCH 5 - PROGRESS: at 21.76% examples, 1002771 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:56:51,360 : INFO : EPOCH 5 - PROGRESS: at 23.56% examples, 1003209 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:56:52,361 : INFO : EPOCH 5 - PROGRESS: at 25.34% examples, 1006299 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:56:53,362 : INFO : EPOCH 5 - PROGRESS: at 27.15% examples, 1008885 words/s, 

iteration 8


2018-07-23 11:57:42,344 : INFO : EPOCH 1 - PROGRESS: at 1.19% examples, 877213 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:57:43,345 : INFO : EPOCH 1 - PROGRESS: at 2.58% examples, 947359 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:57:44,347 : INFO : EPOCH 1 - PROGRESS: at 4.00% examples, 978234 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:57:45,348 : INFO : EPOCH 1 - PROGRESS: at 5.41% examples, 991725 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:57:46,359 : INFO : EPOCH 1 - PROGRESS: at 6.76% examples, 996859 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:57:47,367 : INFO : EPOCH 1 - PROGRESS: at 8.15% examples, 1001886 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:57:48,369 : INFO : EPOCH 1 - PROGRESS: at 9.51% examples, 1001207 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:57:49,372 : INFO : EPOCH 1 - PROGRESS: at 11.03% examples, 1003890 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:57:50,376 : INFO : EPOCH 1 - PROGRESS: at 12.51% examples, 1001560 words/s, in_qsize 4

2018-07-23 11:58:53,604 : INFO : EPOCH 2 - PROGRESS: at 5.30% examples, 964816 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:58:54,605 : INFO : EPOCH 2 - PROGRESS: at 6.49% examples, 956690 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:58:55,610 : INFO : EPOCH 2 - PROGRESS: at 7.71% examples, 946942 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:58:56,611 : INFO : EPOCH 2 - PROGRESS: at 8.96% examples, 943667 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:58:57,619 : INFO : EPOCH 2 - PROGRESS: at 10.32% examples, 943634 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:58:58,625 : INFO : EPOCH 2 - PROGRESS: at 11.64% examples, 934062 words/s, in_qsize 3, out_qsize 0
2018-07-23 11:58:59,626 : INFO : EPOCH 2 - PROGRESS: at 13.01% examples, 933285 words/s, in_qsize 4, out_qsize 0
2018-07-23 11:59:00,629 : INFO : EPOCH 2 - PROGRESS: at 14.44% examples, 936607 words/s, in_qsize 4, out_qsize 1
2018-07-23 11:59:01,640 : INFO : EPOCH 2 - PROGRESS: at 15.92% examples, 943285 words/s, in_qsize 3,

2018-07-23 12:00:04,021 : INFO : EPOCH 3 - PROGRESS: at 5.30% examples, 965149 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:00:05,035 : INFO : EPOCH 3 - PROGRESS: at 6.41% examples, 942757 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:00:06,036 : INFO : EPOCH 3 - PROGRESS: at 7.61% examples, 934743 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:00:07,043 : INFO : EPOCH 3 - PROGRESS: at 8.90% examples, 936734 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:00:08,044 : INFO : EPOCH 3 - PROGRESS: at 10.22% examples, 934989 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:00:09,049 : INFO : EPOCH 3 - PROGRESS: at 11.65% examples, 934527 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:00:10,049 : INFO : EPOCH 3 - PROGRESS: at 13.08% examples, 937613 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:00:11,055 : INFO : EPOCH 3 - PROGRESS: at 14.47% examples, 938521 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:00:12,062 : INFO : EPOCH 3 - PROGRESS: at 15.96% examples, 944977 words/s, in_qsize 3,

2018-07-23 12:01:15,096 : INFO : EPOCH 4 - PROGRESS: at 5.09% examples, 929191 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:01:16,100 : INFO : EPOCH 4 - PROGRESS: at 6.40% examples, 943717 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:01:17,104 : INFO : EPOCH 4 - PROGRESS: at 7.79% examples, 958615 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:01:18,113 : INFO : EPOCH 4 - PROGRESS: at 9.10% examples, 960927 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:01:19,117 : INFO : EPOCH 4 - PROGRESS: at 10.53% examples, 962523 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:01:20,119 : INFO : EPOCH 4 - PROGRESS: at 12.07% examples, 968928 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:01:21,132 : INFO : EPOCH 4 - PROGRESS: at 13.66% examples, 975582 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:01:22,136 : INFO : EPOCH 4 - PROGRESS: at 15.16% examples, 979865 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:01:23,147 : INFO : EPOCH 4 - PROGRESS: at 16.53% examples, 977313 words/s, in_qsize 4,

2018-07-23 12:02:25,962 : INFO : EPOCH 5 - PROGRESS: at 7.65% examples, 940804 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:02:26,966 : INFO : EPOCH 5 - PROGRESS: at 8.96% examples, 943546 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:02:27,971 : INFO : EPOCH 5 - PROGRESS: at 10.35% examples, 946663 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:02:28,972 : INFO : EPOCH 5 - PROGRESS: at 11.86% examples, 951567 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:02:29,975 : INFO : EPOCH 5 - PROGRESS: at 13.34% examples, 954966 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:02:30,978 : INFO : EPOCH 5 - PROGRESS: at 14.77% examples, 957729 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:02:31,979 : INFO : EPOCH 5 - PROGRESS: at 16.13% examples, 955860 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:02:32,981 : INFO : EPOCH 5 - PROGRESS: at 17.56% examples, 959717 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:02:33,988 : INFO : EPOCH 5 - PROGRESS: at 18.96% examples, 961632 words/s, in_qsize 

iteration 9


2018-07-23 12:03:30,354 : INFO : EPOCH 1 - PROGRESS: at 1.18% examples, 872975 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:03:31,364 : INFO : EPOCH 1 - PROGRESS: at 2.41% examples, 873236 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:03:32,371 : INFO : EPOCH 1 - PROGRESS: at 3.64% examples, 890071 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:03:33,378 : INFO : EPOCH 1 - PROGRESS: at 5.01% examples, 911148 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:03:34,388 : INFO : EPOCH 1 - PROGRESS: at 6.23% examples, 913342 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:03:35,389 : INFO : EPOCH 1 - PROGRESS: at 7.51% examples, 922792 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:03:36,394 : INFO : EPOCH 1 - PROGRESS: at 8.82% examples, 927876 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:03:37,397 : INFO : EPOCH 1 - PROGRESS: at 10.20% examples, 933298 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:03:38,401 : INFO : EPOCH 1 - PROGRESS: at 11.64% examples, 933809 words/s, in_qsize 3, ou

2018-07-23 12:04:41,370 : INFO : EPOCH - 1 : training on 79816095 raw words (66495458 effective words) took 72.0s, 923316 effective words/s
2018-07-23 12:04:42,373 : INFO : EPOCH 2 - PROGRESS: at 1.14% examples, 850150 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:04:43,377 : INFO : EPOCH 2 - PROGRESS: at 2.53% examples, 922323 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:04:44,385 : INFO : EPOCH 2 - PROGRESS: at 3.78% examples, 927857 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:04:45,392 : INFO : EPOCH 2 - PROGRESS: at 5.14% examples, 936744 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:04:46,399 : INFO : EPOCH 2 - PROGRESS: at 6.45% examples, 949089 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:04:47,406 : INFO : EPOCH 2 - PROGRESS: at 7.70% examples, 945054 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:04:48,407 : INFO : EPOCH 2 - PROGRESS: at 8.97% examples, 944499 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:04:49,410 : INFO : EPOCH 2 - PROGRESS: at 10.31% examples, 943

2018-07-23 12:05:54,756 : INFO : EPOCH 2 - PROGRESS: at 90.87% examples, 869712 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:05:55,762 : INFO : EPOCH 2 - PROGRESS: at 92.09% examples, 870473 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:05:56,763 : INFO : EPOCH 2 - PROGRESS: at 94.28% examples, 870930 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:05:57,768 : INFO : EPOCH 2 - PROGRESS: at 99.60% examples, 869606 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:05:57,837 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-07-23 12:05:57,842 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-07-23 12:05:57,843 : INFO : EPOCH - 2 : training on 79816095 raw words (66491565 effective words) took 76.5s, 869498 effective words/s
2018-07-23 12:05:58,849 : INFO : EPOCH 3 - PROGRESS: at 1.14% examples, 847990 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:05:59,855 : INFO : EPOCH 3 - PROGRESS: at 2.47% examples, 897792 words/s, in_qsize 3, out_qsize 0
20

2018-07-23 12:07:05,277 : INFO : EPOCH 3 - PROGRESS: at 80.73% examples, 828167 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:07:06,281 : INFO : EPOCH 3 - PROGRESS: at 81.71% examples, 827990 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:07:07,284 : INFO : EPOCH 3 - PROGRESS: at 82.83% examples, 829408 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:07:08,295 : INFO : EPOCH 3 - PROGRESS: at 83.90% examples, 830081 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:07:09,299 : INFO : EPOCH 3 - PROGRESS: at 84.88% examples, 829667 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:07:10,304 : INFO : EPOCH 3 - PROGRESS: at 85.87% examples, 828977 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:07:11,313 : INFO : EPOCH 3 - PROGRESS: at 86.94% examples, 828278 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:07:12,314 : INFO : EPOCH 3 - PROGRESS: at 87.93% examples, 827746 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:07:13,314 : INFO : EPOCH 3 - PROGRESS: at 89.00% examples, 827329 words/s, in_qsiz

2018-07-23 12:08:15,714 : INFO : EPOCH 4 - PROGRESS: at 75.79% examples, 907070 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:08:16,717 : INFO : EPOCH 4 - PROGRESS: at 77.02% examples, 907302 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:08:17,718 : INFO : EPOCH 4 - PROGRESS: at 78.28% examples, 908222 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:08:18,718 : INFO : EPOCH 4 - PROGRESS: at 79.54% examples, 909360 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:08:19,720 : INFO : EPOCH 4 - PROGRESS: at 80.61% examples, 909063 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:08:20,721 : INFO : EPOCH 4 - PROGRESS: at 81.80% examples, 910587 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:08:21,730 : INFO : EPOCH 4 - PROGRESS: at 82.95% examples, 910996 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:08:22,733 : INFO : EPOCH 4 - PROGRESS: at 84.06% examples, 911168 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:08:23,738 : INFO : EPOCH 4 - PROGRESS: at 85.22% examples, 911866 words/s, in_qsiz

2018-07-23 12:09:26,638 : INFO : EPOCH 5 - PROGRESS: at 73.87% examples, 913451 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:09:27,638 : INFO : EPOCH 5 - PROGRESS: at 75.11% examples, 914538 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:09:28,641 : INFO : EPOCH 5 - PROGRESS: at 76.44% examples, 915726 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:09:29,657 : INFO : EPOCH 5 - PROGRESS: at 77.76% examples, 917078 words/s, in_qsize 3, out_qsize 0
2018-07-23 12:09:30,658 : INFO : EPOCH 5 - PROGRESS: at 79.05% examples, 917940 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:09:31,666 : INFO : EPOCH 5 - PROGRESS: at 80.23% examples, 918533 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:09:32,676 : INFO : EPOCH 5 - PROGRESS: at 81.37% examples, 919197 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:09:33,682 : INFO : EPOCH 5 - PROGRESS: at 82.63% examples, 921115 words/s, in_qsize 4, out_qsize 0
2018-07-23 12:09:34,684 : INFO : EPOCH 5 - PROGRESS: at 83.84% examples, 922501 words/s, in_qsiz

## Testing

Now that the new embedding has been saved, we can go ahead and test it out. The best way test out if the embedding is useful is to try and and see what it tells us as similar notes.

In [65]:
def check_model():
    "Check to see if the model is working properly"
    check_model= gensim.models.doc2vec.Doc2Vec.load("models/100_10_notes2vect.w2v")
    test_data = word_tokenize("Patient was showing suicidal ideation.".lower())
    v1 = check_model.infer_vector(test_data)
    
    # Vector for our test_data sentence:
    print("V1_infer", v1)
    
    # List of similar documents:
    similar_doc = check_model.docvecs.most_similar('1')
    print(similar_doc)

    # Find vector of doc in training data using tags or in other words, 
    print(check_model.docvecs['1'])

In [66]:
check_model()

2018-07-23 14:00:51,008 : INFO : loading Doc2Vec object from models/100_10_notes2vect.w2v
2018-07-23 14:00:54,168 : INFO : loading vocabulary recursively from models/100_10_notes2vect.w2v.vocabulary.* with mmap=None
2018-07-23 14:00:54,169 : INFO : loading trainables recursively from models/100_10_notes2vect.w2v.trainables.* with mmap=None
2018-07-23 14:00:54,169 : INFO : loading syn1neg from models/100_10_notes2vect.w2v.trainables.syn1neg.npy with mmap=None
2018-07-23 14:00:54,207 : INFO : loading wv recursively from models/100_10_notes2vect.w2v.wv.* with mmap=None
2018-07-23 14:00:54,208 : INFO : loading vectors from models/100_10_notes2vect.w2v.wv.vectors.npy with mmap=None
2018-07-23 14:00:54,244 : INFO : loading docvecs recursively from models/100_10_notes2vect.w2v.docvecs.* with mmap=None
2018-07-23 14:00:54,244 : INFO : loaded models/100_10_notes2vect.w2v
2018-07-23 14:00:54,547 : INFO : precomputing L2-norms of doc weight vectors


V1_infer [-0.04915233 -0.04058329 -0.12431081  0.01129368 -0.02803    -0.01759184
 -0.3289515  -0.15440083  0.07726275  0.2979058   0.15708292 -0.06525695
  0.29606253  0.00235566 -0.05838712 -0.02736545 -0.06457333 -0.13388766
 -0.15850554  0.14162636 -0.07013088  0.08949224 -0.18784636  0.01174972
  0.17019647  0.20091398  0.03485787 -0.32370985  0.23605071 -0.01658516
  0.07874801  0.11165143 -0.15882064 -0.19501252  0.06144822 -0.04502968
  0.17133741  0.2115282  -0.32174623  0.05651753 -0.06534992  0.13401635
 -0.02671036  0.05301908  0.14028467 -0.11235095  0.29776222  0.01607168
  0.25071466 -0.03235956 -0.1707105   0.17692511  0.01732027  0.08230979
 -0.2680088   0.09284586 -0.11809757  0.1295313   0.26465446  0.0627472
  0.12594716 -0.19669251  0.04560137  0.13884176 -0.23192766  0.22427078
  0.1743475   0.0708419  -0.11796094  0.03599552 -0.13917784  0.25136024
  0.11222767 -0.02752433  0.08283218  0.00751723 -0.0377498   0.00039315
  0.33839566  0.04578145 -0.14352646  0.052

In [64]:
tagged_notes[4340]

array([list(['admission', 'date', 'discharge', 'date', 'date', 'of', 'birth', 'sex', 'f', 'service', 'acove', 'history', 'of', 'present', 'illness', 'the', 'patient', 'is', 'a', 'year', 'old', 'woman', 'with', 'a', 'history', 'of', 'severe', 'o2', 'dependent', 'chronic', 'obstructive', 'pulmonary', 'disease', 'fev1', 'recurrent', 'pneumonia', 'who', 'transferred', 'from', 'hospital', 'hospital', 'for', 'further', 'evaluation', 'and', 'treatment', 'of', 'a', 'chronic', 'obstructive', 'pulmonary', 'disease', 'pneumonia', 'the', 'patient', 'was', 'recently', 'admitted', 'to', 'this', 'hospital', 'on', 'with', 'a', 'right', 'lower', 'lobe', 'pneumonia', 'and', 'she', 'was', 'treated', 'with', 'azithromycin', 'ceftriaxone', 'and', 'then', 'discharged', 'home', 'on', 'ceftin', 'a', 'follow', 'up', 'chest', 'x', 'ray', 'in', 'the', 'morning', 'showed', 'resolution', 'of', 'this', 'right', 'lower', 'lobe', 'pneumonia', 'but', 'the', 'patient', 'continued', 'to', 'have', 'sputum', 'production',

In [67]:
check_model= gensim.models.doc2vec.Doc2Vec.load("models/100_10_notes2vect.w2v")

2018-07-23 14:31:13,048 : INFO : loading Doc2Vec object from models/100_10_notes2vect.w2v
2018-07-23 14:31:16,137 : INFO : loading vocabulary recursively from models/100_10_notes2vect.w2v.vocabulary.* with mmap=None
2018-07-23 14:31:16,138 : INFO : loading trainables recursively from models/100_10_notes2vect.w2v.trainables.* with mmap=None
2018-07-23 14:31:16,139 : INFO : loading syn1neg from models/100_10_notes2vect.w2v.trainables.syn1neg.npy with mmap=None
2018-07-23 14:31:16,176 : INFO : loading wv recursively from models/100_10_notes2vect.w2v.wv.* with mmap=None
2018-07-23 14:31:16,177 : INFO : loading vectors from models/100_10_notes2vect.w2v.wv.vectors.npy with mmap=None
2018-07-23 14:31:16,213 : INFO : loading docvecs recursively from models/100_10_notes2vect.w2v.docvecs.* with mmap=None
2018-07-23 14:31:16,214 : INFO : loaded models/100_10_notes2vect.w2v


In [71]:
len(check_model.docvecs)

59652

In [72]:
len(notes)

59652