In [2]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

In [3]:
import warnings; warnings.simplefilter('ignore')
import gensim as gen
import json
import codecs
import regex as re
import os.path
from scipy import spatial

import local_settings
import django
django.setup()

from sefaria.model import *
from sefaria.system.exceptions import InputError, PartialRefInputError



In [3]:
import hebrew_spellcheck

In [4]:
# Lookup table exposed by hebrew_spellcheck; pull_out_suffix() below queries it with
# word_expander.get(word, word), so words without an entry pass through unchanged.
word_expander = hebrew_spellcheck.word_expander

In [36]:
# Load the stopword list (one entry per line) and compile it into a single alternation.
# The context manager closes the file handle, which the previous one-liner leaked.
with codecs.open('./hebrew_stopwords.txt', encoding='utf8') as stopwords_file:
    stopwords = [entry.strip() for entry in stopwords_file.read().split(u'\n')]
# Drop blank entries: an empty alternative would match bare whitespace, and an
# unstripped trailing '\r' would make an entry unmatchable.
stopwords = [entry for entry in stopwords if entry]
# Entries are inserted as regex source, unescaped. The leading whitespace (or start of
# string) is consumed by the match; the trailing boundary is only a lookahead, so
# back-to-back stopwords are both matched.
stopwords_regex = u"(?:\\s|^)({})(?=\\s|$)".format(u"|".join(stopwords))
stopwords_regex = re.compile(stopwords_regex)

In [37]:
# Load the multi-word phrases (one per line) that should be treated as single tokens.
# The context manager closes the file handle, which the previous one-liner leaked.
with codecs.open('./select_phrases.txt', encoding='utf8') as phrases_file:
    phrases = [entry.strip() for entry in phrases_file.read().split(u'\n')]
# Drop blank entries: an empty alternative would match bare whitespace, and an
# unstripped trailing '\r' would make an entry unmatchable.
phrases = [entry for entry in phrases if entry]
# Group 1 is the leading whitespace (or start of string) and group 2 is the phrase;
# create_multiple_word_phrases() relies on both groups. Entries are inserted as regex
# source, unescaped.
phrases_regex = u"(\\s|^)({})(?=\\s|$)".format(u"|".join(phrases))
phrases_regex = re.compile(phrases_regex)

In [38]:
def get_talmud_topic_ranged():
    """
    Reads every file in the sugyot directory.
    Each file is read as a CSV: the first row is skipped as a header and the first
    column of every following row is taken as a ranged ref for Talmud.
    Files are visited in sorted filename order so the result is reproducible.
    :return: List of all Talmudic Ranged Refs
    """
    # csv is not imported by the notebook's import cell, so bring it into scope here.
    import csv

    sugyot_dir = './sugyot/'
    all_gemara_topic_ranges = []
    # os.listdir returns entries in arbitrary order; sort for a stable result.
    for filename in sorted(os.listdir(sugyot_dir)):
        with codecs.open(os.path.join(sugyot_dir, filename), 'rb', encoding='utf8') as csvfile:
            reader = csv.reader(csvfile)
            # Skip the header row. next(reader, None) works on Python 2 and 3 and
            # does not raise StopIteration when the file is empty.
            next(reader, None)
            # csv.reader yields an empty list for a blank line; skip those rows.
            all_gemara_topic_ranges += [row[0] for row in reader if row]
    return all_gemara_topic_ranges


def create_list_off_talmud_books():
    """
    Returns the hard-coded set of Talmud tractate titles used by this notebook.
    The Sefaria library is not consulted: the earlier lookups were assigned to locals
    that were never read, so they have been removed.
    :return: Set of Talmud book titles (a fresh set on every call)
    """
    return {
        u'Avodah Zarah', u'Bava Batra', u'Bava Kamma', u'Bava Metzia', u'Beitzah',
        u'Berakhot', u'Chagigah', u'Eruvin', u'Gittin', u'Horayot', u'Ketubot',
        u'Kiddushin', u'Makkot', u'Megillah', u'Menachot', u'Moed Katan', u'Nazir',
        u'Nedarim', u'Pesachim', u'Rosh Hashanah', u'Sanhedrin', u'Shabbat',
        u'Shevuot', u'Sotah', u'Sukkah', u'Taanit', u'Yevamot', u'Yoma', u'Zevachim',
    }


def create_list_off_tanakh_books():
    """
    Returns the hard-coded set of Tanakh book titles used by this notebook.
    The Sefaria library is not consulted: the earlier lookups were assigned to locals
    that were never read, so they have been removed.
    :return: Set of Tanakh book titles (a fresh set on every call)
    """
    return {
        u'Genesis', u'Exodus', u'Leviticus', u'Numbers', u'Deuteronomy',
        u'Joshua', u'Judges', u'I Samuel', u'II Samuel', u'I Kings', u'II Kings',
    }
    


def get_tanakh_topic_ranges():
    """
    Builds ranged refs for Tanakh from 'level_3_wo_overlaps.json'.
    Each entry of that file supplies a begin ref ('b_ref') and an end ref ('e_ref')
    of the form "<book> <chapter>:<verse>". The book title is taken from the begin ref.
    :return: List of all Tanakh Ranged Refs, e.g. "Genesis 1:1-5" or "I Samuel 2:3-3:4"
    """
    def split_ref(tref):
        # "I Samuel 2:3" -> ("I Samuel", "2", "3"); the title may itself contain spaces,
        # so split on the last space only.
        book, sections = tref.rsplit(' ', 1)
        parts = sections.split(':')
        return book, parts[0], parts[1]

    with codecs.open('level_3_wo_overlaps.json', 'r', encoding='utf8') as the_file:
        # codecs already decodes the stream, so json.load needs no encoding argument
        # (that keyword was removed in Python 3.9 and raises TypeError there).
        segs = json.load(the_file)

    tanakh_topic_ranged_refs = []
    for seg in segs:
        book, b_chapter, b_verse = split_ref(seg['b_ref'])
        _, e_chapter, e_verse = split_ref(seg['e_ref'])

        if b_chapter == e_chapter:
            # Range stays inside one chapter: the chapter is written once.
            verses = "{}:{}-{}".format(b_chapter, b_verse, e_verse)
        else:
            verses = "{}:{}-{}:{}".format(b_chapter, b_verse, e_chapter, e_verse)
        tanakh_topic_ranged_refs.append("{} {}".format(book, verses))

    return tanakh_topic_ranged_refs
    

def segment_range_dicts(topic_ranged_refs):
    """
    Builds the two lookups that connect ranged refs with the segment refs they cover.
    Every ranged ref is expanded with Ref(...).range_list() and each resulting segment
    is recorded under its normal() form.
    :param topic_ranged_refs: list of ranged refs
    :return: tuple of dicts. The first maps each ranged ref to a dict keyed by its segment refs (values start as empty strings).  The second maps each segment ref to the ranged ref that contains it.
    """
    range_lookup, segment_lookup = {}, {}

    for ranged_tref in topic_ranged_refs:
        # Register the range before expanding it, so it has an entry even if empty.
        members = range_lookup[ranged_tref] = {}
        for segment in Ref(ranged_tref).range_list():
            segment_tref = segment.normal()
            members[segment_tref] = ""
            segment_lookup[segment_tref] = ranged_tref

    return range_lookup, segment_lookup

def is_from_category(ref, books_in_category):
    """
    Tells whether a tref belongs to one of the books of a category.
    For Example:  Is Genesis 12:3 within Tanakh
    :param ref: tref to be examined
    :param books_in_category: collection of book titles that make up the category
    :return: True if the tref's index title is in books_in_category; False otherwise,
             including when the tref raises InputError
    """
    try:
        return Ref(ref).index.title in books_in_category
    except InputError:
        # An unparseable tref cannot belong to any category.
        return False
    

def pull_out_suffix(string):
    """
    Replaces every whitespace-separated word of a Hebrew string with its entry in
    word_expander; words without an entry are kept as they are.
    (Presumably the expansion separates root and suffix -- see hebrew_spellcheck.)
    :param string: String of Hebrew Text
    :return: String of the expanded words joined by single spaces
    """
    expanded = (word_expander.get(token, token) for token in string.split())
    return ' '.join(expanded)

def remove_stopwords(string):
    """
    Substitutes a single space for every match of the compiled stopwords pattern
    (the stopword together with the whitespace in front of it).
    :param string: String of Hebrew Text
    :return: String without stopwords
    """
    return stopwords_regex.sub(u' ', string)

def remove_dicta_prefix(string):
    """
    Removes prefixes that were detected by Dicta: every run of Hebrew letters that is
    immediately followed by the '┉' marker is deleted together with the marker.
    :param string: String of Hebrew Text
    :return: String without prefixes
    """
    # Plain u'' literal instead of ur'': ur'' is a SyntaxError on Python 3, and the
    # pattern contains no backslash other than the \u escapes, so its value is unchanged.
    return re.sub(u'[\u05d0-\u05ea]+┉', u'', string)

def remove_punctuation(data):
    """
    Removes various punctuation from Hebrew text.
    Every removed span is replaced by one space, so the result can contain runs of
    spaces; callers that need single spaces must normalise afterwards.
    :param data: String of Hebrew Text
    :return: String without punctuation
    """
    # Patterns are plain u'' literals with doubled backslashes (ur'' is a SyntaxError
    # on Python 3). They are applied strictly in this order: bracketed spans must be
    # removed before the catch-all filter deletes their delimiters.
    patterns = (
        u'־',                        # the hyphen-like mark joining two words
        u'\\([^)]+\\)',              # parenthesised text
        u'<[^>]+>',                  # markup tags
        u'\\[[^\\]]+\\]',            # square-bracketed text
        u'[^ \u05d0-\u05ea"\'״׳]',   # anything except space, Hebrew letters, quote marks
        u'(^|\\s)["\'״׳]+',          # quote marks at the start of a word
        u'["\'״׳]+(\\s|$)',          # quote marks at the end of a word
    )
    for pattern in patterns:
        data = re.sub(pattern, u' ', data)
    return data

def strip_stopwords_and_remove_punctuation(data):
    """
    Runs the complete cleaning pipeline on one line of the Dicta file.
    The text is the second u'~~'-separated field of the stripped line; it then goes
    through prefix removal, punctuation removal, word expansion and stopword removal,
    and finally all whitespace runs are collapsed to single spaces.
    :param data: String of Hebrew Text
    :return: String ready for Word2Vec model
    """
    text = data.strip().split(u'~~')[1]
    # Order matters: each step consumes the output of the previous one.
    cleaning_steps = (
        remove_dicta_prefix,
        remove_punctuation,
        pull_out_suffix,
        remove_stopwords,
    )
    for step in cleaning_steps:
        text = step(text)
    return u' '.join(text.split())



def create_multiple_word_phrases(data):
    """
    Joins the words of every selected multi-word phrase with underscores.
    For example:  New York ----> New_York
    Word2Vec then sees each such phrase as one token.
    :param data: String of Hebrew Text
    :return: String with connected multiple word phrases
    """
    # group(1) is the whitespace (or start of string) in front of the phrase and is
    # kept untouched; only the spaces inside the phrase itself, group(2), are replaced.
    return phrases_regex.sub(
        lambda found: found.group(1) + found.group(2).replace(u" ", u"_"),
        data,
    )


def this_is_a_bad_line(data):
    """
    Decides whether a line of the Dicta Prefix file must be left out of the model.
    A line is rejected when it has no u'~~' separator, or when its text field starts
    with the markup-wrapped phrase checked below.
    :param data: line of hebrew text from Dicta File
    :return: True if the line should be skipped, False if it is usable
    """
    if u'~~' not in data:
        return True
    # The separator is present, so the split always yields a second field.
    text = data.strip().split(u'~~')[1].strip()
    return text.startswith(u"<br><br><big><strong>הדרן עלך")
    

def _join_range_texts(ranged_refs, ranged_to_segment):
    """
    Concatenates, for every ranged ref, the cleaned texts of its segments in range order.
    :param ranged_refs: list of ranged refs
    :param ranged_to_segment: dict mapping each ranged ref to a dict of segment ref -> text
    :return: dict mapping each ranged ref to its space-joined text
    """
    joined = {}
    for ranged_ref in ranged_refs:
        # Re-expand the range to recover the segment order; the per-range dict is
        # keyed by ref and does not guarantee ordering on Python 2.
        segment_trefs = [seg_ref.normal() for seg_ref in Ref(ranged_ref).range_list()]
        texts = [ranged_to_segment[ranged_ref][tref] for tref in segment_trefs]
        joined[ranged_ref] = u' '.join(texts)
    return joined


def get_segments(filename):
    """
    Combs through the entire Sefaria Hebrew Library and cleans the text for Doc2Vec.
    Creates a dict:
        Key:  Ref
        Value:  The text of that ref cleaned and ready for Doc2Vec
    Segments of the listed Tanakh and Talmud books are merged into one document per
    topic range; every other segment is stored under its own ref.
    Prints the line index as a progress counter whenever a processed line's index is
    a multiple of 100000.
    :param filename: Dicta Prefix Filename
    :return: Dict for Doc2Vec
    :raises KeyError: if a Tanakh/Talmud segment is not covered by any topic range
    """
    all_tanakh_books = create_list_off_tanakh_books()
    all_talmud_books = create_list_off_talmud_books()

    tanakh_topic_ranged_refs = get_tanakh_topic_ranges()
    talmud_topic_ranged_refs = get_talmud_topic_ranged()

    tanakh_ranged_to_segment, tanakh_segment_to_ranged = segment_range_dicts(tanakh_topic_ranged_refs)
    talmud_ranged_to_segment, talmud_segment_to_ranged = segment_range_dicts(talmud_topic_ranged_refs)

    all_data = {}

    # The context manager closes the export file; it was previously left open.
    with codecs.open(filename, encoding='utf8') as dicta_file:
        for index, data in enumerate(dicta_file):

            if this_is_a_bad_line(data):
                continue

            ref = data.split(u'~')[0]
            data = strip_stopwords_and_remove_punctuation(data)
            data = create_multiple_word_phrases(data)

            if index % 100000 == 0:
                # Parenthesised form prints the same on Python 2 and Python 3.
                print(index)

            if is_from_category(ref, all_tanakh_books):
                tanakh_ranged_to_segment[tanakh_segment_to_ranged[ref]][ref] = data
            elif is_from_category(ref, all_talmud_books):
                talmud_ranged_to_segment[talmud_segment_to_ranged[ref]][ref] = data
            else:
                all_data[ref] = data

    # Tanakh first, then Talmud, matching the original assignment order.
    all_data.update(_join_range_texts(tanakh_topic_ranged_refs, tanakh_ranged_to_segment))
    all_data.update(_join_range_texts(talmud_topic_ranged_refs, talmud_ranged_to_segment))

    return all_data

In [39]:
# Build the ref -> cleaned-text dict from the Dicta prefix export.
# get_segments prints a line index as a progress counter (the numbers shown below).
segments = get_segments('./sefaria-export_prefix_refs.txt')

0
100000
200000
300000
400000
500000
600000
700000
800000
900000
1000000
1100000
1200000
1300000
1400000
1500000
1600000


In [1]:
all_clean_docs_filename = 'cleaned_docs_for_doc2vec.txt'
# Persist the corpus, one document per line, as "<ref>||||<cleaned text>".
with codecs.open(all_clean_docs_filename, 'wb', encoding='utf8') as the_file:
    for k, v in segments.items():
        the_file.write(u"{}||||{}\n".format(k, v))
        

Define a `SegmentGenerator` class and instantiate it to pass to the model

In [30]:
class SegmentGenerator(object):
    """
    Re-iterable corpus over the cleaned-documents file.
    Each line of the file is "<ref>||||<text>"; iteration yields one gensim
    TaggedDocument per line, tagged with the ref. The file is re-opened on every
    iteration, so the same object can be passed to several passes over the corpus.
    NOTE(review): gensim's simple_preprocess is documented to drop tokens shorter than
    2 or longer than 15 characters by default -- confirm that underscore-joined
    phrases survive this step.
    """

    def __init__(self, segments_filename):
        # Path of the "<ref>||||<text>" file to stream from.
        self.segments_filename = segments_filename

    def __iter__(self):
        with codecs.open(self.segments_filename, 'rb', encoding='utf8') as docs:
            for record in docs:
                fields = record.split(u"||||")
                tag, text = fields[0], fields[1]
                tokens = gen.utils.simple_preprocess(text)
                yield gen.models.doc2vec.TaggedDocument(tokens, [tag])

In [31]:
# Corpus object handed to build_vocab() and train(); its __iter__ re-opens the file,
# so it can be iterated once per pass.
segments_generator = SegmentGenerator(all_clean_docs_filename)

### Model initialization

In [32]:
# 100-dimensional vectors; words seen fewer than 2 times are ignored.
# dm=0 selects the PV-DBOW training mode and dbow_words=1 additionally trains word
# vectors (per the gensim Doc2Vec documentation).
# NOTE(review): no seed is passed, so repeated runs are presumably not exactly
# reproducible -- confirm whether that matters here.
model = gen.models.doc2vec.Doc2Vec(vector_size=100, min_count=2, epochs=40, dm=0, dbow_words=1)

Now we have initialized the Doc2Vec model. The next step is to call the **build_vocab** method for the preliminary scan of the text. Call the method and print how many words are in the vocabulary of our text.

In [33]:
# Preliminary pass over the corpus: collects word counts and document tags.
model.build_vocab(segments_generator)
# NOTE(review): model.wv.vocab is the gensim 3.x attribute; gensim 4 replaced it
# (wv.key_to_index) -- confirm the pinned gensim version.
print("Number of words in vocabulary: {}".format(len(model.wv.vocab)))

INFO collecting all words and their counts
INFO PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
INFO PROGRESS: at example #10000, processed 480183 words (493516/s), 29343 word types, 10000 tags
INFO PROGRESS: at example #20000, processed 951957 words (444592/s), 39961 word types, 20000 tags
INFO PROGRESS: at example #30000, processed 1425416 words (464963/s), 48038 word types, 30000 tags
INFO PROGRESS: at example #40000, processed 1894717 words (512670/s), 54567 word types, 40000 tags
INFO PROGRESS: at example #50000, processed 2365377 words (501974/s), 60171 word types, 50000 tags
INFO PROGRESS: at example #60000, processed 2823417 words (497557/s), 64944 word types, 60000 tags
INFO PROGRESS: at example #70000, processed 3297189 words (506217/s), 69025 word types, 70000 tags
INFO PROGRESS: at example #80000, processed 3766242 words (410361/s), 73037 word types, 80000 tags
INFO PROGRESS: at example #90000, processed 4241438 words (506678/s), 76961 word types, 900

INFO PROGRESS: at example #800000, processed 37611106 words (377172/s), 194861 word types, 800000 tags
INFO PROGRESS: at example #810000, processed 38059767 words (388978/s), 195819 word types, 810000 tags
INFO PROGRESS: at example #820000, processed 38515852 words (259422/s), 196805 word types, 820000 tags
INFO PROGRESS: at example #830000, processed 38986308 words (354775/s), 197946 word types, 830000 tags
INFO PROGRESS: at example #840000, processed 39459622 words (365770/s), 198984 word types, 840000 tags
INFO PROGRESS: at example #850000, processed 39914003 words (392453/s), 199844 word types, 850000 tags
INFO PROGRESS: at example #860000, processed 40391398 words (438661/s), 200825 word types, 860000 tags
INFO PROGRESS: at example #870000, processed 40861805 words (459527/s), 201860 word types, 870000 tags
INFO PROGRESS: at example #880000, processed 41321675 words (402163/s), 203070 word types, 880000 tags
INFO PROGRESS: at example #890000, processed 41820841 words (486125/s), 2

INFO deleting the raw counts dictionary of 259021 items
INFO sample=0.001 downsamples 24 most-common words
INFO downsampling leaves estimated 71459864 word corpus (98.1% of prior 72845375)
INFO estimated required memory for 151713 words and 100 dimensions: 1127306700 bytes
INFO resetting layer weights


Number of words in vocabulary: 151713


In the previous step we created the vocabulary for our model; it is now time to train! Don't forget to pass the following parameters:
1. total_examples=model.corpus_count 
2. epochs=model.epochs

In [34]:
# NOTE(review): the model was constructed with epochs=40 and the instructions above
# ask for epochs=model.epochs, but this call trains for 10 epochs -- confirm which
# value is intended.
model.train(segments_generator, total_examples=model.corpus_count, epochs=10)

INFO training model with 3 workers on 151713 vocabulary and 100 features, using sg=1 hs=0 sample=0.001 negative=5 window=5
INFO EPOCH 1 - PROGRESS: at 0.13% examples, 94930 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 0.30% examples, 104231 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 0.49% examples, 115582 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 0.70% examples, 120362 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 0.90% examples, 123805 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 1.11% examples, 126287 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 1.31% examples, 127989 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 1.50% examples, 128699 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 1.71% examples, 130050 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 1.91% examples, 131641 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 2.09% exam

INFO EPOCH 1 - PROGRESS: at 18.38% examples, 131651 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 18.57% examples, 131715 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 18.76% examples, 131764 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 18.95% examples, 131835 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 19.15% examples, 131887 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 19.36% examples, 132015 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 19.55% examples, 132038 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 19.73% examples, 132024 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 19.95% examples, 132070 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 20.16% examples, 132135 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 20.37% examples, 132188 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 20.57% examples, 132234 words/s, in_q

INFO EPOCH 1 - PROGRESS: at 37.72% examples, 134598 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 37.92% examples, 134633 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 38.13% examples, 134657 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 38.32% examples, 134666 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 38.51% examples, 134708 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 38.71% examples, 134723 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 38.92% examples, 134731 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 39.12% examples, 134737 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 39.33% examples, 134778 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 39.55% examples, 134766 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 39.77% examples, 134793 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 39.98% examples, 134819 words/s, in_q

INFO EPOCH 1 - PROGRESS: at 57.13% examples, 135872 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 57.29% examples, 135844 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 57.48% examples, 135858 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 57.69% examples, 135852 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 57.90% examples, 135876 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 58.11% examples, 135884 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 58.31% examples, 135881 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 58.53% examples, 135880 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 58.73% examples, 135903 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 58.94% examples, 135919 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 59.15% examples, 135905 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 59.37% examples, 135938 words/s, in_q

INFO EPOCH 1 - PROGRESS: at 76.36% examples, 136216 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 76.55% examples, 136211 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 76.74% examples, 136217 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 76.95% examples, 136240 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 77.17% examples, 136250 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 77.37% examples, 136274 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 77.58% examples, 136303 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 77.79% examples, 136320 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 77.99% examples, 136335 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 78.21% examples, 136353 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 78.30% examples, 136185 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 78.50% examples, 136206 words/s, in_q

INFO EPOCH 1 - PROGRESS: at 95.99% examples, 137620 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 96.21% examples, 137637 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 96.42% examples, 137649 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 96.63% examples, 137662 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 96.85% examples, 137675 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 97.04% examples, 137687 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 97.24% examples, 137701 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 97.42% examples, 137703 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 97.64% examples, 137729 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 97.83% examples, 137731 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 98.05% examples, 137746 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 1 - PROGRESS: at 98.25% examples, 137758 words/s, in_q

INFO EPOCH 2 - PROGRESS: at 15.07% examples, 140606 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 15.27% examples, 140555 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 15.48% examples, 140514 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 15.68% examples, 140518 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 15.87% examples, 140588 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 16.06% examples, 140538 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 16.26% examples, 140509 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 16.47% examples, 140503 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 16.68% examples, 140500 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 16.89% examples, 140473 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 17.08% examples, 140473 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 17.27% examples, 140504 words/s, in_q

INFO EPOCH 2 - PROGRESS: at 34.31% examples, 139143 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 34.48% examples, 138950 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 34.66% examples, 138852 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 34.87% examples, 138787 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 35.04% examples, 138725 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 35.23% examples, 138655 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 35.42% examples, 138624 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 35.55% examples, 138379 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 35.69% examples, 138198 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 35.85% examples, 138064 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 36.03% examples, 138000 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 36.21% examples, 137960 words/s, in_q

INFO EPOCH 2 - PROGRESS: at 53.35% examples, 138047 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 53.55% examples, 138052 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 53.76% examples, 138092 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 53.95% examples, 138075 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 54.13% examples, 138070 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 54.35% examples, 138080 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 54.55% examples, 138084 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 54.77% examples, 138084 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 54.97% examples, 138079 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 55.17% examples, 138079 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 55.37% examples, 138078 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 55.58% examples, 138080 words/s, in_q

INFO EPOCH 2 - PROGRESS: at 72.53% examples, 137772 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 72.73% examples, 137789 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 72.94% examples, 137763 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 73.13% examples, 137776 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 73.32% examples, 137779 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 73.53% examples, 137780 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 73.73% examples, 137782 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 73.93% examples, 137809 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 74.12% examples, 137794 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 74.33% examples, 137787 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 74.53% examples, 137796 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 74.75% examples, 137788 words/s, in_q

INFO EPOCH 2 - PROGRESS: at 91.67% examples, 137641 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 91.86% examples, 137635 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 92.06% examples, 137641 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 92.26% examples, 137646 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 92.44% examples, 137646 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 92.64% examples, 137656 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 92.85% examples, 137666 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 93.06% examples, 137666 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 93.25% examples, 137667 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 93.44% examples, 137676 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 93.65% examples, 137689 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 2 - PROGRESS: at 93.84% examples, 137680 words/s, in_q

INFO EPOCH 3 - PROGRESS: at 10.23% examples, 137619 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 10.41% examples, 137621 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 10.62% examples, 137655 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 10.84% examples, 137613 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 11.06% examples, 137693 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 11.27% examples, 137825 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 11.45% examples, 137797 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 11.62% examples, 137640 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 11.74% examples, 136836 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 11.89% examples, 136395 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 12.06% examples, 136131 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 12.26% examples, 136064 words/s, in_q

INFO EPOCH 3 - PROGRESS: at 27.20% examples, 126849 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 27.41% examples, 126920 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 27.62% examples, 126995 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 27.78% examples, 127004 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 27.99% examples, 127072 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 28.23% examples, 127149 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 28.43% examples, 127273 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 28.64% examples, 127317 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 28.84% examples, 127423 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 29.02% examples, 127473 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 29.22% examples, 127578 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 29.41% examples, 127665 words/s, in_q

INFO EPOCH 3 - PROGRESS: at 46.30% examples, 130486 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 46.47% examples, 130443 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 46.67% examples, 130474 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 46.82% examples, 130411 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 47.04% examples, 130441 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 47.26% examples, 130459 words/s, in_qsize 4, out_qsize 1
INFO EPOCH 3 - PROGRESS: at 47.45% examples, 130431 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 47.64% examples, 130494 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 47.82% examples, 130507 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 48.02% examples, 130540 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 48.19% examples, 130560 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 48.34% examples, 130459 words/s, in_q

INFO EPOCH 3 - PROGRESS: at 63.50% examples, 127933 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 63.69% examples, 127988 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 63.88% examples, 128003 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 64.08% examples, 128044 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 64.26% examples, 128041 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 64.46% examples, 128066 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 64.66% examples, 128088 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 64.85% examples, 128110 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 65.07% examples, 128130 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 65.28% examples, 128167 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 65.48% examples, 128201 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 65.69% examples, 128247 words/s, in_q

INFO EPOCH 3 - PROGRESS: at 81.55% examples, 128115 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 81.70% examples, 128087 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 81.88% examples, 128109 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 82.05% examples, 128078 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 82.23% examples, 128107 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 82.40% examples, 128074 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 82.61% examples, 128101 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 82.78% examples, 128061 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 82.98% examples, 128080 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 83.18% examples, 128099 words/s, in_qsize 5, out_qsize 1
INFO EPOCH 3 - PROGRESS: at 83.37% examples, 128127 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 83.56% examples, 128162 words/s, in_q

INFO EPOCH 3 - PROGRESS: at 98.98% examples, 127672 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 99.17% examples, 127689 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 99.37% examples, 127709 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 99.56% examples, 127741 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 99.73% examples, 127758 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 3 - PROGRESS: at 99.97% examples, 127807 words/s, in_qsize 3, out_qsize 0
INFO worker thread finished; awaiting finish of 2 more threads
INFO worker thread finished; awaiting finish of 1 more threads
INFO worker thread finished; awaiting finish of 0 more threads
INFO EPOCH - 3 : training on 72952683 raw words (72992918 effective words) took 571.1s, 127815 effective words/s
INFO EPOCH 4 - PROGRESS: at 0.15% examples, 106490 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 0.35% examples, 121518 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGR

INFO EPOCH 4 - PROGRESS: at 16.28% examples, 126397 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 16.43% examples, 126114 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 16.59% examples, 125852 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 16.76% examples, 125823 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 16.95% examples, 125936 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 17.12% examples, 125892 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 17.30% examples, 125955 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 17.51% examples, 126096 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 17.70% examples, 126204 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 17.87% examples, 126119 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 18.04% examples, 126085 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 18.20% examples, 126090 words/s, in_q

INFO EPOCH 4 - PROGRESS: at 34.69% examples, 129610 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 34.87% examples, 129551 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 35.06% examples, 129588 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 35.27% examples, 129654 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 35.48% examples, 129740 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 35.64% examples, 129723 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 35.81% examples, 129641 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 35.95% examples, 129335 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 36.08% examples, 129100 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 36.21% examples, 128976 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 36.42% examples, 129026 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 36.60% examples, 129014 words/s, in_q

INFO EPOCH 4 - PROGRESS: at 51.86% examples, 126873 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 52.03% examples, 126870 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 52.23% examples, 126899 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 52.41% examples, 126927 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 52.63% examples, 126964 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 52.84% examples, 127014 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 53.05% examples, 127061 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 53.26% examples, 127114 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 53.45% examples, 127151 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 53.64% examples, 127189 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 53.86% examples, 127236 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 54.05% examples, 127303 words/s, in_q

INFO EPOCH 4 - PROGRESS: at 70.03% examples, 127593 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 70.22% examples, 127622 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 70.41% examples, 127666 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 70.60% examples, 127688 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 70.82% examples, 127714 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 71.00% examples, 127702 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 71.15% examples, 127653 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 71.27% examples, 127553 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 71.43% examples, 127514 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 71.62% examples, 127539 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 71.77% examples, 127486 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 71.88% examples, 127326 words/s, in_q

INFO EPOCH 4 - PROGRESS: at 87.50% examples, 127271 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 87.65% examples, 127207 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 87.83% examples, 127209 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 88.02% examples, 127211 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 88.21% examples, 127222 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 88.36% examples, 127137 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 88.52% examples, 127115 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 88.73% examples, 127134 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 88.90% examples, 127127 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 89.11% examples, 127140 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 89.29% examples, 127175 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 4 - PROGRESS: at 89.37% examples, 127041 words/s, in_q

INFO EPOCH 5 - PROGRESS: at 4.79% examples, 128930 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 4.98% examples, 129063 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 5.18% examples, 129422 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 5.39% examples, 129818 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 5.58% examples, 130257 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 5.77% examples, 130319 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 5.95% examples, 130407 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 6.11% examples, 129815 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 6.31% examples, 129783 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 6.51% examples, 130080 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 6.70% examples, 129905 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 6.91% examples, 130339 words/s, in_qsize 5, out_

INFO EPOCH 5 - PROGRESS: at 23.88% examples, 135276 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 24.06% examples, 135278 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 24.26% examples, 135301 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 24.46% examples, 135333 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 24.66% examples, 135411 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 24.84% examples, 135474 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 25.03% examples, 135472 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 25.23% examples, 135474 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 25.44% examples, 135500 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 25.65% examples, 135524 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 25.85% examples, 135613 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 26.06% examples, 135624 words/s, in_q

INFO EPOCH 5 - PROGRESS: at 42.54% examples, 134556 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 42.73% examples, 134558 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 42.92% examples, 134570 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 43.12% examples, 134596 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 43.31% examples, 134570 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 43.46% examples, 134411 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 43.60% examples, 134278 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 43.77% examples, 134204 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 43.97% examples, 134219 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 44.19% examples, 134227 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 44.39% examples, 134241 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 44.59% examples, 134250 words/s, in_q

INFO EPOCH 5 - PROGRESS: at 61.71% examples, 135093 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 61.91% examples, 135097 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 62.13% examples, 135087 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 62.34% examples, 135124 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 62.55% examples, 135126 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 62.77% examples, 135158 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 62.97% examples, 135154 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 63.18% examples, 135160 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 63.38% examples, 135180 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 63.57% examples, 135189 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 63.76% examples, 135204 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 63.95% examples, 135205 words/s, in_q

INFO EPOCH 5 - PROGRESS: at 81.00% examples, 135781 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 81.20% examples, 135789 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 81.39% examples, 135794 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 81.60% examples, 135807 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 81.78% examples, 135810 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 81.98% examples, 135809 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 82.19% examples, 135805 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 82.37% examples, 135826 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 82.56% examples, 135806 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 82.77% examples, 135811 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 82.98% examples, 135802 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 5 - PROGRESS: at 83.19% examples, 135822 words/s, in_q

INFO worker thread finished; awaiting finish of 2 more threads
INFO worker thread finished; awaiting finish of 1 more threads
INFO EPOCH 5 - PROGRESS: at 100.00% examples, 136125 words/s, in_qsize 0, out_qsize 1
INFO worker thread finished; awaiting finish of 0 more threads
INFO EPOCH - 5 : training on 72952683 raw words (72992332 effective words) took 536.2s, 136124 effective words/s
INFO EPOCH 6 - PROGRESS: at 0.15% examples, 105143 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 0.34% examples, 119216 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 0.49% examples, 120495 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 0.70% examples, 122459 words/s, in_qsize 5, out_qsize 1
INFO EPOCH 6 - PROGRESS: at 0.90% examples, 125293 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 1.10% examples, 126486 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 1.28% examples, 128035 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS:

INFO EPOCH 6 - PROGRESS: at 19.07% examples, 141974 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 19.26% examples, 141937 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 19.46% examples, 141994 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 19.65% examples, 141897 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 19.86% examples, 141918 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 20.05% examples, 141885 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 20.27% examples, 141922 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 20.48% examples, 141906 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 20.68% examples, 141883 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 20.87% examples, 141868 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 21.09% examples, 141933 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 21.28% examples, 141886 words/s, in_q

INFO EPOCH 6 - PROGRESS: at 38.87% examples, 142721 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 39.08% examples, 142710 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 39.30% examples, 142708 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 39.52% examples, 142734 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 39.74% examples, 142725 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 39.95% examples, 142739 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 40.16% examples, 142764 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 40.37% examples, 142766 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 40.57% examples, 142740 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 40.78% examples, 142747 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 40.98% examples, 142806 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 41.15% examples, 142781 words/s, in_q

INFO EPOCH 6 - PROGRESS: at 58.84% examples, 142713 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 59.06% examples, 142717 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 59.28% examples, 142713 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 59.49% examples, 142697 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 59.69% examples, 142694 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 59.89% examples, 142710 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 60.10% examples, 142724 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 60.30% examples, 142723 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 60.53% examples, 142723 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 60.72% examples, 142731 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 60.93% examples, 142721 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 61.13% examples, 142727 words/s, in_q

INFO EPOCH 6 - PROGRESS: at 78.74% examples, 142903 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 78.95% examples, 142915 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 79.14% examples, 142928 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 79.33% examples, 142931 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 79.53% examples, 142936 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 79.73% examples, 142940 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 79.93% examples, 142940 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 80.14% examples, 142949 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 80.35% examples, 142931 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 80.56% examples, 142949 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 80.76% examples, 142959 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 80.95% examples, 142957 words/s, in_q

INFO EPOCH 6 - PROGRESS: at 97.82% examples, 142007 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 98.04% examples, 141994 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 98.23% examples, 141993 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 98.44% examples, 141981 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 98.61% examples, 141967 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 98.79% examples, 141967 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 98.98% examples, 141951 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 99.18% examples, 141948 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 99.37% examples, 141940 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 99.54% examples, 141930 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 99.73% examples, 141929 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 6 - PROGRESS: at 99.97% examples, 141958 words/s, in_q

INFO EPOCH 7 - PROGRESS: at 16.32% examples, 136815 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 16.53% examples, 136851 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 16.73% examples, 136836 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 16.93% examples, 136872 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 17.12% examples, 136932 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 17.32% examples, 136978 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 17.51% examples, 136967 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 17.71% examples, 137003 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 17.92% examples, 137033 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 18.11% examples, 137085 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 18.31% examples, 137151 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 18.50% examples, 137130 words/s, in_q

INFO EPOCH 7 - PROGRESS: at 35.49% examples, 136910 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 35.68% examples, 136944 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 35.87% examples, 136924 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 36.07% examples, 136921 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 36.25% examples, 136928 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 36.46% examples, 136983 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 36.65% examples, 136974 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 36.82% examples, 136972 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 37.03% examples, 136961 words/s, in_qsize 5, out_qsize 1
INFO EPOCH 7 - PROGRESS: at 37.24% examples, 136986 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 37.44% examples, 136998 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 37.65% examples, 137010 words/s, in_q

INFO EPOCH 7 - PROGRESS: at 54.85% examples, 136909 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 55.05% examples, 136931 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 55.24% examples, 136931 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 55.43% examples, 136936 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 55.65% examples, 136932 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 55.86% examples, 136947 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 56.06% examples, 136980 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 56.26% examples, 136968 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 56.45% examples, 136974 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 56.66% examples, 136974 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 56.86% examples, 136982 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 57.06% examples, 137012 words/s, in_q

INFO EPOCH 7 - PROGRESS: at 74.33% examples, 137259 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 74.53% examples, 137273 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 74.75% examples, 137266 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 74.93% examples, 137261 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 75.12% examples, 137262 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 75.32% examples, 137264 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 75.51% examples, 137274 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 75.71% examples, 137265 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 75.90% examples, 137272 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 76.11% examples, 137280 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 76.31% examples, 137291 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 76.51% examples, 137289 words/s, in_q

INFO EPOCH 7 - PROGRESS: at 92.85% examples, 136744 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 93.06% examples, 136744 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 93.25% examples, 136748 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 93.44% examples, 136755 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 93.66% examples, 136763 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 93.87% examples, 136767 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 94.08% examples, 136759 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 94.27% examples, 136759 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 94.48% examples, 136759 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 94.68% examples, 136754 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 94.88% examples, 136736 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 7 - PROGRESS: at 95.08% examples, 136740 words/s, in_q

INFO EPOCH 8 - PROGRESS: at 11.37% examples, 137221 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 11.55% examples, 137331 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 11.73% examples, 137301 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 11.94% examples, 137358 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 12.14% examples, 137321 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 12.34% examples, 137403 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 12.53% examples, 137544 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 12.73% examples, 137353 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 12.94% examples, 137423 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 13.16% examples, 137488 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 13.34% examples, 137466 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 13.53% examples, 137455 words/s, in_q

INFO EPOCH 8 - PROGRESS: at 30.55% examples, 137566 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 30.77% examples, 137555 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 30.98% examples, 137588 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 31.18% examples, 137615 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 31.38% examples, 137600 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 31.57% examples, 137608 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 31.78% examples, 137605 words/s, in_qsize 5, out_qsize 1
INFO EPOCH 8 - PROGRESS: at 31.98% examples, 137630 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 32.17% examples, 137652 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 32.38% examples, 137639 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 32.60% examples, 137655 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 32.79% examples, 137657 words/s, in_q

INFO EPOCH 8 - PROGRESS: at 49.71% examples, 137261 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 49.90% examples, 137255 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 50.10% examples, 137239 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 50.27% examples, 137138 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 50.40% examples, 136977 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 50.59% examples, 136918 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 50.77% examples, 136843 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 50.96% examples, 136842 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 51.13% examples, 136791 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 51.33% examples, 136815 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 51.52% examples, 136818 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 51.74% examples, 136843 words/s, in_q

INFO EPOCH 8 - PROGRESS: at 67.70% examples, 134458 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 67.89% examples, 134457 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 68.08% examples, 134464 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 68.26% examples, 134410 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 68.46% examples, 134408 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 68.65% examples, 134420 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 68.85% examples, 134439 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 69.04% examples, 134444 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 69.24% examples, 134457 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 69.43% examples, 134487 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 69.59% examples, 134429 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 69.78% examples, 134426 words/s, in_q

INFO EPOCH 8 - PROGRESS: at 86.07% examples, 134116 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 86.29% examples, 134129 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 86.48% examples, 134140 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 86.69% examples, 134141 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 86.89% examples, 134147 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 87.08% examples, 134168 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 87.26% examples, 134175 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 87.44% examples, 134188 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 87.66% examples, 134200 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 87.88% examples, 134204 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 88.08% examples, 134213 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 8 - PROGRESS: at 88.26% examples, 134216 words/s, in_q

INFO EPOCH 9 - PROGRESS: at 4.45% examples, 133897 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 4.66% examples, 133958 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 4.86% examples, 134191 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 5.05% examples, 134296 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 5.25% examples, 134677 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 5.44% examples, 134265 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 5.63% examples, 134297 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 5.84% examples, 134638 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 6.01% examples, 134658 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 6.21% examples, 134568 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 6.43% examples, 134899 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 6.62% examples, 134841 words/s, in_qsize 6, out_

INFO EPOCH 9 - PROGRESS: at 23.56% examples, 136853 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 23.76% examples, 136856 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 23.98% examples, 136892 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 24.16% examples, 136893 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 24.35% examples, 136937 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 24.54% examples, 136914 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 24.72% examples, 136984 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 24.91% examples, 136993 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 25.10% examples, 136988 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 25.30% examples, 136987 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 25.50% examples, 136980 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 25.71% examples, 137015 words/s, in_q

INFO EPOCH 9 - PROGRESS: at 42.83% examples, 137638 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 43.02% examples, 137629 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 43.21% examples, 137620 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 43.43% examples, 137634 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 43.61% examples, 137632 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 43.82% examples, 137640 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 44.01% examples, 137629 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 44.22% examples, 137596 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 44.40% examples, 137593 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 44.60% examples, 137597 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 44.80% examples, 137597 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 44.99% examples, 137603 words/s, in_q

INFO EPOCH 9 - PROGRESS: at 62.09% examples, 137297 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 62.30% examples, 137312 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 62.50% examples, 137312 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 62.71% examples, 137311 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 62.93% examples, 137322 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 63.14% examples, 137318 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 63.34% examples, 137326 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 63.53% examples, 137316 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 63.70% examples, 137312 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 63.91% examples, 137314 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 64.11% examples, 137337 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 64.31% examples, 137327 words/s, in_q

INFO EPOCH 9 - PROGRESS: at 81.25% examples, 137311 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 81.44% examples, 137310 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 81.65% examples, 137340 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 81.84% examples, 137355 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 82.05% examples, 137363 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 82.23% examples, 137375 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 82.44% examples, 137373 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 82.66% examples, 137394 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 82.87% examples, 137407 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 83.10% examples, 137425 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 83.29% examples, 137430 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 9 - PROGRESS: at 83.48% examples, 137450 words/s, in_q

INFO worker thread finished; awaiting finish of 1 more threads
INFO worker thread finished; awaiting finish of 0 more threads
INFO EPOCH - 9 : training on 72952683 raw words (72993772 effective words) took 531.2s, 137415 effective words/s
INFO EPOCH 10 - PROGRESS: at 0.15% examples, 106556 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 0.35% examples, 125055 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 0.54% examples, 128942 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 0.74% examples, 131156 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 0.94% examples, 131453 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 1.15% examples, 133206 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 1.34% examples, 134837 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 1.54% examples, 135618 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 1.74% examples, 135461 words/s, in_qsize 5, out_qsiz

INFO EPOCH 10 - PROGRESS: at 18.60% examples, 135979 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 18.80% examples, 135928 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 18.97% examples, 135954 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 19.17% examples, 135982 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 19.37% examples, 136045 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 19.57% examples, 136065 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 19.76% examples, 136072 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 19.96% examples, 136066 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 20.16% examples, 136055 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 20.36% examples, 136049 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 20.55% examples, 136119 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 20.75% examples, 136093 w

INFO EPOCH 10 - PROGRESS: at 37.74% examples, 136981 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 37.94% examples, 136986 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 38.14% examples, 136977 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 38.33% examples, 137000 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 38.53% examples, 137015 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 38.71% examples, 137017 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 38.92% examples, 137057 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 39.10% examples, 137014 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 39.31% examples, 137035 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 39.52% examples, 137031 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 39.74% examples, 137020 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 39.95% examples, 137035 w

INFO EPOCH 10 - PROGRESS: at 56.71% examples, 136798 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 56.91% examples, 136798 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 57.11% examples, 136810 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 57.29% examples, 136813 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 57.48% examples, 136773 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 57.68% examples, 136751 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 57.84% examples, 136659 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 58.01% examples, 136628 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 58.21% examples, 136646 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 58.39% examples, 136608 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 58.60% examples, 136606 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 58.81% examples, 136621 w

INFO EPOCH 10 - PROGRESS: at 75.06% examples, 135516 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 75.28% examples, 135520 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 75.47% examples, 135541 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 75.67% examples, 135550 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 75.85% examples, 135555 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 76.04% examples, 135554 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 76.24% examples, 135565 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 76.43% examples, 135575 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 76.64% examples, 135580 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 76.84% examples, 135580 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 77.05% examples, 135590 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 77.26% examples, 135598 w

INFO EPOCH 10 - PROGRESS: at 93.97% examples, 136098 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 94.16% examples, 136093 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 94.34% examples, 136090 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 94.53% examples, 136080 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 94.73% examples, 136085 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 94.94% examples, 136098 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 95.13% examples, 136091 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 95.34% examples, 136109 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 95.54% examples, 136111 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 95.72% examples, 136107 words/s, in_qsize 5, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 95.90% examples, 136096 words/s, in_qsize 6, out_qsize 0
INFO EPOCH 10 - PROGRESS: at 96.12% examples, 136092 w

In [35]:
# Hebrew topic words to probe the trained model with; each one is looked up
# as a word vector when the per-topic source rankings are built.
test_topics = [
    u"פסח",
    u"תפילה",
    u"משה",
    u"אברהם",
    u"צדקה",
    u"שבת",
    u"רות",
    u"אסתר",
    u"תשובה",
    u"חטא",
    u"חגים",
    u"קהילה",
    u"ישראל",
    u"צדק",
    u"התגלות",
    u"אהבה",
    u"מין",
    u"תפילין",
    u"סביבה",
]

In [38]:
# How many nearest document vectors to pull per topic, and how many of the
# best-scoring sources to keep for each topic in the final dictionary.
TOPN_SIMILAR_DOCS = 3000
TOP_SOURCES_PER_TOPIC = 100


def get_ref_score(ref, topic=u'פסח'):
    """
    Cosine similarity between a ref's document vector and a topic's word vector.
    :param ref: document tag to look up in model.docvecs
    :param topic: word to look up in model.  Defaults to the word that used to be
                  hard-coded here, so existing one-argument calls behave as before
    :return: 1 - cosine distance, or 0 if either lookup raises KeyError
    """
    try:
        return 1 - spatial.distance.cosine(model.docvecs[ref], model[topic])
    except KeyError:
        # Either the ref or the topic has no vector in the model: contribute nothing.
        return 0


def _linked_trefs(oref, tref):
    """
    Collects the ref strings found on every link of oref, leaving out tref itself.
    :param oref: Ref object whose linkset() is walked
    :param tref: the ref string oref was built from
    :return: List of ref strings, in link order (may contain repeats)
    """
    return [other_ref
            for link in oref.linkset()
            for other_ref in link.refs
            if other_ref != tref]


test_topics_dict = {}
for topic in test_topics:
    test_topics_dict[topic] = {}

    # Candidate pool: the documents nearest to the topic's word vector ...
    topic_sources = model.docvecs.most_similar([model[topic]], topn=TOPN_SIMILAR_DOCS)
    topic_sources = [hit[0] for hit in topic_sources]
    set_of_related = set(topic_sources)
    # ... widened by everything those documents are linked to.
    for tref in topic_sources:
        try:
            oref = Ref(tref)
        except (InputError, PartialRefInputError):
            # Same handling as the loops below: a tag that is not a usable ref is skipped.
            continue
        set_of_related.update(_linked_trefs(oref, tref))
    # Parenthesised single-argument print is valid in both Python 2 and Python 3.
    print(len(set_of_related))

    final_scores = {}
    for tref in set_of_related:
        try:
            oref = Ref(tref)
            if not oref.is_segment_level():
                continue
        except InputError:
            continue

        # Score against the topic currently being processed (this used to be
        # fixed to a single word for every topic), then add the score of each
        # segment-level ref this source links to.
        score = get_ref_score(tref, topic)
        for other_ref in _linked_trefs(oref, tref):
            try:
                other_oref = Ref(other_ref)
            except InputError:
                continue
            if other_oref.is_segment_level():
                score += get_ref_score(other_ref, topic)
        final_scores[tref] = score

    # Keep only the highest-scoring sources for this topic.
    ranked_scores = sorted(final_scores.items(), key=lambda item: item[1], reverse=True)
    for top_source in ranked_scores[:TOP_SOURCES_PER_TOPIC]:
        test_topics_dict[topic][top_source[0]] = top_source[1]
        

6645
6100
5354
7285
6111
6109
7745
7344
3993
5056
6052
3521
5853
8124
4154
5105
5900
6217
7724


In [39]:
# Write the per-topic rankings to disk as indented JSON.  ensure_ascii=False
# keeps the Hebrew topic keys as readable UTF-8 rather than \u escapes.
with codecs.open('test_topics.json', 'w', encoding='utf8') as json_out:
    json.dump(test_topics_dict, json_out, ensure_ascii=False, indent=2)