In [1]:
import numpy as np
import nltk
import matplotlib.pyplot as plt
import re
import sys
import contractions

In [2]:
# Step 1 

!python fetch_gutenberg.py > data/gutenberg.txt

[nltk_data] Downloading package gutenberg to
[nltk_data]     /home/tilemahos/nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!


In [3]:
# Step 2

# Read the gutenberg.txt file word by word
# and create a disctionary that holds as keys the tokens(words) and as values the number of appearance
Dict = {}
with open('./data/gutenberg.txt') as file:
    # reading each line   
    for line in file:
        # reading each word       
        for word in line.split():
            if word not in Dict:
                Dict[word] = 1
            else:
                Dict[word] = Dict[word] + 1

# Filter rare tokens
Dict = {key: val for key, val in Dict.items() if val >= 5}
        
# Write the Dict in an output file
with open("./vocab/vocab.txt",'w') as f:
    for word in Dict.keys():
        f.write("{}\t{}\n".format(word,Dict[word]))

In [4]:
# Step 3

# Create chars.syms containing all ascii characters indexed
with open("./vocab/chars.syms",'w') as f:
    f.write("<eps>\t0\n")
    for i in range(97,123):
        f.write("{}\t{}\n".format(chr(i),i-96))
 
 # Create words.syms containing all words in corpus indexed
with open("./vocab/words.syms",'w') as f:
    i = 0
    f.write("<eps>\t0\n")
    i = i+1
    for word in Dict.keys():
        f.write("{}\t{}\n".format(word,i))
        i = i+1

In [5]:
# Step 4

# Levenhstein .fst

with open("./fsts/L.fst",'w') as f:
    for i in range(97,123):
        f.write("0\t0\t{}\t{}\t0\n".format(chr(i),chr(i))) # chr -> chr
        f.write("0\t0\t{}\t<eps>\t1\n".format(chr(i))) # chr -> <eps>
        f.write("0\t0\t<eps>\t{}\t1\n".format(chr(i))) # <eps> -> chr
        for j in range(97,123):
            if j != i:
                f.write("0\t0\t{}\t{}\t1\n".format(chr(i),chr(j))) # chr -> chr
    f.write("0\t0\n") # end state
                
!fstcompile -isymbols=./vocab/chars.syms -osymbols=./vocab/chars.syms ./fsts/L.fst ./fsts/L.binfst
!fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/chars.syms -portrait ./fsts/L.binfst | dot -Tpng >./fsts/L.png
!display ./fsts/L.png

In [6]:
# Step 5

# Dictionary acceptor .fst

with open("./fsts/V.fst",'w') as f:
    j = 0
    for word in Dict.keys():
        i = 0
        for letter in word:
            if i == 0:
                f.write("{}\t{}\t{}\t{}\t0\n".format(i,j+1,letter,word)) # word[0] -> word
                j = j+1
            else:    
                f.write("{}\t{}\t{}\t<eps>\t0\n".format(j,j+1,letter)) # word[i>0] -> <eps>
                j = j+1
            i = i+1
        f.write("{}\t0\n".format(j)) # end state

In [7]:
!fstcompile -isymbols=./vocab/chars.syms -osymbols=./vocab/words.syms ./fsts/V.fst ./fsts/V.binfst
!fstrmepsilon ./fsts/V.binfst | fstdeterminize | fstminimize >./fsts/V_opt.binfst
# !fstdraw -isymbols=./fsts/chars.syms -osymbols=./fsts/words.syms -portrait ./fsts/V_opt.binfst | dot -Tpng >./fsts/V_opt.png
# !display ./fsts/V_opt.png

In [8]:
# Step 6

# Compose L (Levensthein) with V (Dictionary acceptor) to build the naive spell checker S

!fstarcsort --sort_type=olabel ./fsts/L.binfst ./fsts/L.binfst
!fstarcsort --sort_type=ilabel ./fsts/V_opt.binfst ./fsts/V_opt.binfst
!fstcompose ./fsts/L.binfst ./fsts/V_opt.binfst ./fsts/S.binfst

!./predict.sh ./fsts/S.binfst cwt 


cut

In [9]:
!./predict.sh ./fsts/S.binfst cit 

city

In [10]:
!./predict.sh ./fsts/S.binfst antheaterry 

entreaty

In [11]:
# Step 7 

# Correcting the first 20 words of spell_test.txt with S

import subprocess

with open('./data/spell_test.txt') as file:
    # reading each line 
    j = 0
    for line in file:
        j = j+1
        if j == 2:
            break
        i = 0
        # reading each word       
        for word in line.split():
            i = i+1
            if i == 1:
                print("True \n{}".format(word))
            elif i == 2:
                print("False \n{}".format(word))
                print("Corrected ")
                subprocess.call(['bash','predict.sh', './fsts/S.binfst' , word])
                print('\n')
            else:
                break

True 
contented:
False 
contenpted
Corrected 
contented



**Part 1**

In [12]:
# Step 8

!./word_edits.sh abandonned abandoned

n	<eps>


In [13]:
# Create a file with the edits in wiki.txt

'''
with open('./data/wiki.txt') as file:
    # reading each line 
    for line in file:
        # reading words       
        words = line.split()
        subprocess.call(['bash','word_edits_savetofile.sh', words[0] , words[1]])
            
'''

In [14]:
# Create Dict with frequency(value) of each edit(key)

Edits = {}
with open('./data/edits.txt') as file:
    # reading each line   
    for line in file:
        # reading each edit      
        edit =  tuple(line.split())
        if edit not in Edits:
            Edits[edit] = 1
        else:
            Edits[edit] = Edits[edit] + 1

print(len(Edits))
print(24 + 24 + 24*23)

342
600


In [15]:
from math import log10

# E .fst (edit weight == -log10(edit freq))

total = sum(Edits.values())

with open("./fsts/E.fst",'w') as f:
    for i in range(97,123):
        f.write("0\t0\t{}\t{}\t0\n".format(chr(i),chr(i))) # chr -> chr
        
        if (chr(i), '<eps>') not in Edits:
            f.write("0\t0\t{}\t<eps>\t10000\n".format(chr(i))) # chr -> <eps>
        else:
            f.write("0\t0\t{}\t<eps>\t{}\n".format(chr(i),-log10(Edits[(chr(i),'<eps>')]/total))) # chr -> <eps>
            
        if ('<eps>', chr(i)) not in Edits:
            f.write("0\t0\t<eps>\t{}\t10000\n".format(chr(i))) # <eps> -> chr
        else:
            f.write("0\t0\t<eps>\t{}\t{}\n".format(chr(i),-log10(Edits[('<eps>', chr(i))]/total))) # <eps> -> chr
        
        for j in range(97,123):
            if j != i:
                if (chr(i), chr(j)) not in Edits:
                    f.write("0\t0\t{}\t{}\t10000\n".format(chr(i),chr(j))) # chr -> chr
                else:
                    f.write("0\t0\t{}\t{}\t{}\n".format(chr(i),chr(j),-log10(Edits[(chr(i), chr(j))]/total))) # chr -> chr
    
    f.write("0\t0\n") # end state
                
!fstcompile -isymbols=./vocab/chars.syms -osymbols=./vocab/chars.syms ./fsts/E.fst ./fsts/E.binfst
# !fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/chars.syms -portrait ./fsts/L.binfst | dot -Tpng >./fsts/L.png
# !display ./fsts/L.png

In [16]:
# Compose E with V (Dictionary acceptor) to build the spell checker EV

!fstarcsort --sort_type=olabel ./fsts/E.binfst ./fsts/E.binfst
!fstarcsort --sort_type=ilabel ./fsts/V_opt.binfst ./fsts/V_opt.binfst
!fstcompose ./fsts/E.binfst ./fsts/V_opt.binfst ./fsts/EV.binfst

!./predict.sh ./fsts/EV.binfst cwt 

wit

In [17]:
!./predict.sh ./fsts/EV.binfst cit 

clit

In [18]:
!./predict.sh ./fsts/EV.binfst antheaterry 

theatre

In [19]:

# Correcting the first 20 words of spell_test.txt with EV

import subprocess

with open('./data/spell_test.txt') as file:
    # reading each line 
    j = 0
    for line in file:
        j = j+1
        if j == 2:
            break
        i = 0
        # reading each word       
        for word in line.split():
            i = i+1
            if i == 1:
                print("True \n{}".format(word))
            elif i == 2:
                print("False \n{}".format(word))
                print("Corrected ")
                subprocess.call(['bash','predict.sh', './fsts/EV.binfst' , word])
                print('\n')
            else:
                break

True 
contented:
False 
contenpted
Corrected 
contented



In [22]:
# Step 9

total_words = sum(Dict.values())

with open("./fsts/W.fst",'w') as f:
    for word in Dict.keys():
        f.write("0\t0\t{}\t{}\t{}\n".format(word,word,-log10(Dict[word]/total_words))) # word -> word
    f.write("0\t0\n") # end state            
    
!fstcompile -isymbols=./vocab/words.syms -osymbols=./vocab/words.syms ./fsts/W.fst ./fsts/W.binfst

In [23]:
# Compose L (Levensthein) with V (Dictionary acceptor) and W (word frequency acceptor)to build the spell checker LVW

!fstarcsort --sort_type=olabel ./fsts/S.binfst ./fsts/S.binfst
!fstarcsort --sort_type=ilabel ./fsts/W.binfst ./fsts/W.binfst
!fstcompose ./fsts/S.binfst ./fsts/W.binfst ./fsts/LVW.binfst

!./predict.sh ./fsts/LVW.binfst cwt 

it

In [24]:
!./predict.sh ./fsts/LVW.binfst cit 

it

In [25]:
# Compose E with V (Dictionary acceptor) and W (word frequency acceptor)to build the spell checker EVW

!fstarcsort --sort_type=olabel ./fsts/EV.binfst ./fsts/EV.binfst
!fstarcsort --sort_type=ilabel ./fsts/W.binfst ./fsts/W.binfst
!fstcompose ./fsts/EV.binfst ./fsts/W.binfst ./fsts/EVW.binfst

!./predict.sh ./fsts/EVW.binfst cwt 

with

In [26]:
!./predict.sh ./fsts/EVW.binfst cit 

it

In [27]:
!fstarcsort --sort_type=olabel ./fsts/V_opt.binfst ./fsts/V_opt.binfst
!fstarcsort --sort_type=ilabel ./fsts/W.binfst ./fsts/W.binfst
!fstcompose ./fsts/V_opt.binfst ./fsts/W.binfst ./fsts/VW.binfst

# !fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/words.syms -portrait ./fsts/VW.binfst | dot -Tpng >./fsts/VW.png 
# !display ./fsts/VW.png

# !fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/words.syms -portrait ./fsts/V_opt.binfst | dot -Tpng >./fsts/V_opt.png 
# !display ./fsts/V_opt.png

In [36]:
# Step 10

!python run_evaluation.py fsts/S.binfst

contenpted -> contented: contented                                              
contende -> contend: contented                                                  
contended -> contended: contented                                               
contentid -> contented: contented                                               
begining -> beginning: beginning                                                
problam -> problem: problem                                                     
proble -> problem: problem                                                      
promblem -> problem: problem                                                    
proplen -> problem: problem                                                     
dirven -> dire: driven                                                          
exstacy -> ecstasy: ecstasy                                                     
ecstacy -> ecstasy: ecstasy                                                     
guic -> guil: juice         

scarcly -> scarcely: scarcely                                                   
scarecly -> scarcely: scarcely                                                  
scarely -> scarcely: scarcely                                                   
scarsely -> scarcely: scarcely                                                  
questionaire -> questioning: questionnaire                                      
experance -> experience: experience                                             
experiance -> experience: experience                                            
possable -> possible: possible                                                  
reafreshment -> refreshment: refreshment                                        
refreshmant -> refreshment: refreshment                                         
refresment -> refreshment: refreshment                                          
refressmunt -> refreshment: refreshment                                         
embaras -> embark: embarrass

In [37]:
!python run_evaluation.py fsts/LVW.binfst

contenpted -> contented: contented                                              
contende -> contend: contented                                                  
contended -> contended: contented                                               
contentid -> contented: contented                                               
begining -> beginning: beginning                                                
problam -> problem: problem                                                     
proble -> people: problem                                                       
promblem -> problem: problem                                                    
proplen -> people: problem                                                      
dirven -> given: driven                                                         
exstacy -> stay: ecstasy                                                        
ecstacy -> ecstasy: ecstasy                                                     
guic -> in: juice           

scarcly -> scarcely: scarcely                                                   
scarecly -> scarcely: scarcely                                                  
scarely -> scarcely: scarcely                                                   
scarsely -> scarcely: scarcely                                                  
questionaire -> question: questionnaire                                         
experance -> experience: experience                                             
experiance -> experience: experience                                            
possable -> possible: possible                                                  
reafreshment -> refreshment: refreshment                                        
refreshmant -> refreshment: refreshment                                         
refresment -> refreshment: refreshment                                          
refressmunt -> refreshment: refreshment                                         
embaras -> ears: embarrass  

In [1]:
!python run_evaluation.py fsts/EV.binfst

contenpted -> contented: contented                                              
contende -> contend: contented                                                  
contended -> contended: contented                                               
contentid -> contented: contented                                               
begining -> beginning: beginning                                                
problam -> problem: problem                                                     
proble -> problem: problem                                                      
promblem -> problem: problem                                                    
proplen -> people: problem                                                      
dirven -> driven: driven                                                        
exstacy -> exactly: ecstasy                                                     
ecstacy -> ecstasy: ecstasy                                                     
guic -> guil: juice         

scarcly -> scarcely: scarcely                                                   
scarecly -> scarcely: scarcely                                                  
scarely -> scarcely: scarcely                                                   
scarsely -> scarcely: scarcely                                                  
questionaire -> question: questionnaire                                         
experance -> experience: experience                                             
experiance -> experience: experience                                            
possable -> possible: possible                                                  
reafreshment -> refreshment: refreshment                                        
refreshmant -> refreshment: refreshment                                         
refresment -> refreshment: refreshment                                          
refressmunt -> refreshment: refreshment                                         
embaras -> embark: embarrass

In [39]:
!python run_evaluation.py fsts/EVW.binfst

contenpted -> contented: contented                                              
contende -> contend: contented                                                  
contended -> contended: contented                                               
contentid -> contented: contented                                               
begining -> beginning: beginning                                                
problam -> problem: problem                                                     
proble -> problem: problem                                                      
promblem -> problem: problem                                                    
proplen -> people: problem                                                      
dirven -> driven: driven                                                        
exstacy -> exactly: ecstasy                                                     
ecstacy -> ecstasy: ecstasy                                                     
guic -> i: juice            

scarcly -> scarcely: scarcely                                                   
scarecly -> scarcely: scarcely                                                  
scarely -> scarcely: scarcely                                                   
scarsely -> scarcely: scarcely                                                  
questionaire -> question: questionnaire                                         
experance -> experience: experience                                             
experiance -> experience: experience                                            
possable -> possible: possible                                                  
reafreshment -> refreshment: refreshment                                        
refreshmant -> refreshment: refreshment                                         
refresment -> refreshment: refreshment                                          
refressmunt -> refreshment: refreshment                                         
embaras -> embraces: embarra

 **Part 2**

In [2]:
# Step 12

Words = []
with open('./data/gutenberg.txt') as file:
    # reading each line   
    for line in file:
        # reading each word       
        for word in line.split():
            Words.append(word)
            
print(Words[0:3])

['emma', 'by', 'jane']


In [3]:
from gensim.test.utils import common_texts

print(common_texts[0:2])

[['human', 'interface', 'computer'], ['survey', 'user', 'computer', 'system', 'response', 'time']]


In [23]:
# Train a w2v model on gutenberg 

!python w2v_train.py

INFO - 15:41:46: collecting all words and their counts
INFO - 15:41:46: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 15:41:46: PROGRESS: at sentence #10000, processed 113514 words, keeping 6066 word types
INFO - 15:41:46: PROGRESS: at sentence #20000, processed 227309 words, keeping 8494 word types
INFO - 15:41:46: PROGRESS: at sentence #30000, processed 324666 words, keeping 9843 word types
INFO - 15:41:46: PROGRESS: at sentence #40000, processed 427437 words, keeping 11782 word types
INFO - 15:41:46: PROGRESS: at sentence #50000, processed 537200 words, keeping 13113 word types
INFO - 15:41:46: PROGRESS: at sentence #60000, processed 647732 words, keeping 14316 word types
INFO - 15:41:46: PROGRESS: at sentence #70000, processed 751683 words, keeping 16047 word types
INFO - 15:41:46: PROGRESS: at sentence #80000, processed 847972 words, keeping 16841 word types
INFO - 15:41:46: PROGRESS: at sentence #90000, processed 957402 words, keeping 17336 word types
I

INFO - 15:41:53: worker thread finished; awaiting finish of 3 more threads
INFO - 15:41:53: worker thread finished; awaiting finish of 2 more threads
INFO - 15:41:53: worker thread finished; awaiting finish of 1 more threads
INFO - 15:41:53: worker thread finished; awaiting finish of 0 more threads
INFO - 15:41:53: EPOCH - 2 : training on 2134975 raw words (1470434 effective words) took 0.7s, 1968922 effective words/s
Loss after epoch 1: 0.0
INFO - 15:41:54: worker thread finished; awaiting finish of 3 more threads
INFO - 15:41:54: worker thread finished; awaiting finish of 2 more threads
INFO - 15:41:54: worker thread finished; awaiting finish of 1 more threads
INFO - 15:41:54: worker thread finished; awaiting finish of 0 more threads
INFO - 15:41:54: EPOCH - 3 : training on 2134975 raw words (1469933 effective words) took 0.7s, 1970040 effective words/s
Loss after epoch 2: 0.0
INFO - 15:41:55: worker thread finished; awaiting finish of 3 more threads
INFO - 15:41:55: worker thread fi

INFO - 15:42:07: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:07: worker thread finished; awaiting finish of 2 more threads
INFO - 15:42:07: worker thread finished; awaiting finish of 1 more threads
INFO - 15:42:07: worker thread finished; awaiting finish of 0 more threads
INFO - 15:42:07: EPOCH - 21 : training on 2134975 raw words (1470299 effective words) took 0.7s, 2023698 effective words/s
Loss after epoch 20: 0.0
INFO - 15:42:08: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:08: worker thread finished; awaiting finish of 2 more threads
INFO - 15:42:08: worker thread finished; awaiting finish of 1 more threads
INFO - 15:42:08: worker thread finished; awaiting finish of 0 more threads
INFO - 15:42:08: EPOCH - 22 : training on 2134975 raw words (1469976 effective words) took 0.8s, 1907105 effective words/s
Loss after epoch 21: 0.0
INFO - 15:42:09: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:09: worker threa

INFO - 15:42:22: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:22: worker thread finished; awaiting finish of 2 more threads
INFO - 15:42:22: worker thread finished; awaiting finish of 1 more threads
INFO - 15:42:22: worker thread finished; awaiting finish of 0 more threads
INFO - 15:42:22: EPOCH - 40 : training on 2134975 raw words (1469664 effective words) took 0.8s, 1940794 effective words/s
Loss after epoch 39: 0.0
INFO - 15:42:23: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:23: worker thread finished; awaiting finish of 2 more threads
INFO - 15:42:23: worker thread finished; awaiting finish of 1 more threads
INFO - 15:42:23: worker thread finished; awaiting finish of 0 more threads
INFO - 15:42:23: EPOCH - 41 : training on 2134975 raw words (1470949 effective words) took 0.7s, 1971908 effective words/s
Loss after epoch 40: 0.0
INFO - 15:42:23: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:23: worker threa

INFO - 15:42:36: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:36: worker thread finished; awaiting finish of 2 more threads
INFO - 15:42:36: worker thread finished; awaiting finish of 1 more threads
INFO - 15:42:36: worker thread finished; awaiting finish of 0 more threads
INFO - 15:42:36: EPOCH - 59 : training on 2134975 raw words (1469953 effective words) took 0.7s, 2060662 effective words/s
Loss after epoch 58: 0.0
INFO - 15:42:37: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:37: worker thread finished; awaiting finish of 2 more threads
INFO - 15:42:37: worker thread finished; awaiting finish of 1 more threads
INFO - 15:42:37: worker thread finished; awaiting finish of 0 more threads
INFO - 15:42:37: EPOCH - 60 : training on 2134975 raw words (1470207 effective words) took 0.8s, 1941262 effective words/s
Loss after epoch 59: 0.0
INFO - 15:42:37: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:37: worker threa

INFO - 15:42:50: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:50: worker thread finished; awaiting finish of 2 more threads
INFO - 15:42:50: worker thread finished; awaiting finish of 1 more threads
INFO - 15:42:50: worker thread finished; awaiting finish of 0 more threads
INFO - 15:42:50: EPOCH - 78 : training on 2134975 raw words (1469698 effective words) took 0.7s, 1967409 effective words/s
Loss after epoch 77: 0.0
INFO - 15:42:51: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:51: worker thread finished; awaiting finish of 2 more threads
INFO - 15:42:51: worker thread finished; awaiting finish of 1 more threads
INFO - 15:42:51: worker thread finished; awaiting finish of 0 more threads
INFO - 15:42:51: EPOCH - 79 : training on 2134975 raw words (1470309 effective words) took 0.8s, 1849916 effective words/s
Loss after epoch 78: 0.0
INFO - 15:42:52: worker thread finished; awaiting finish of 3 more threads
INFO - 15:42:52: worker threa

INFO - 15:43:04: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:04: worker thread finished; awaiting finish of 2 more threads
INFO - 15:43:04: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:04: worker thread finished; awaiting finish of 0 more threads
INFO - 15:43:04: EPOCH - 97 : training on 2134975 raw words (1469960 effective words) took 0.7s, 2029280 effective words/s
Loss after epoch 96: 0.0
INFO - 15:43:05: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:05: worker thread finished; awaiting finish of 2 more threads
INFO - 15:43:05: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:05: worker thread finished; awaiting finish of 0 more threads
INFO - 15:43:05: EPOCH - 98 : training on 2134975 raw words (1470123 effective words) took 0.7s, 2017882 effective words/s
Loss after epoch 97: 0.0
INFO - 15:43:06: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:06: worker threa

INFO - 15:43:19: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:19: worker thread finished; awaiting finish of 2 more threads
INFO - 15:43:19: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:19: worker thread finished; awaiting finish of 0 more threads
INFO - 15:43:19: EPOCH - 116 : training on 2134975 raw words (1469336 effective words) took 0.7s, 2002920 effective words/s
Loss after epoch 115: 0.0
INFO - 15:43:19: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:19: worker thread finished; awaiting finish of 2 more threads
INFO - 15:43:19: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:19: worker thread finished; awaiting finish of 0 more threads
INFO - 15:43:19: EPOCH - 117 : training on 2134975 raw words (1470161 effective words) took 0.7s, 2033296 effective words/s
Loss after epoch 116: 0.0
INFO - 15:43:20: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:20: worker t

INFO - 15:43:32: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:32: worker thread finished; awaiting finish of 0 more threads
INFO - 15:43:32: EPOCH - 134 : training on 2134975 raw words (1470538 effective words) took 0.7s, 2027920 effective words/s
Loss after epoch 133: 0.0
INFO - 15:43:33: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:33: worker thread finished; awaiting finish of 2 more threads
INFO - 15:43:33: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:33: worker thread finished; awaiting finish of 0 more threads
INFO - 15:43:33: EPOCH - 135 : training on 2134975 raw words (1470333 effective words) took 0.7s, 2002363 effective words/s
Loss after epoch 134: 0.0
INFO - 15:43:33: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:33: worker thread finished; awaiting finish of 2 more threads
INFO - 15:43:33: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:33: worker t

INFO - 15:43:46: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:46: worker thread finished; awaiting finish of 2 more threads
INFO - 15:43:46: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:46: worker thread finished; awaiting finish of 0 more threads
INFO - 15:43:46: EPOCH - 153 : training on 2134975 raw words (1471109 effective words) took 0.7s, 2032819 effective words/s
Loss after epoch 152: 0.0
INFO - 15:43:47: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:47: worker thread finished; awaiting finish of 2 more threads
INFO - 15:43:47: worker thread finished; awaiting finish of 1 more threads
INFO - 15:43:47: worker thread finished; awaiting finish of 0 more threads
INFO - 15:43:47: EPOCH - 154 : training on 2134975 raw words (1470207 effective words) took 0.7s, 2009816 effective words/s
Loss after epoch 153: 0.0
INFO - 15:43:47: worker thread finished; awaiting finish of 3 more threads
INFO - 15:43:47: worker t

INFO - 15:44:00: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:00: worker thread finished; awaiting finish of 2 more threads
INFO - 15:44:00: worker thread finished; awaiting finish of 1 more threads
INFO - 15:44:00: worker thread finished; awaiting finish of 0 more threads
INFO - 15:44:00: EPOCH - 172 : training on 2134975 raw words (1470057 effective words) took 0.8s, 1923382 effective words/s
Loss after epoch 171: 0.0
INFO - 15:44:01: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:01: worker thread finished; awaiting finish of 2 more threads
INFO - 15:44:01: worker thread finished; awaiting finish of 1 more threads
INFO - 15:44:01: worker thread finished; awaiting finish of 0 more threads
INFO - 15:44:01: EPOCH - 173 : training on 2134975 raw words (1470255 effective words) took 0.8s, 1920709 effective words/s
Loss after epoch 172: 0.0
INFO - 15:44:02: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:02: worker t

INFO - 15:44:16: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:16: worker thread finished; awaiting finish of 2 more threads
INFO - 15:44:16: worker thread finished; awaiting finish of 1 more threads
INFO - 15:44:16: worker thread finished; awaiting finish of 0 more threads
INFO - 15:44:16: EPOCH - 191 : training on 2134975 raw words (1470182 effective words) took 0.8s, 1825819 effective words/s
Loss after epoch 190: 0.0
INFO - 15:44:16: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:16: worker thread finished; awaiting finish of 2 more threads
INFO - 15:44:16: worker thread finished; awaiting finish of 1 more threads
INFO - 15:44:16: worker thread finished; awaiting finish of 0 more threads
INFO - 15:44:16: EPOCH - 192 : training on 2134975 raw words (1470465 effective words) took 0.7s, 2015343 effective words/s
Loss after epoch 191: 0.0
INFO - 15:44:17: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:17: worker t

INFO - 15:44:31: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:31: worker thread finished; awaiting finish of 2 more threads
INFO - 15:44:31: worker thread finished; awaiting finish of 1 more threads
INFO - 15:44:31: worker thread finished; awaiting finish of 0 more threads
INFO - 15:44:31: EPOCH - 209 : training on 2134975 raw words (1469865 effective words) took 1.0s, 1494410 effective words/s
Loss after epoch 208: 0.0
INFO - 15:44:32: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:32: worker thread finished; awaiting finish of 2 more threads
INFO - 15:44:32: worker thread finished; awaiting finish of 1 more threads
INFO - 15:44:32: worker thread finished; awaiting finish of 0 more threads
INFO - 15:44:32: EPOCH - 210 : training on 2134975 raw words (1470292 effective words) took 1.0s, 1513493 effective words/s
Loss after epoch 209: 0.0
INFO - 15:44:33: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:33: worker t

INFO - 15:44:50: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:50: worker thread finished; awaiting finish of 2 more threads
INFO - 15:44:50: worker thread finished; awaiting finish of 1 more threads
INFO - 15:44:50: worker thread finished; awaiting finish of 0 more threads
INFO - 15:44:50: EPOCH - 227 : training on 2134975 raw words (1469135 effective words) took 0.9s, 1560237 effective words/s
Loss after epoch 226: 0.0
INFO - 15:44:50: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:51: worker thread finished; awaiting finish of 2 more threads
INFO - 15:44:51: worker thread finished; awaiting finish of 1 more threads
INFO - 15:44:51: worker thread finished; awaiting finish of 0 more threads
INFO - 15:44:51: EPOCH - 228 : training on 2134975 raw words (1470039 effective words) took 1.0s, 1522012 effective words/s
Loss after epoch 227: 0.0
INFO - 15:44:51: worker thread finished; awaiting finish of 3 more threads
INFO - 15:44:51: worker t

INFO - 15:45:07: worker thread finished; awaiting finish of 1 more threads
INFO - 15:45:07: worker thread finished; awaiting finish of 0 more threads
INFO - 15:45:07: EPOCH - 245 : training on 2134975 raw words (1469915 effective words) took 1.0s, 1515419 effective words/s
Loss after epoch 244: 0.0
INFO - 15:45:08: worker thread finished; awaiting finish of 3 more threads
INFO - 15:45:08: worker thread finished; awaiting finish of 2 more threads
INFO - 15:45:08: worker thread finished; awaiting finish of 1 more threads
INFO - 15:45:08: worker thread finished; awaiting finish of 0 more threads
INFO - 15:45:08: EPOCH - 246 : training on 2134975 raw words (1470148 effective words) took 1.0s, 1524158 effective words/s
Loss after epoch 245: 0.0
INFO - 15:45:09: worker thread finished; awaiting finish of 3 more threads
INFO - 15:45:09: worker thread finished; awaiting finish of 2 more threads
INFO - 15:45:09: worker thread finished; awaiting finish of 1 more threads
INFO - 15:45:09: worker t

INFO - 15:45:25: worker thread finished; awaiting finish of 3 more threads
INFO - 15:45:25: worker thread finished; awaiting finish of 2 more threads
INFO - 15:45:25: worker thread finished; awaiting finish of 1 more threads
INFO - 15:45:25: worker thread finished; awaiting finish of 0 more threads
INFO - 15:45:25: EPOCH - 264 : training on 2134975 raw words (1470252 effective words) took 1.0s, 1490822 effective words/s
Loss after epoch 263: 0.0
INFO - 15:45:26: worker thread finished; awaiting finish of 3 more threads
INFO - 15:45:26: worker thread finished; awaiting finish of 2 more threads
INFO - 15:45:26: worker thread finished; awaiting finish of 1 more threads
INFO - 15:45:26: worker thread finished; awaiting finish of 0 more threads
INFO - 15:45:26: EPOCH - 265 : training on 2134975 raw words (1469895 effective words) took 1.0s, 1516126 effective words/s
Loss after epoch 264: 0.0
INFO - 15:45:27: worker thread finished; awaiting finish of 3 more threads
INFO - 15:45:27: worker t

INFO - 15:45:44: worker thread finished; awaiting finish of 3 more threads
INFO - 15:45:44: worker thread finished; awaiting finish of 2 more threads
INFO - 15:45:44: worker thread finished; awaiting finish of 1 more threads
INFO - 15:45:44: worker thread finished; awaiting finish of 0 more threads
INFO - 15:45:44: EPOCH - 283 : training on 2134975 raw words (1469223 effective words) took 0.9s, 1564187 effective words/s
Loss after epoch 282: 0.0
INFO - 15:45:45: worker thread finished; awaiting finish of 3 more threads
INFO - 15:45:45: worker thread finished; awaiting finish of 2 more threads
INFO - 15:45:45: worker thread finished; awaiting finish of 1 more threads
INFO - 15:45:45: worker thread finished; awaiting finish of 0 more threads
INFO - 15:45:45: EPOCH - 284 : training on 2134975 raw words (1470082 effective words) took 1.0s, 1523179 effective words/s
Loss after epoch 283: 0.0
INFO - 15:45:46: worker thread finished; awaiting finish of 3 more threads
INFO - 15:45:46: worker t

INFO - 15:46:01: EPOCH 301 - PROGRESS: at 91.88% examples, 1371612 words/s, in_qsize 6, out_qsize 1
INFO - 15:46:01: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:01: worker thread finished; awaiting finish of 2 more threads
INFO - 15:46:01: worker thread finished; awaiting finish of 1 more threads
INFO - 15:46:01: worker thread finished; awaiting finish of 0 more threads
INFO - 15:46:01: EPOCH - 301 : training on 2134975 raw words (1471080 effective words) took 1.1s, 1386418 effective words/s
Loss after epoch 300: 0.0
INFO - 15:46:02: EPOCH 302 - PROGRESS: at 95.35% examples, 1411384 words/s, in_qsize 7, out_qsize 0
INFO - 15:46:02: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:02: worker thread finished; awaiting finish of 2 more threads
INFO - 15:46:02: worker thread finished; awaiting finish of 1 more threads
INFO - 15:46:02: worker thread finished; awaiting finish of 0 more threads
INFO - 15:46:02: EPOCH - 302 : training on 2134975

INFO - 15:46:17: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:17: worker thread finished; awaiting finish of 2 more threads
INFO - 15:46:17: worker thread finished; awaiting finish of 1 more threads
INFO - 15:46:17: worker thread finished; awaiting finish of 0 more threads
INFO - 15:46:17: EPOCH - 318 : training on 2134975 raw words (1470639 effective words) took 0.9s, 1641303 effective words/s
Loss after epoch 317: 0.0
INFO - 15:46:18: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:18: worker thread finished; awaiting finish of 2 more threads
INFO - 15:46:18: worker thread finished; awaiting finish of 1 more threads
INFO - 15:46:18: worker thread finished; awaiting finish of 0 more threads
INFO - 15:46:18: EPOCH - 319 : training on 2134975 raw words (1470402 effective words) took 0.8s, 1959862 effective words/s
Loss after epoch 318: 0.0
INFO - 15:46:18: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:18: worker t

INFO - 15:46:31: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:31: worker thread finished; awaiting finish of 2 more threads
INFO - 15:46:31: worker thread finished; awaiting finish of 1 more threads
INFO - 15:46:31: worker thread finished; awaiting finish of 0 more threads
INFO - 15:46:31: EPOCH - 337 : training on 2134975 raw words (1470082 effective words) took 0.7s, 1996624 effective words/s
Loss after epoch 336: 0.0
INFO - 15:46:32: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:32: worker thread finished; awaiting finish of 2 more threads
INFO - 15:46:32: worker thread finished; awaiting finish of 1 more threads
INFO - 15:46:32: worker thread finished; awaiting finish of 0 more threads
INFO - 15:46:32: EPOCH - 338 : training on 2134975 raw words (1470479 effective words) took 0.8s, 1930282 effective words/s
Loss after epoch 337: 0.0
INFO - 15:46:33: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:33: worker t

INFO - 15:46:46: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:46: worker thread finished; awaiting finish of 2 more threads
INFO - 15:46:46: worker thread finished; awaiting finish of 1 more threads
INFO - 15:46:46: worker thread finished; awaiting finish of 0 more threads
INFO - 15:46:46: EPOCH - 356 : training on 2134975 raw words (1469806 effective words) took 0.7s, 1967948 effective words/s
Loss after epoch 355: 0.0
INFO - 15:46:46: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:46: worker thread finished; awaiting finish of 2 more threads
INFO - 15:46:46: worker thread finished; awaiting finish of 1 more threads
INFO - 15:46:46: worker thread finished; awaiting finish of 0 more threads
INFO - 15:46:46: EPOCH - 357 : training on 2134975 raw words (1469680 effective words) took 0.7s, 1986518 effective words/s
Loss after epoch 356: 0.0
INFO - 15:46:47: worker thread finished; awaiting finish of 3 more threads
INFO - 15:46:47: worker t

INFO - 15:47:00: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:00: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:00: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:00: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:00: EPOCH - 375 : training on 2134975 raw words (1470071 effective words) took 0.7s, 1960999 effective words/s
Loss after epoch 374: 0.0
INFO - 15:47:00: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:00: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:00: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:00: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:00: EPOCH - 376 : training on 2134975 raw words (1470747 effective words) took 0.7s, 2016595 effective words/s
Loss after epoch 375: 0.0
INFO - 15:47:01: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:01: worker t

INFO - 15:47:14: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:14: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:14: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:14: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:14: EPOCH - 394 : training on 2134975 raw words (1470022 effective words) took 0.7s, 2036997 effective words/s
Loss after epoch 393: 0.0
INFO - 15:47:15: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:15: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:15: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:15: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:15: EPOCH - 395 : training on 2134975 raw words (1470406 effective words) took 0.7s, 2050785 effective words/s
Loss after epoch 394: 0.0
INFO - 15:47:15: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:15: worker t

INFO - 15:47:28: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:28: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:28: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:28: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:28: EPOCH - 413 : training on 2134975 raw words (1470384 effective words) took 0.8s, 1957723 effective words/s
Loss after epoch 412: 0.0
INFO - 15:47:29: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:29: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:29: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:29: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:29: EPOCH - 414 : training on 2134975 raw words (1470943 effective words) took 0.8s, 1921537 effective words/s
Loss after epoch 413: 0.0
INFO - 15:47:30: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:30: worker t

INFO - 15:47:42: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:42: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:42: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:42: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:42: EPOCH - 432 : training on 2134975 raw words (1470445 effective words) took 0.8s, 1959206 effective words/s
Loss after epoch 431: 0.0
INFO - 15:47:43: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:43: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:43: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:43: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:43: EPOCH - 433 : training on 2134975 raw words (1469682 effective words) took 0.7s, 1978639 effective words/s
Loss after epoch 432: 0.0
INFO - 15:47:44: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:44: worker t

INFO - 15:47:57: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:57: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:57: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:57: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:57: EPOCH - 451 : training on 2134975 raw words (1469939 effective words) took 0.7s, 2023779 effective words/s
Loss after epoch 450: 0.0
INFO - 15:47:57: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:57: worker thread finished; awaiting finish of 2 more threads
INFO - 15:47:57: worker thread finished; awaiting finish of 1 more threads
INFO - 15:47:57: worker thread finished; awaiting finish of 0 more threads
INFO - 15:47:57: EPOCH - 452 : training on 2134975 raw words (1469220 effective words) took 0.7s, 1989129 effective words/s
Loss after epoch 451: 0.0
INFO - 15:47:58: worker thread finished; awaiting finish of 3 more threads
INFO - 15:47:58: worker t

INFO - 15:48:11: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:11: worker thread finished; awaiting finish of 2 more threads
INFO - 15:48:11: worker thread finished; awaiting finish of 1 more threads
INFO - 15:48:11: worker thread finished; awaiting finish of 0 more threads
INFO - 15:48:11: EPOCH - 470 : training on 2134975 raw words (1470458 effective words) took 0.7s, 2026852 effective words/s
Loss after epoch 469: 0.0
INFO - 15:48:12: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:12: worker thread finished; awaiting finish of 2 more threads
INFO - 15:48:12: worker thread finished; awaiting finish of 1 more threads
INFO - 15:48:12: worker thread finished; awaiting finish of 0 more threads
INFO - 15:48:12: EPOCH - 471 : training on 2134975 raw words (1470242 effective words) took 0.7s, 2001512 effective words/s
Loss after epoch 470: 0.0
INFO - 15:48:12: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:12: worker t

INFO - 15:48:25: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:25: worker thread finished; awaiting finish of 2 more threads
INFO - 15:48:25: worker thread finished; awaiting finish of 1 more threads
INFO - 15:48:25: worker thread finished; awaiting finish of 0 more threads
INFO - 15:48:25: EPOCH - 489 : training on 2134975 raw words (1471309 effective words) took 0.8s, 1950983 effective words/s
Loss after epoch 488: 0.0
INFO - 15:48:26: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:26: worker thread finished; awaiting finish of 2 more threads
INFO - 15:48:26: worker thread finished; awaiting finish of 1 more threads
INFO - 15:48:26: worker thread finished; awaiting finish of 0 more threads
INFO - 15:48:26: EPOCH - 490 : training on 2134975 raw words (1470086 effective words) took 0.8s, 1953931 effective words/s
Loss after epoch 489: 0.0
INFO - 15:48:27: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:27: worker t

INFO - 15:48:40: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:40: worker thread finished; awaiting finish of 2 more threads
INFO - 15:48:40: worker thread finished; awaiting finish of 1 more threads
INFO - 15:48:40: worker thread finished; awaiting finish of 0 more threads
INFO - 15:48:40: EPOCH - 508 : training on 2134975 raw words (1469737 effective words) took 0.7s, 1992115 effective words/s
Loss after epoch 507: 0.0
INFO - 15:48:40: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:40: worker thread finished; awaiting finish of 2 more threads
INFO - 15:48:40: worker thread finished; awaiting finish of 1 more threads
INFO - 15:48:40: worker thread finished; awaiting finish of 0 more threads
INFO - 15:48:40: EPOCH - 509 : training on 2134975 raw words (1469855 effective words) took 0.8s, 1877176 effective words/s
Loss after epoch 508: 0.0
INFO - 15:48:41: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:41: worker t

INFO - 15:48:54: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:54: worker thread finished; awaiting finish of 2 more threads
INFO - 15:48:54: worker thread finished; awaiting finish of 1 more threads
INFO - 15:48:54: worker thread finished; awaiting finish of 0 more threads
INFO - 15:48:54: EPOCH - 527 : training on 2134975 raw words (1470550 effective words) took 0.7s, 1973511 effective words/s
Loss after epoch 526: 0.0
INFO - 15:48:54: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:54: worker thread finished; awaiting finish of 2 more threads
INFO - 15:48:54: worker thread finished; awaiting finish of 1 more threads
INFO - 15:48:54: worker thread finished; awaiting finish of 0 more threads
INFO - 15:48:54: EPOCH - 528 : training on 2134975 raw words (1469772 effective words) took 0.7s, 1993531 effective words/s
Loss after epoch 527: 0.0
INFO - 15:48:55: worker thread finished; awaiting finish of 3 more threads
INFO - 15:48:55: worker t

INFO - 15:49:08: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:08: worker thread finished; awaiting finish of 2 more threads
INFO - 15:49:08: worker thread finished; awaiting finish of 1 more threads
INFO - 15:49:08: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:08: EPOCH - 546 : training on 2134975 raw words (1470271 effective words) took 0.8s, 1925284 effective words/s
Loss after epoch 545: 0.0
INFO - 15:49:09: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:09: worker thread finished; awaiting finish of 2 more threads
INFO - 15:49:09: worker thread finished; awaiting finish of 1 more threads
INFO - 15:49:09: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:09: EPOCH - 547 : training on 2134975 raw words (1470075 effective words) took 0.7s, 2024090 effective words/s
Loss after epoch 546: 0.0
INFO - 15:49:09: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:09: worker t

INFO - 15:49:22: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:22: worker thread finished; awaiting finish of 2 more threads
INFO - 15:49:22: worker thread finished; awaiting finish of 1 more threads
INFO - 15:49:22: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:22: EPOCH - 565 : training on 2134975 raw words (1469606 effective words) took 0.7s, 2088681 effective words/s
Loss after epoch 564: 0.0
INFO - 15:49:23: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:23: worker thread finished; awaiting finish of 2 more threads
INFO - 15:49:23: worker thread finished; awaiting finish of 1 more threads
INFO - 15:49:23: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:23: EPOCH - 566 : training on 2134975 raw words (1470475 effective words) took 0.8s, 1949666 effective words/s
Loss after epoch 565: 0.0
INFO - 15:49:24: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:24: worker t

INFO - 15:49:36: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:36: worker thread finished; awaiting finish of 2 more threads
INFO - 15:49:36: worker thread finished; awaiting finish of 1 more threads
INFO - 15:49:36: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:36: EPOCH - 584 : training on 2134975 raw words (1470321 effective words) took 0.7s, 2007646 effective words/s
Loss after epoch 583: 0.0
INFO - 15:49:37: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:37: worker thread finished; awaiting finish of 2 more threads
INFO - 15:49:37: worker thread finished; awaiting finish of 1 more threads
INFO - 15:49:37: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:37: EPOCH - 585 : training on 2134975 raw words (1470028 effective words) took 0.7s, 2049493 effective words/s
Loss after epoch 584: 0.0
INFO - 15:49:38: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:38: worker t

INFO - 15:49:50: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:50: EPOCH - 602 : training on 2134975 raw words (1470379 effective words) took 0.7s, 2047143 effective words/s
Loss after epoch 601: 0.0
INFO - 15:49:50: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:50: worker thread finished; awaiting finish of 2 more threads
INFO - 15:49:50: worker thread finished; awaiting finish of 1 more threads
INFO - 15:49:50: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:50: EPOCH - 603 : training on 2134975 raw words (1470296 effective words) took 0.7s, 1978388 effective words/s
Loss after epoch 602: 0.0
INFO - 15:49:51: worker thread finished; awaiting finish of 3 more threads
INFO - 15:49:51: worker thread finished; awaiting finish of 2 more threads
INFO - 15:49:51: worker thread finished; awaiting finish of 1 more threads
INFO - 15:49:51: worker thread finished; awaiting finish of 0 more threads
INFO - 15:49:51: EPOCH - 

INFO - 15:50:04: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:04: worker thread finished; awaiting finish of 2 more threads
INFO - 15:50:04: worker thread finished; awaiting finish of 1 more threads
INFO - 15:50:04: worker thread finished; awaiting finish of 0 more threads
INFO - 15:50:04: EPOCH - 621 : training on 2134975 raw words (1470359 effective words) took 0.8s, 1922695 effective words/s
Loss after epoch 620: 0.0
INFO - 15:50:05: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:05: worker thread finished; awaiting finish of 2 more threads
INFO - 15:50:05: worker thread finished; awaiting finish of 1 more threads
INFO - 15:50:05: worker thread finished; awaiting finish of 0 more threads
INFO - 15:50:05: EPOCH - 622 : training on 2134975 raw words (1470207 effective words) took 0.7s, 2009468 effective words/s
Loss after epoch 621: 0.0
INFO - 15:50:05: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:05: worker t

INFO - 15:50:18: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:18: worker thread finished; awaiting finish of 2 more threads
INFO - 15:50:18: worker thread finished; awaiting finish of 1 more threads
INFO - 15:50:18: worker thread finished; awaiting finish of 0 more threads
INFO - 15:50:18: EPOCH - 640 : training on 2134975 raw words (1470182 effective words) took 0.7s, 2044637 effective words/s
Loss after epoch 639: 0.0
INFO - 15:50:19: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:19: worker thread finished; awaiting finish of 2 more threads
INFO - 15:50:19: worker thread finished; awaiting finish of 1 more threads
INFO - 15:50:19: worker thread finished; awaiting finish of 0 more threads
INFO - 15:50:19: EPOCH - 641 : training on 2134975 raw words (1469790 effective words) took 0.7s, 2004628 effective words/s
Loss after epoch 640: 0.0
INFO - 15:50:20: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:20: worker t

INFO - 15:50:32: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:32: worker thread finished; awaiting finish of 2 more threads
INFO - 15:50:32: worker thread finished; awaiting finish of 1 more threads
INFO - 15:50:32: worker thread finished; awaiting finish of 0 more threads
INFO - 15:50:32: EPOCH - 659 : training on 2134975 raw words (1469702 effective words) took 0.7s, 2044043 effective words/s
Loss after epoch 658: 0.0
INFO - 15:50:33: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:33: worker thread finished; awaiting finish of 2 more threads
INFO - 15:50:33: worker thread finished; awaiting finish of 1 more threads
INFO - 15:50:33: worker thread finished; awaiting finish of 0 more threads
INFO - 15:50:33: EPOCH - 660 : training on 2134975 raw words (1470565 effective words) took 0.7s, 1975220 effective words/s
Loss after epoch 659: 0.0
INFO - 15:50:34: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:34: worker t

INFO - 15:50:47: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:47: worker thread finished; awaiting finish of 2 more threads
INFO - 15:50:47: worker thread finished; awaiting finish of 1 more threads
INFO - 15:50:47: worker thread finished; awaiting finish of 0 more threads
INFO - 15:50:47: EPOCH - 678 : training on 2134975 raw words (1469755 effective words) took 0.8s, 1932224 effective words/s
Loss after epoch 677: 0.0
INFO - 15:50:47: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:47: worker thread finished; awaiting finish of 2 more threads
INFO - 15:50:47: worker thread finished; awaiting finish of 1 more threads
INFO - 15:50:47: worker thread finished; awaiting finish of 0 more threads
INFO - 15:50:47: EPOCH - 679 : training on 2134975 raw words (1470014 effective words) took 0.8s, 1883374 effective words/s
Loss after epoch 678: 0.0
INFO - 15:50:48: worker thread finished; awaiting finish of 3 more threads
INFO - 15:50:48: worker t

INFO - 15:51:01: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:01: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:01: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:01: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:01: EPOCH - 697 : training on 2134975 raw words (1471515 effective words) took 0.7s, 1993139 effective words/s
Loss after epoch 696: 0.0
INFO - 15:51:02: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:02: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:02: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:02: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:02: EPOCH - 698 : training on 2134975 raw words (1470154 effective words) took 0.7s, 1992174 effective words/s
Loss after epoch 697: 0.0
INFO - 15:51:02: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:02: worker t

INFO - 15:51:15: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:15: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:15: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:15: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:15: EPOCH - 715 : training on 2134975 raw words (1469892 effective words) took 0.8s, 1849019 effective words/s
Loss after epoch 714: 0.0
INFO - 15:51:16: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:16: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:16: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:16: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:16: EPOCH - 716 : training on 2134975 raw words (1470213 effective words) took 0.8s, 1905005 effective words/s
Loss after epoch 715: 0.0
INFO - 15:51:17: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:17: worker t

INFO - 15:51:29: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:29: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:29: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:29: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:29: EPOCH - 734 : training on 2134975 raw words (1470097 effective words) took 0.7s, 1962872 effective words/s
Loss after epoch 733: 0.0
INFO - 15:51:30: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:30: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:30: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:30: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:30: EPOCH - 735 : training on 2134975 raw words (1470395 effective words) took 0.8s, 1935964 effective words/s
Loss after epoch 734: 0.0
INFO - 15:51:31: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:31: worker t

INFO - 15:51:43: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:43: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:43: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:43: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:43: EPOCH - 753 : training on 2134975 raw words (1470099 effective words) took 0.7s, 2011327 effective words/s
Loss after epoch 752: 0.0
INFO - 15:51:44: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:44: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:44: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:44: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:44: EPOCH - 754 : training on 2134975 raw words (1470256 effective words) took 0.7s, 2035554 effective words/s
Loss after epoch 753: 0.0
INFO - 15:51:45: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:45: worker t

INFO - 15:51:57: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:57: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:57: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:57: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:57: EPOCH - 772 : training on 2134975 raw words (1470188 effective words) took 0.9s, 1633689 effective words/s
Loss after epoch 771: 0.0
INFO - 15:51:58: worker thread finished; awaiting finish of 3 more threads
INFO - 15:51:58: worker thread finished; awaiting finish of 2 more threads
INFO - 15:51:58: worker thread finished; awaiting finish of 1 more threads
INFO - 15:51:58: worker thread finished; awaiting finish of 0 more threads
INFO - 15:51:58: EPOCH - 773 : training on 2134975 raw words (1469953 effective words) took 0.9s, 1673988 effective words/s
Loss after epoch 772: 0.0
INFO - 15:51:59: EPOCH 774 - PROGRESS: at 86.14% examples, 1309320 words/s, in_qsize 6, out_qsize 1


INFO - 15:52:14: worker thread finished; awaiting finish of 3 more threads
INFO - 15:52:14: worker thread finished; awaiting finish of 2 more threads
INFO - 15:52:14: worker thread finished; awaiting finish of 1 more threads
INFO - 15:52:14: worker thread finished; awaiting finish of 0 more threads
INFO - 15:52:14: EPOCH - 790 : training on 2134975 raw words (1470244 effective words) took 1.0s, 1487878 effective words/s
Loss after epoch 789: 0.0
INFO - 15:52:15: worker thread finished; awaiting finish of 3 more threads
INFO - 15:52:15: worker thread finished; awaiting finish of 2 more threads
INFO - 15:52:15: worker thread finished; awaiting finish of 1 more threads
INFO - 15:52:15: worker thread finished; awaiting finish of 0 more threads
INFO - 15:52:15: EPOCH - 791 : training on 2134975 raw words (1470258 effective words) took 1.0s, 1511686 effective words/s
Loss after epoch 790: 0.0
INFO - 15:52:16: worker thread finished; awaiting finish of 3 more threads
INFO - 15:52:16: worker t

INFO - 15:52:31: worker thread finished; awaiting finish of 3 more threads
INFO - 15:52:31: worker thread finished; awaiting finish of 2 more threads
INFO - 15:52:31: worker thread finished; awaiting finish of 1 more threads
INFO - 15:52:31: EPOCH 807 - PROGRESS: at 100.00% examples, 1468237 words/s, in_qsize 0, out_qsize 1
INFO - 15:52:31: worker thread finished; awaiting finish of 0 more threads
INFO - 15:52:31: EPOCH - 807 : training on 2134975 raw words (1470988 effective words) took 1.0s, 1467980 effective words/s
Loss after epoch 806: 0.0
INFO - 15:52:32: worker thread finished; awaiting finish of 3 more threads
INFO - 15:52:32: worker thread finished; awaiting finish of 2 more threads
INFO - 15:52:32: worker thread finished; awaiting finish of 1 more threads
INFO - 15:52:32: worker thread finished; awaiting finish of 0 more threads
INFO - 15:52:32: EPOCH - 808 : training on 2134975 raw words (1469985 effective words) took 1.0s, 1478835 effective words/s
Loss after epoch 807: 0.0

INFO - 15:52:49: worker thread finished; awaiting finish of 3 more threads
INFO - 15:52:49: EPOCH 825 - PROGRESS: at 99.10% examples, 1458665 words/s, in_qsize 2, out_qsize 1
INFO - 15:52:49: worker thread finished; awaiting finish of 2 more threads
INFO - 15:52:49: worker thread finished; awaiting finish of 1 more threads
INFO - 15:52:49: worker thread finished; awaiting finish of 0 more threads
INFO - 15:52:49: EPOCH - 825 : training on 2134975 raw words (1470227 effective words) took 1.0s, 1459136 effective words/s
Loss after epoch 824: 0.0
INFO - 15:52:50: worker thread finished; awaiting finish of 3 more threads
INFO - 15:52:50: worker thread finished; awaiting finish of 2 more threads
INFO - 15:52:50: worker thread finished; awaiting finish of 1 more threads
INFO - 15:52:50: worker thread finished; awaiting finish of 0 more threads
INFO - 15:52:50: EPOCH - 826 : training on 2134975 raw words (1469137 effective words) took 1.0s, 1490537 effective words/s
Loss after epoch 825: 0.0


INFO - 15:53:06: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:06: worker thread finished; awaiting finish of 2 more threads
INFO - 15:53:06: worker thread finished; awaiting finish of 1 more threads
INFO - 15:53:06: worker thread finished; awaiting finish of 0 more threads
INFO - 15:53:06: EPOCH - 843 : training on 2134975 raw words (1469907 effective words) took 1.0s, 1542247 effective words/s
Loss after epoch 842: 0.0
INFO - 15:53:07: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:07: worker thread finished; awaiting finish of 2 more threads
INFO - 15:53:07: worker thread finished; awaiting finish of 1 more threads
INFO - 15:53:07: worker thread finished; awaiting finish of 0 more threads
INFO - 15:53:07: EPOCH - 844 : training on 2134975 raw words (1469780 effective words) took 1.0s, 1512088 effective words/s
Loss after epoch 843: 0.0
INFO - 15:53:08: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:08: worker t

INFO - 15:53:24: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:24: worker thread finished; awaiting finish of 2 more threads
INFO - 15:53:24: worker thread finished; awaiting finish of 1 more threads
INFO - 15:53:24: worker thread finished; awaiting finish of 0 more threads
INFO - 15:53:24: EPOCH - 861 : training on 2134975 raw words (1470854 effective words) took 1.1s, 1345404 effective words/s
Loss after epoch 860: 0.0
INFO - 15:53:25: EPOCH 862 - PROGRESS: at 93.77% examples, 1389893 words/s, in_qsize 7, out_qsize 0
INFO - 15:53:25: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:25: worker thread finished; awaiting finish of 2 more threads
INFO - 15:53:25: worker thread finished; awaiting finish of 1 more threads
INFO - 15:53:25: worker thread finished; awaiting finish of 0 more threads
INFO - 15:53:25: EPOCH - 862 : training on 2134975 raw words (1469810 effective words) took 1.0s, 1406655 effective words/s
Loss after epoch 861: 0.0


INFO - 15:53:40: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:40: worker thread finished; awaiting finish of 2 more threads
INFO - 15:53:40: worker thread finished; awaiting finish of 1 more threads
INFO - 15:53:40: worker thread finished; awaiting finish of 0 more threads
INFO - 15:53:40: EPOCH - 879 : training on 2134975 raw words (1470387 effective words) took 0.7s, 1992961 effective words/s
Loss after epoch 878: 0.0
INFO - 15:53:41: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:41: worker thread finished; awaiting finish of 2 more threads
INFO - 15:53:41: worker thread finished; awaiting finish of 1 more threads
INFO - 15:53:41: worker thread finished; awaiting finish of 0 more threads
INFO - 15:53:41: EPOCH - 880 : training on 2134975 raw words (1470886 effective words) took 0.7s, 1988042 effective words/s
Loss after epoch 879: 0.0
INFO - 15:53:42: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:42: worker t

INFO - 15:53:55: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:55: worker thread finished; awaiting finish of 2 more threads
INFO - 15:53:55: worker thread finished; awaiting finish of 1 more threads
INFO - 15:53:55: worker thread finished; awaiting finish of 0 more threads
INFO - 15:53:55: EPOCH - 898 : training on 2134975 raw words (1469645 effective words) took 0.7s, 2080256 effective words/s
Loss after epoch 897: 0.0
INFO - 15:53:56: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:56: worker thread finished; awaiting finish of 2 more threads
INFO - 15:53:56: worker thread finished; awaiting finish of 1 more threads
INFO - 15:53:56: worker thread finished; awaiting finish of 0 more threads
INFO - 15:53:56: EPOCH - 899 : training on 2134975 raw words (1470051 effective words) took 0.7s, 2030709 effective words/s
Loss after epoch 898: 0.0
INFO - 15:53:56: worker thread finished; awaiting finish of 3 more threads
INFO - 15:53:56: worker t

INFO - 15:54:09: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:09: worker thread finished; awaiting finish of 2 more threads
INFO - 15:54:09: worker thread finished; awaiting finish of 1 more threads
INFO - 15:54:09: worker thread finished; awaiting finish of 0 more threads
INFO - 15:54:09: EPOCH - 917 : training on 2134975 raw words (1470105 effective words) took 0.7s, 2051564 effective words/s
Loss after epoch 916: 0.0
INFO - 15:54:10: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:10: worker thread finished; awaiting finish of 2 more threads
INFO - 15:54:10: worker thread finished; awaiting finish of 1 more threads
INFO - 15:54:10: worker thread finished; awaiting finish of 0 more threads
INFO - 15:54:10: EPOCH - 918 : training on 2134975 raw words (1470254 effective words) took 0.7s, 2021348 effective words/s
Loss after epoch 917: 0.0
INFO - 15:54:10: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:10: worker t

INFO - 15:54:23: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:23: worker thread finished; awaiting finish of 2 more threads
INFO - 15:54:23: worker thread finished; awaiting finish of 1 more threads
INFO - 15:54:23: worker thread finished; awaiting finish of 0 more threads
INFO - 15:54:23: EPOCH - 936 : training on 2134975 raw words (1469768 effective words) took 0.7s, 2002880 effective words/s
Loss after epoch 935: 0.0
INFO - 15:54:24: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:24: worker thread finished; awaiting finish of 2 more threads
INFO - 15:54:24: worker thread finished; awaiting finish of 1 more threads
INFO - 15:54:24: worker thread finished; awaiting finish of 0 more threads
INFO - 15:54:24: EPOCH - 937 : training on 2134975 raw words (1470472 effective words) took 0.7s, 2026626 effective words/s
Loss after epoch 936: 0.0
INFO - 15:54:24: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:24: worker t

INFO - 15:54:37: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:37: worker thread finished; awaiting finish of 2 more threads
INFO - 15:54:37: worker thread finished; awaiting finish of 1 more threads
INFO - 15:54:37: worker thread finished; awaiting finish of 0 more threads
INFO - 15:54:37: EPOCH - 955 : training on 2134975 raw words (1470165 effective words) took 0.7s, 1962888 effective words/s
Loss after epoch 954: 0.0
INFO - 15:54:38: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:38: worker thread finished; awaiting finish of 2 more threads
INFO - 15:54:38: worker thread finished; awaiting finish of 1 more threads
INFO - 15:54:38: worker thread finished; awaiting finish of 0 more threads
INFO - 15:54:38: EPOCH - 956 : training on 2134975 raw words (1469785 effective words) took 0.8s, 1934936 effective words/s
Loss after epoch 955: 0.0
INFO - 15:54:39: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:39: worker t

INFO - 15:54:51: EPOCH - 973 : training on 2134975 raw words (1469696 effective words) took 0.8s, 1937877 effective words/s
Loss after epoch 972: 0.0
INFO - 15:54:51: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:51: worker thread finished; awaiting finish of 2 more threads
INFO - 15:54:51: worker thread finished; awaiting finish of 1 more threads
INFO - 15:54:51: worker thread finished; awaiting finish of 0 more threads
INFO - 15:54:51: EPOCH - 974 : training on 2134975 raw words (1470739 effective words) took 0.8s, 1776455 effective words/s
Loss after epoch 973: 0.0
INFO - 15:54:52: EPOCH 975 - PROGRESS: at 95.32% examples, 1411508 words/s, in_qsize 8, out_qsize 0
INFO - 15:54:52: worker thread finished; awaiting finish of 3 more threads
INFO - 15:54:52: worker thread finished; awaiting finish of 2 more threads
INFO - 15:54:52: worker thread finished; awaiting finish of 1 more threads
INFO - 15:54:52: worker thread finished; awaiting finish of 0 more threads


INFO - 15:55:07: worker thread finished; awaiting finish of 3 more threads
INFO - 15:55:07: worker thread finished; awaiting finish of 2 more threads
INFO - 15:55:07: worker thread finished; awaiting finish of 1 more threads
INFO - 15:55:07: worker thread finished; awaiting finish of 0 more threads
INFO - 15:55:07: EPOCH - 992 : training on 2134975 raw words (1470590 effective words) took 0.8s, 1938297 effective words/s
Loss after epoch 991: 0.0
INFO - 15:55:08: worker thread finished; awaiting finish of 3 more threads
INFO - 15:55:08: worker thread finished; awaiting finish of 2 more threads
INFO - 15:55:08: worker thread finished; awaiting finish of 1 more threads
INFO - 15:55:08: worker thread finished; awaiting finish of 0 more threads
INFO - 15:55:08: EPOCH - 993 : training on 2134975 raw words (1470392 effective words) took 0.9s, 1724190 effective words/s
Loss after epoch 992: 0.0
INFO - 15:55:09: worker thread finished; awaiting finish of 3 more threads
INFO - 15:55:09: worker t

In [24]:
# Similarity 

from w2v_train import W2VLossLogger
from gensim.models import Word2Vec

model = Word2Vec.load("gutenberg_w2v.100d.model")

for word in ['bible', 'book', 'bank', 'water']:
    sims = model.wv.most_similar(positive=word,topn=5)
    print("5 most similar words to {} are {}".format(word,sims))

INFO - 15:55:14: loading Word2Vec object from gutenberg_w2v.100d.model
INFO - 15:55:14: loading wv recursively from gutenberg_w2v.100d.model.wv.* with mmap=None
INFO - 15:55:14: setting ignored attribute vectors_norm to None
INFO - 15:55:14: loading vocabulary recursively from gutenberg_w2v.100d.model.vocabulary.* with mmap=None
INFO - 15:55:14: loading trainables recursively from gutenberg_w2v.100d.model.trainables.* with mmap=None
INFO - 15:55:14: setting ignored attribute cum_table to None
INFO - 15:55:14: loaded gutenberg_w2v.100d.model
INFO - 15:55:14: precomputing L2-norms of word weight vectors


5 most similar words to bible are [('official', 0.3598814010620117), ('identity', 0.3580421805381775), ('respects', 0.3252386152744293), ('poz', 0.32124990224838257), ('circumcision', 0.3197419047355652)]
5 most similar words to book are [('written', 0.5071839094161987), ('letter', 0.5008087158203125), ('chronicles', 0.4943390488624573), ('temple', 0.4883684813976288), ('papers', 0.4611198306083679)]
5 most similar words to bank are [('wall', 0.5670338869094849), ('hill', 0.5109919309616089), ('pool', 0.48958033323287964), ('table', 0.48400554060935974), ('floor', 0.478868305683136)]
5 most similar words to water are [('waters', 0.6183380484580994), ('wine', 0.5704622268676758), ('blood', 0.5105494856834412), ('river', 0.5004690885543823), ('milk', 0.49232739210128784)]


In [30]:
# word-analogy

from numpy import dot,asarray
from numpy.linalg import norm

print(model.wv.most_similar(positive = ['king','girls'],negative = ['queen'],topn=1))
print(model.wv.most_similar(positive = ['taller','good'],negative = ['tall'],topn=1))
print(model.wv.most_similar(positive = ['france','london'],negative = ['paris'],topn=1))


A = asarray(model.wv['kingdom'])
B = asarray(model.wv['king'] - model.wv['queen'] + model.wv['girls'])
cosine = dot(A,B)/(norm(A)*norm(B))
print(cosine)

[('people', 0.4629022479057312)]
[('better', 0.40203723311424255)]
[('england', 0.44453078508377075)]
0.2419478


In [78]:
# Same analysis for google's w2v model

from gensim.models import KeyedVectors

NUM_W2V_TO_LOAD = 1000000

G_model = KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin.gz', binary=True,
limit=NUM_W2V_TO_LOAD)

INFO - 14:20:54: loading projection weights from ./GoogleNews-vectors-negative300.bin.gz
INFO - 14:21:32: loaded (3000000, 300) matrix from ./GoogleNews-vectors-negative300.bin.gz


In [43]:
for word in ['bible', 'book', 'bank', 'water']:
    sims = G_model.wv.most_similar(positive=word,topn=5)
    print("5 most similar words to {} are {}".format(word,sims))

  sims = model.wv.most_similar(positive=word,topn=5)
INFO - 13:27:07: precomputing L2-norms of word weight vectors


5 most similar words to bible are [('Bible', 0.7367781400680542), ('bibles', 0.6052597761154175), ('Holy_Bible', 0.5989601612091064), ('scriptures', 0.574568510055542), ('scripture', 0.5697901248931885)]
5 most similar words to book are [('tome', 0.7485830783843994), ('books', 0.7379178404808044), ('memoir', 0.7302927374839783), ('paperback_edition', 0.6868364810943604), ('autobiography', 0.6741527915000916)]
5 most similar words to bank are [('banks', 0.7440759539604187), ('banking', 0.690161406993866), ('Bank', 0.6698698997497559), ('lender', 0.6342284679412842), ('banker', 0.6092953681945801)]
5 most similar words to water are [('potable_water', 0.6799106597900391), ('Water', 0.6706870794296265), ('sewage', 0.6619377136230469), ('groundwater', 0.6588345766067505), ('Floridan_aquifer', 0.6422533988952637)]


In [44]:
print(G_model.wv.most_similar(positive = ['king','girls'],negative = ['queen'],topn=1))
print(G_model.wv.most_similar(positive = ['taller','good'],negative = ['tall'],topn=1))
print(G_model.wv.most_similar(positive = ['france','london'],negative = ['paris'],topn=1))

  boys = [model.wv['king'] - model.wv['queen'] + model.wv['girls']]
  better = [model.wv['taller'] - model.wv['tall'] + model.wv['good']]
  england = [model.wv['france'] - model.wv['paris'] + model.wv['london']]
  print(model.wv.most_similar(positive=boys,topn=1))


[('boys', 0.7302044630050659)]
[('better', 0.7094688415527344)]


  print(model.wv.most_similar(positive=better,topn=1))
  print(model.wv.most_similar(positive=england,topn=1))


[('london', 0.7541539072990417)]


In [31]:
# Step 13 

# .tsv files for Embedding projector

with open('./data/embeddings.tsv','w') as file:
    with open('./data/metadata.tsv','w') as f:
        for word in model.wv.vocab.keys():
            for attr in model.wv[word]:
                file.write('{}\t'.format(attr))
            file.write('\n')
            f.write('{}\n'.format(word))