In [1]:
import numpy as np
import nltk
import matplotlib.pyplot as plt
import re
import sys
import contractions

In [2]:
# Step 1 

!python fetch_gutenberg.py > data/gutenberg.txt

[nltk_data] Downloading package gutenberg to
[nltk_data]     /home/tilemahos/nltk_data...
[nltk_data]   Unzipping corpora/gutenberg.zip.


In [5]:
# Step 2

# Read the gutenberg.txt file word by word
# and create a disctionary that holds as keys the tokens(words) and as values the number of appearance
Dict = {}
with open('./data/gutenberg.txt') as file:
    # reading each line   
    for line in file:
        # reading each word       
        for word in line.split():
            if word not in Dict: # if word does not exist in dict insert it
                Dict[word] = 1
            else: # if word already exists in dict increase its frequency
                Dict[word] = Dict[word] + 1

# Filter rare tokens
Dict = {key: val for key, val in Dict.items() if val >= 5}
        
# Write the Dict in an output file
with open("./vocab/vocab.txt",'w') as f:
    for word in Dict.keys():
        f.write("{}\t{}\n".format(word,Dict[word]))

In [18]:
# Step 3

# Create chars.syms containing all ascii characters indexed
with open("./vocab/chars.syms",'w') as f:
    f.write("<eps>\t0\n")
    for i in range(97,123):
        f.write("{}\t{}\n".format(chr(i),i-96))
 
 # Create words.syms containing all words in corpus indexed
with open("./vocab/words.syms",'w') as f:
    i = 0
    f.write("<eps>\t0\n")
    i = i+1
    for word in Dict.keys():
        f.write("{}\t{}\n".format(word,i))
        i = i+1

In [25]:
# Step 4

# Levenhstein .fst

with open("./fsts/L.fst",'w') as f:
    for i in range(97,98):
        f.write("0\t0\t{}\t{}\t0\n".format(chr(i),chr(i))) # chr -> chr
        f.write("0\t0\t{}\t<eps>\t1\n".format(chr(i))) # chr -> <eps>
        f.write("0\t0\t<eps>\t{}\t1\n".format(chr(i))) # <eps> -> chr
        for j in range(97,123):
            if j != i:
                f.write("0\t0\t{}\t{}\t1\n".format(chr(i),chr(j))) # chr -> chr
    f.write("0\t0\n") # end state
                
!fstcompile -isymbols=./vocab/chars.syms -osymbols=./vocab/chars.syms ./fsts/L.fst ./fsts/L.binfst
!fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/chars.syms -portrait ./fsts/L.binfst | dot -Tpng >./fsts/L.png
!display ./fsts/L.png

In [20]:
# Step 5

# Dictionary acceptor .fst

with open("./fsts/V.fst",'w') as f:
    j = 0
    for word in ['emma','by','jane']: # Dict.keys():
        i = 0
        for letter in word:
            if i == 0:
                f.write("{}\t{}\t{}\t{}\t0\n".format(i,j+1,letter,word)) # word[0] -> word
                j = j+1
            else:    
                f.write("{}\t{}\t{}\t<eps>\t0\n".format(j,j+1,letter)) # word[i>0] -> <eps>
                j = j+1
            i = i+1
        f.write("{}\t0\n".format(j)) # end state

In [24]:
!fstcompile -isymbols=./vocab/chars.syms -osymbols=./vocab/words.syms ./fsts/V.fst ./fsts/V.binfst
!fstrmepsilon ./fsts/V.binfst | fstdeterminize | fstminimize >./fsts/V_opt.binfst

!fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/words.syms -portrait ./fsts/V.binfst | dot -Tpng >./fsts/V.png
!display ./fsts/V.png

!fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/words.syms -portrait ./fsts/V_opt.binfst | dot -Tpng >./fsts/V_opt.png
!display ./fsts/V_opt.png

In [8]:
# Step 6

# Compose L (Levensthein) with V (Dictionary acceptor) to build the naive spell checker S

!fstarcsort --sort_type=olabel ./fsts/L.binfst ./fsts/L.binfst
!fstarcsort --sort_type=ilabel ./fsts/V_opt.binfst ./fsts/V_opt.binfst
!fstcompose ./fsts/L.binfst ./fsts/V_opt.binfst ./fsts/S.binfst

!./predict.sh ./fsts/S.binfst cwt 


cut

In [9]:
!./predict.sh ./fsts/S.binfst cit 

city

In [10]:
!./predict.sh ./fsts/S.binfst antheaterry 

entreaty

In [11]:
# Step 7 

# Correcting the first 20 words of spell_test.txt with S

import subprocess

with open('./data/spell_test.txt') as file:
    # reading each line 
    j = 0
    for line in file:
        j = j+1
        if j == 2:
            break
        i = 0
        # reading each word       
        for word in line.split():
            i = i+1
            if i == 1:
                print("True \n{}".format(word))
            elif i == 2:
                print("False \n{}".format(word))
                print("Corrected ")
                subprocess.call(['bash','predict.sh', './fsts/S.binfst' , word])
                print('\n')
            else:
                break

True 
contented:
False 
contenpted
Corrected 
contented



**Part 1**

In [12]:
# Step 8

!./word_edits.sh abandonned abandoned

n	<eps>


In [13]:
# Create a file with the edits in wiki.txt

'''
with open('./data/wiki.txt') as file:
    # reading each line 
    for line in file:
        # reading words       
        words = line.split()
        subprocess.call(['bash','word_edits_savetofile.sh', words[0] , words[1]])
            
'''

In [14]:
# Create Dict with frequency(value) of each edit(key)

Edits = {}
with open('./data/edits.txt') as file:
    # reading each line   
    for line in file:
        # reading each edit      
        edit =  tuple(line.split())
        if edit not in Edits:
            Edits[edit] = 1
        else:
            Edits[edit] = Edits[edit] + 1

print(len(Edits))
print(24 + 24 + 24*23)

342
600


In [15]:
from math import log10

# E .fst (edit weight == -log10(edit freq))

total = sum(Edits.values())

with open("./fsts/E.fst",'w') as f:
    for i in range(97,123):
        f.write("0\t0\t{}\t{}\t0\n".format(chr(i),chr(i))) # chr -> chr
        
        if (chr(i), '<eps>') not in Edits:
            f.write("0\t0\t{}\t<eps>\t10000\n".format(chr(i))) # chr -> <eps>
        else:
            f.write("0\t0\t{}\t<eps>\t{}\n".format(chr(i),-log10(Edits[(chr(i),'<eps>')]/total))) # chr -> <eps>
            
        if ('<eps>', chr(i)) not in Edits:
            f.write("0\t0\t<eps>\t{}\t10000\n".format(chr(i))) # <eps> -> chr
        else:
            f.write("0\t0\t<eps>\t{}\t{}\n".format(chr(i),-log10(Edits[('<eps>', chr(i))]/total))) # <eps> -> chr
        
        for j in range(97,123):
            if j != i:
                if (chr(i), chr(j)) not in Edits:
                    f.write("0\t0\t{}\t{}\t10000\n".format(chr(i),chr(j))) # chr -> chr
                else:
                    f.write("0\t0\t{}\t{}\t{}\n".format(chr(i),chr(j),-log10(Edits[(chr(i), chr(j))]/total))) # chr -> chr
    
    f.write("0\t0\n") # end state
                
!fstcompile -isymbols=./vocab/chars.syms -osymbols=./vocab/chars.syms ./fsts/E.fst ./fsts/E.binfst
# !fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/chars.syms -portrait ./fsts/L.binfst | dot -Tpng >./fsts/L.png
# !display ./fsts/L.png

In [16]:
# Compose E with V (Dictionary acceptor) to build the spell checker EV

!fstarcsort --sort_type=olabel ./fsts/E.binfst ./fsts/E.binfst
!fstarcsort --sort_type=ilabel ./fsts/V_opt.binfst ./fsts/V_opt.binfst
!fstcompose ./fsts/E.binfst ./fsts/V_opt.binfst ./fsts/EV.binfst

!./predict.sh ./fsts/EV.binfst cwt 

wit

In [17]:
!./predict.sh ./fsts/EV.binfst cit 

clit

In [18]:
!./predict.sh ./fsts/EV.binfst antheaterry 

theatre

In [19]:

# Correcting the first 20 words of spell_test.txt with EV

import subprocess

with open('./data/spell_test.txt') as file:
    # reading each line 
    j = 0
    for line in file:
        j = j+1
        if j == 2:
            break
        i = 0
        # reading each word       
        for word in line.split():
            i = i+1
            if i == 1:
                print("True \n{}".format(word))
            elif i == 2:
                print("False \n{}".format(word))
                print("Corrected ")
                subprocess.call(['bash','predict.sh', './fsts/EV.binfst' , word])
                print('\n')
            else:
                break

True 
contented:
False 
contenpted
Corrected 
contented



In [22]:
# Step 9

total_words = sum(Dict.values())

with open("./fsts/W.fst",'w') as f:
    for word in Dict.keys():
        f.write("0\t0\t{}\t{}\t{}\n".format(word,word,-log10(Dict[word]/total_words))) # word -> word
    f.write("0\t0\n") # end state            
    
!fstcompile -isymbols=./vocab/words.syms -osymbols=./vocab/words.syms ./fsts/W.fst ./fsts/W.binfst

In [23]:
# Compose L (Levensthein) with V (Dictionary acceptor) and W (word frequency acceptor)to build the spell checker LVW

!fstarcsort --sort_type=olabel ./fsts/S.binfst ./fsts/S.binfst
!fstarcsort --sort_type=ilabel ./fsts/W.binfst ./fsts/W.binfst
!fstcompose ./fsts/S.binfst ./fsts/W.binfst ./fsts/LVW.binfst

!./predict.sh ./fsts/LVW.binfst cwt 

it

In [24]:
!./predict.sh ./fsts/LVW.binfst cit 

it

In [25]:
# Compose E with V (Dictionary acceptor) and W (word frequency acceptor)to build the spell checker EVW

!fstarcsort --sort_type=olabel ./fsts/EV.binfst ./fsts/EV.binfst
!fstarcsort --sort_type=ilabel ./fsts/W.binfst ./fsts/W.binfst
!fstcompose ./fsts/EV.binfst ./fsts/W.binfst ./fsts/EVW.binfst

!./predict.sh ./fsts/EVW.binfst cwt 

with

In [26]:
!./predict.sh ./fsts/EVW.binfst cit 

it

In [27]:
!fstarcsort --sort_type=olabel ./fsts/V_opt.binfst ./fsts/V_opt.binfst
!fstarcsort --sort_type=ilabel ./fsts/W.binfst ./fsts/W.binfst
!fstcompose ./fsts/V_opt.binfst ./fsts/W.binfst ./fsts/VW.binfst

# !fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/words.syms -portrait ./fsts/VW.binfst | dot -Tpng >./fsts/VW.png 
# !display ./fsts/VW.png

# !fstdraw -isymbols=./vocab/chars.syms -osymbols=./vocab/words.syms -portrait ./fsts/V_opt.binfst | dot -Tpng >./fsts/V_opt.png 
# !display ./fsts/V_opt.png

In [36]:
# Step 10

!python run_evaluation.py fsts/S.binfst

contenpted -> contented: contented                                              
contende -> contend: contented                                                  
contended -> contended: contented                                               
contentid -> contented: contented                                               
begining -> beginning: beginning                                                
problam -> problem: problem                                                     
proble -> problem: problem                                                      
promblem -> problem: problem                                                    
proplen -> problem: problem                                                     
dirven -> dire: driven                                                          
exstacy -> ecstasy: ecstasy                                                     
ecstacy -> ecstasy: ecstasy                                                     
guic -> guil: juice         

scarcly -> scarcely: scarcely                                                   
scarecly -> scarcely: scarcely                                                  
scarely -> scarcely: scarcely                                                   
scarsely -> scarcely: scarcely                                                  
questionaire -> questioning: questionnaire                                      
experance -> experience: experience                                             
experiance -> experience: experience                                            
possable -> possible: possible                                                  
reafreshment -> refreshment: refreshment                                        
refreshmant -> refreshment: refreshment                                         
refresment -> refreshment: refreshment                                          
refressmunt -> refreshment: refreshment                                         
embaras -> embark: embarrass

In [37]:
!python run_evaluation.py fsts/LVW.binfst

contenpted -> contented: contented                                              
contende -> contend: contented                                                  
contended -> contended: contented                                               
contentid -> contented: contented                                               
begining -> beginning: beginning                                                
problam -> problem: problem                                                     
proble -> people: problem                                                       
promblem -> problem: problem                                                    
proplen -> people: problem                                                      
dirven -> given: driven                                                         
exstacy -> stay: ecstasy                                                        
ecstacy -> ecstasy: ecstasy                                                     
guic -> in: juice           

scarcly -> scarcely: scarcely                                                   
scarecly -> scarcely: scarcely                                                  
scarely -> scarcely: scarcely                                                   
scarsely -> scarcely: scarcely                                                  
questionaire -> question: questionnaire                                         
experance -> experience: experience                                             
experiance -> experience: experience                                            
possable -> possible: possible                                                  
reafreshment -> refreshment: refreshment                                        
refreshmant -> refreshment: refreshment                                         
refresment -> refreshment: refreshment                                          
refressmunt -> refreshment: refreshment                                         
embaras -> ears: embarrass  

In [1]:
!python run_evaluation.py fsts/EV.binfst

contenpted -> contented: contented                                              
contende -> contend: contented                                                  
contended -> contended: contented                                               
contentid -> contented: contented                                               
begining -> beginning: beginning                                                
problam -> problem: problem                                                     
proble -> problem: problem                                                      
promblem -> problem: problem                                                    
proplen -> people: problem                                                      
dirven -> driven: driven                                                        
exstacy -> exactly: ecstasy                                                     
ecstacy -> ecstasy: ecstasy                                                     
guic -> guil: juice         

scarcly -> scarcely: scarcely                                                   
scarecly -> scarcely: scarcely                                                  
scarely -> scarcely: scarcely                                                   
scarsely -> scarcely: scarcely                                                  
questionaire -> question: questionnaire                                         
experance -> experience: experience                                             
experiance -> experience: experience                                            
possable -> possible: possible                                                  
reafreshment -> refreshment: refreshment                                        
refreshmant -> refreshment: refreshment                                         
refresment -> refreshment: refreshment                                          
refressmunt -> refreshment: refreshment                                         
embaras -> embark: embarrass

In [39]:
!python run_evaluation.py fsts/EVW.binfst

contenpted -> contented: contented                                              
contende -> contend: contented                                                  
contended -> contended: contented                                               
contentid -> contented: contented                                               
begining -> beginning: beginning                                                
problam -> problem: problem                                                     
proble -> problem: problem                                                      
promblem -> problem: problem                                                    
proplen -> people: problem                                                      
dirven -> driven: driven                                                        
exstacy -> exactly: ecstasy                                                     
ecstacy -> ecstasy: ecstasy                                                     
guic -> i: juice            

scarcly -> scarcely: scarcely                                                   
scarecly -> scarcely: scarcely                                                  
scarely -> scarcely: scarcely                                                   
scarsely -> scarcely: scarcely                                                  
questionaire -> question: questionnaire                                         
experance -> experience: experience                                             
experiance -> experience: experience                                            
possable -> possible: possible                                                  
reafreshment -> refreshment: refreshment                                        
refreshmant -> refreshment: refreshment                                         
refresment -> refreshment: refreshment                                          
refressmunt -> refreshment: refreshment                                         
embaras -> embraces: embarra

 **Part 2**

In [6]:
# Suppress deprecation warnings

import warnings

def fxn():
    warnings.warn("deprecated", DeprecationWarning)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    fxn()

In [7]:
# Step 12

Words = []
with open('./data/gutenberg.txt') as file:
    # reading each line   
    for line in file:
        # reading each word       
        for word in line.split():
            Words.append(word)
            
print(Words[0:3])

['emma', 'by', 'jane']


In [8]:
from gensim.test.utils import common_texts

print(common_texts[0:2])

[['human', 'interface', 'computer'], ['survey', 'user', 'computer', 'system', 'response', 'time']]


In [1]:
# Train a w2v model on gutenberg 

!python w2v_train.py

INFO - 20:50:06: collecting all words and their counts
INFO - 20:50:06: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 20:50:06: PROGRESS: at sentence #10000, processed 113514 words, keeping 6066 word types
INFO - 20:50:06: PROGRESS: at sentence #20000, processed 227309 words, keeping 8494 word types
INFO - 20:50:06: PROGRESS: at sentence #30000, processed 324666 words, keeping 9843 word types
INFO - 20:50:06: PROGRESS: at sentence #40000, processed 427437 words, keeping 11782 word types
INFO - 20:50:06: PROGRESS: at sentence #50000, processed 537200 words, keeping 13113 word types
INFO - 20:50:06: PROGRESS: at sentence #60000, processed 647732 words, keeping 14316 word types
INFO - 20:50:06: PROGRESS: at sentence #70000, processed 751683 words, keeping 16047 word types
INFO - 20:50:06: PROGRESS: at sentence #80000, processed 847972 words, keeping 16841 word types
INFO - 20:50:06: PROGRESS: at sentence #90000, processed 957402 words, keeping 17336 word types
I

INFO - 20:50:14: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:14: worker thread finished; awaiting finish of 2 more threads
INFO - 20:50:14: worker thread finished; awaiting finish of 1 more threads
INFO - 20:50:14: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:14: EPOCH - 2 : training on 2134975 raw words (1470315 effective words) took 0.8s, 1902745 effective words/s
Loss after epoch 1: 0.0
INFO - 20:50:15: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:15: worker thread finished; awaiting finish of 2 more threads
INFO - 20:50:15: worker thread finished; awaiting finish of 1 more threads
INFO - 20:50:15: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:15: EPOCH - 3 : training on 2134975 raw words (1470463 effective words) took 0.7s, 1993035 effective words/s
Loss after epoch 2: 0.0
INFO - 20:50:15: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:15: worker thread fi

INFO - 20:50:29: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:29: worker thread finished; awaiting finish of 2 more threads
INFO - 20:50:29: worker thread finished; awaiting finish of 1 more threads
INFO - 20:50:29: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:29: EPOCH - 21 : training on 2134975 raw words (1470282 effective words) took 0.8s, 1859584 effective words/s
Loss after epoch 20: 0.0
INFO - 20:50:30: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:30: worker thread finished; awaiting finish of 2 more threads
INFO - 20:50:30: worker thread finished; awaiting finish of 1 more threads
INFO - 20:50:30: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:30: EPOCH - 22 : training on 2134975 raw words (1470233 effective words) took 0.8s, 1926039 effective words/s
Loss after epoch 21: 0.0
INFO - 20:50:31: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:31: worker threa

INFO - 20:50:44: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:44: worker thread finished; awaiting finish of 2 more threads
INFO - 20:50:44: worker thread finished; awaiting finish of 1 more threads
INFO - 20:50:44: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:44: EPOCH - 40 : training on 2134975 raw words (1470565 effective words) took 0.8s, 1810170 effective words/s
Loss after epoch 39: 0.0
INFO - 20:50:45: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:45: worker thread finished; awaiting finish of 2 more threads
INFO - 20:50:45: worker thread finished; awaiting finish of 1 more threads
INFO - 20:50:45: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:45: EPOCH - 41 : training on 2134975 raw words (1470162 effective words) took 0.7s, 2053753 effective words/s
Loss after epoch 40: 0.0
INFO - 20:50:46: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:46: worker threa

INFO - 20:50:58: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:58: EPOCH - 58 : training on 2134975 raw words (1470610 effective words) took 0.7s, 2078265 effective words/s
Loss after epoch 57: 0.0
INFO - 20:50:59: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:59: worker thread finished; awaiting finish of 2 more threads
INFO - 20:50:59: worker thread finished; awaiting finish of 1 more threads
INFO - 20:50:59: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:59: EPOCH - 59 : training on 2134975 raw words (1469637 effective words) took 0.8s, 1776269 effective words/s
Loss after epoch 58: 0.0
INFO - 20:50:59: worker thread finished; awaiting finish of 3 more threads
INFO - 20:50:59: worker thread finished; awaiting finish of 2 more threads
INFO - 20:50:59: worker thread finished; awaiting finish of 1 more threads
INFO - 20:50:59: worker thread finished; awaiting finish of 0 more threads
INFO - 20:50:59: EPOCH - 60 :

INFO - 20:51:12: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:12: worker thread finished; awaiting finish of 2 more threads
INFO - 20:51:12: worker thread finished; awaiting finish of 1 more threads
INFO - 20:51:12: worker thread finished; awaiting finish of 0 more threads
INFO - 20:51:12: EPOCH - 77 : training on 2134975 raw words (1470382 effective words) took 0.8s, 1850192 effective words/s
Loss after epoch 76: 0.0
INFO - 20:51:13: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:13: worker thread finished; awaiting finish of 2 more threads
INFO - 20:51:13: worker thread finished; awaiting finish of 1 more threads
INFO - 20:51:13: worker thread finished; awaiting finish of 0 more threads
INFO - 20:51:13: EPOCH - 78 : training on 2134975 raw words (1469779 effective words) took 0.8s, 1815218 effective words/s
Loss after epoch 77: 0.0
INFO - 20:51:14: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:14: worker threa

INFO - 20:51:27: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:27: worker thread finished; awaiting finish of 2 more threads
INFO - 20:51:27: worker thread finished; awaiting finish of 1 more threads
INFO - 20:51:27: worker thread finished; awaiting finish of 0 more threads
INFO - 20:51:27: EPOCH - 96 : training on 2134975 raw words (1469442 effective words) took 0.7s, 2052622 effective words/s
Loss after epoch 95: 0.0
INFO - 20:51:28: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:28: worker thread finished; awaiting finish of 2 more threads
INFO - 20:51:28: worker thread finished; awaiting finish of 1 more threads
INFO - 20:51:28: worker thread finished; awaiting finish of 0 more threads
INFO - 20:51:28: EPOCH - 97 : training on 2134975 raw words (1469853 effective words) took 0.8s, 1938525 effective words/s
Loss after epoch 96: 0.0
INFO - 20:51:29: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:29: worker threa

INFO - 20:51:42: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:42: worker thread finished; awaiting finish of 2 more threads
INFO - 20:51:42: worker thread finished; awaiting finish of 1 more threads
INFO - 20:51:42: worker thread finished; awaiting finish of 0 more threads
INFO - 20:51:42: EPOCH - 115 : training on 2134975 raw words (1470646 effective words) took 0.8s, 1935656 effective words/s
Loss after epoch 114: 0.0
INFO - 20:51:43: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:43: worker thread finished; awaiting finish of 2 more threads
INFO - 20:51:43: worker thread finished; awaiting finish of 1 more threads
INFO - 20:51:43: worker thread finished; awaiting finish of 0 more threads
INFO - 20:51:43: EPOCH - 116 : training on 2134975 raw words (1469860 effective words) took 0.8s, 1841048 effective words/s
Loss after epoch 115: 0.0
INFO - 20:51:44: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:44: worker t

INFO - 20:51:57: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:57: worker thread finished; awaiting finish of 2 more threads
INFO - 20:51:57: worker thread finished; awaiting finish of 1 more threads
INFO - 20:51:57: worker thread finished; awaiting finish of 0 more threads
INFO - 20:51:57: EPOCH - 134 : training on 2134975 raw words (1470630 effective words) took 0.7s, 2020537 effective words/s
Loss after epoch 133: 0.0
INFO - 20:51:58: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:58: worker thread finished; awaiting finish of 2 more threads
INFO - 20:51:58: worker thread finished; awaiting finish of 1 more threads
INFO - 20:51:58: worker thread finished; awaiting finish of 0 more threads
INFO - 20:51:58: EPOCH - 135 : training on 2134975 raw words (1469977 effective words) took 0.8s, 1917764 effective words/s
Loss after epoch 134: 0.0
INFO - 20:51:59: worker thread finished; awaiting finish of 3 more threads
INFO - 20:51:59: worker t

INFO - 20:52:12: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:12: worker thread finished; awaiting finish of 2 more threads
INFO - 20:52:12: worker thread finished; awaiting finish of 1 more threads
INFO - 20:52:12: worker thread finished; awaiting finish of 0 more threads
INFO - 20:52:12: EPOCH - 153 : training on 2134975 raw words (1469773 effective words) took 0.8s, 1860260 effective words/s
Loss after epoch 152: 0.0
INFO - 20:52:13: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:13: worker thread finished; awaiting finish of 2 more threads
INFO - 20:52:13: worker thread finished; awaiting finish of 1 more threads
INFO - 20:52:13: worker thread finished; awaiting finish of 0 more threads
INFO - 20:52:13: EPOCH - 154 : training on 2134975 raw words (1470326 effective words) took 0.8s, 1792196 effective words/s
Loss after epoch 153: 0.0
INFO - 20:52:14: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:14: worker t

INFO - 20:52:27: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:27: worker thread finished; awaiting finish of 2 more threads
INFO - 20:52:27: worker thread finished; awaiting finish of 1 more threads
INFO - 20:52:27: worker thread finished; awaiting finish of 0 more threads
INFO - 20:52:27: EPOCH - 172 : training on 2134975 raw words (1470208 effective words) took 0.9s, 1725526 effective words/s
Loss after epoch 171: 0.0
INFO - 20:52:28: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:28: worker thread finished; awaiting finish of 2 more threads
INFO - 20:52:28: worker thread finished; awaiting finish of 1 more threads
INFO - 20:52:28: worker thread finished; awaiting finish of 0 more threads
INFO - 20:52:28: EPOCH - 173 : training on 2134975 raw words (1470744 effective words) took 0.7s, 2007658 effective words/s
Loss after epoch 172: 0.0
INFO - 20:52:29: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:29: worker t

INFO - 20:52:42: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:42: worker thread finished; awaiting finish of 2 more threads
INFO - 20:52:42: worker thread finished; awaiting finish of 1 more threads
INFO - 20:52:42: worker thread finished; awaiting finish of 0 more threads
INFO - 20:52:42: EPOCH - 191 : training on 2134975 raw words (1470628 effective words) took 0.7s, 2004057 effective words/s
Loss after epoch 190: 0.0
INFO - 20:52:43: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:43: worker thread finished; awaiting finish of 2 more threads
INFO - 20:52:43: worker thread finished; awaiting finish of 1 more threads
INFO - 20:52:43: worker thread finished; awaiting finish of 0 more threads
INFO - 20:52:43: EPOCH - 192 : training on 2134975 raw words (1469437 effective words) took 0.8s, 1801164 effective words/s
Loss after epoch 191: 0.0
INFO - 20:52:44: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:44: worker t

INFO - 20:52:57: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:57: worker thread finished; awaiting finish of 2 more threads
INFO - 20:52:57: worker thread finished; awaiting finish of 1 more threads
INFO - 20:52:57: worker thread finished; awaiting finish of 0 more threads
INFO - 20:52:57: EPOCH - 210 : training on 2134975 raw words (1470449 effective words) took 0.8s, 1846976 effective words/s
Loss after epoch 209: 0.0
INFO - 20:52:58: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:58: worker thread finished; awaiting finish of 2 more threads
INFO - 20:52:58: worker thread finished; awaiting finish of 1 more threads
INFO - 20:52:58: worker thread finished; awaiting finish of 0 more threads
INFO - 20:52:58: EPOCH - 211 : training on 2134975 raw words (1470030 effective words) took 0.8s, 1927664 effective words/s
Loss after epoch 210: 0.0
INFO - 20:52:59: worker thread finished; awaiting finish of 3 more threads
INFO - 20:52:59: worker t

INFO - 20:53:12: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:12: worker thread finished; awaiting finish of 2 more threads
INFO - 20:53:12: worker thread finished; awaiting finish of 1 more threads
INFO - 20:53:12: worker thread finished; awaiting finish of 0 more threads
INFO - 20:53:12: EPOCH - 228 : training on 2134975 raw words (1471308 effective words) took 0.7s, 2012910 effective words/s
Loss after epoch 227: 0.0
INFO - 20:53:13: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:13: worker thread finished; awaiting finish of 2 more threads
INFO - 20:53:13: worker thread finished; awaiting finish of 1 more threads
INFO - 20:53:13: worker thread finished; awaiting finish of 0 more threads
INFO - 20:53:13: EPOCH - 229 : training on 2134975 raw words (1469305 effective words) took 0.8s, 1836608 effective words/s
Loss after epoch 228: 0.0
INFO - 20:53:13: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:13: worker t

INFO - 20:53:26: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:26: worker thread finished; awaiting finish of 2 more threads
INFO - 20:53:26: worker thread finished; awaiting finish of 1 more threads
INFO - 20:53:26: worker thread finished; awaiting finish of 0 more threads
INFO - 20:53:26: EPOCH - 247 : training on 2134975 raw words (1469536 effective words) took 0.7s, 2000237 effective words/s
Loss after epoch 246: 0.0
INFO - 20:53:27: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:27: worker thread finished; awaiting finish of 2 more threads
INFO - 20:53:27: worker thread finished; awaiting finish of 1 more threads
INFO - 20:53:27: worker thread finished; awaiting finish of 0 more threads
INFO - 20:53:27: EPOCH - 248 : training on 2134975 raw words (1470111 effective words) took 0.7s, 2008168 effective words/s
Loss after epoch 247: 0.0
INFO - 20:53:28: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:28: worker t

INFO - 20:53:41: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:41: worker thread finished; awaiting finish of 2 more threads
INFO - 20:53:41: worker thread finished; awaiting finish of 1 more threads
INFO - 20:53:41: worker thread finished; awaiting finish of 0 more threads
INFO - 20:53:41: EPOCH - 266 : training on 2134975 raw words (1470165 effective words) took 0.8s, 1880194 effective words/s
Loss after epoch 265: 0.0
INFO - 20:53:42: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:42: worker thread finished; awaiting finish of 2 more threads
INFO - 20:53:42: worker thread finished; awaiting finish of 1 more threads
INFO - 20:53:42: worker thread finished; awaiting finish of 0 more threads
INFO - 20:53:42: EPOCH - 267 : training on 2134975 raw words (1470015 effective words) took 0.7s, 2001559 effective words/s
Loss after epoch 266: 0.0
INFO - 20:53:43: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:43: worker t

INFO - 20:53:56: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:56: worker thread finished; awaiting finish of 2 more threads
INFO - 20:53:56: worker thread finished; awaiting finish of 1 more threads
INFO - 20:53:56: worker thread finished; awaiting finish of 0 more threads
INFO - 20:53:56: EPOCH - 285 : training on 2134975 raw words (1470424 effective words) took 0.9s, 1677875 effective words/s
Loss after epoch 284: 0.0
INFO - 20:53:57: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:57: worker thread finished; awaiting finish of 2 more threads
INFO - 20:53:57: worker thread finished; awaiting finish of 1 more threads
INFO - 20:53:57: worker thread finished; awaiting finish of 0 more threads
INFO - 20:53:57: EPOCH - 286 : training on 2134975 raw words (1470478 effective words) took 0.8s, 1888062 effective words/s
Loss after epoch 285: 0.0
INFO - 20:53:58: worker thread finished; awaiting finish of 3 more threads
INFO - 20:53:58: worker t

INFO - 20:54:10: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:10: worker thread finished; awaiting finish of 2 more threads
INFO - 20:54:10: worker thread finished; awaiting finish of 1 more threads
INFO - 20:54:10: worker thread finished; awaiting finish of 0 more threads
INFO - 20:54:10: EPOCH - 303 : training on 2134975 raw words (1470464 effective words) took 0.7s, 2130084 effective words/s
Loss after epoch 302: 0.0
INFO - 20:54:11: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:11: worker thread finished; awaiting finish of 2 more threads
INFO - 20:54:11: worker thread finished; awaiting finish of 1 more threads
INFO - 20:54:11: worker thread finished; awaiting finish of 0 more threads
INFO - 20:54:11: EPOCH - 304 : training on 2134975 raw words (1470197 effective words) took 0.7s, 2020912 effective words/s
Loss after epoch 303: 0.0
INFO - 20:54:12: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:12: worker t

INFO - 20:54:25: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:25: worker thread finished; awaiting finish of 2 more threads
INFO - 20:54:25: worker thread finished; awaiting finish of 1 more threads
INFO - 20:54:25: worker thread finished; awaiting finish of 0 more threads
INFO - 20:54:25: EPOCH - 322 : training on 2134975 raw words (1470065 effective words) took 0.7s, 1985492 effective words/s
Loss after epoch 321: 0.0
INFO - 20:54:25: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:25: worker thread finished; awaiting finish of 2 more threads
INFO - 20:54:25: worker thread finished; awaiting finish of 1 more threads
INFO - 20:54:25: worker thread finished; awaiting finish of 0 more threads
INFO - 20:54:25: EPOCH - 323 : training on 2134975 raw words (1470222 effective words) took 0.7s, 1997128 effective words/s
Loss after epoch 322: 0.0
INFO - 20:54:26: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:26: worker t

INFO - 20:54:39: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:39: worker thread finished; awaiting finish of 2 more threads
INFO - 20:54:39: worker thread finished; awaiting finish of 1 more threads
INFO - 20:54:39: worker thread finished; awaiting finish of 0 more threads
INFO - 20:54:39: EPOCH - 340 : training on 2134975 raw words (1470510 effective words) took 0.8s, 1908527 effective words/s
Loss after epoch 339: 0.0
INFO - 20:54:40: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:40: worker thread finished; awaiting finish of 2 more threads
INFO - 20:54:40: worker thread finished; awaiting finish of 1 more threads
INFO - 20:54:40: worker thread finished; awaiting finish of 0 more threads
INFO - 20:54:40: EPOCH - 341 : training on 2134975 raw words (1470081 effective words) took 0.8s, 1897330 effective words/s
Loss after epoch 340: 0.0
INFO - 20:54:40: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:40: worker t

INFO - 20:54:53: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:53: worker thread finished; awaiting finish of 2 more threads
INFO - 20:54:53: worker thread finished; awaiting finish of 1 more threads
INFO - 20:54:53: worker thread finished; awaiting finish of 0 more threads
INFO - 20:54:53: EPOCH - 359 : training on 2134975 raw words (1469565 effective words) took 0.7s, 2091655 effective words/s
Loss after epoch 358: 0.0
INFO - 20:54:54: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:54: worker thread finished; awaiting finish of 2 more threads
INFO - 20:54:54: worker thread finished; awaiting finish of 1 more threads
INFO - 20:54:54: worker thread finished; awaiting finish of 0 more threads
INFO - 20:54:54: EPOCH - 360 : training on 2134975 raw words (1470211 effective words) took 0.7s, 2009894 effective words/s
Loss after epoch 359: 0.0
INFO - 20:54:55: worker thread finished; awaiting finish of 3 more threads
INFO - 20:54:55: worker t

INFO - 20:55:08: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:08: worker thread finished; awaiting finish of 2 more threads
INFO - 20:55:08: worker thread finished; awaiting finish of 1 more threads
INFO - 20:55:08: worker thread finished; awaiting finish of 0 more threads
INFO - 20:55:08: EPOCH - 378 : training on 2134975 raw words (1470304 effective words) took 0.8s, 1914972 effective words/s
Loss after epoch 377: 0.0
INFO - 20:55:09: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:09: worker thread finished; awaiting finish of 2 more threads
INFO - 20:55:09: worker thread finished; awaiting finish of 1 more threads
INFO - 20:55:09: worker thread finished; awaiting finish of 0 more threads
INFO - 20:55:09: EPOCH - 379 : training on 2134975 raw words (1470137 effective words) took 0.9s, 1713954 effective words/s
Loss after epoch 378: 0.0
INFO - 20:55:10: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:10: worker t

INFO - 20:55:23: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:23: worker thread finished; awaiting finish of 2 more threads
INFO - 20:55:23: worker thread finished; awaiting finish of 1 more threads
INFO - 20:55:23: worker thread finished; awaiting finish of 0 more threads
INFO - 20:55:23: EPOCH - 396 : training on 2134975 raw words (1469968 effective words) took 0.9s, 1695205 effective words/s
Loss after epoch 395: 0.0
INFO - 20:55:23: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:23: worker thread finished; awaiting finish of 2 more threads
INFO - 20:55:23: worker thread finished; awaiting finish of 1 more threads
INFO - 20:55:23: worker thread finished; awaiting finish of 0 more threads
INFO - 20:55:23: EPOCH - 397 : training on 2134975 raw words (1470340 effective words) took 0.7s, 1989327 effective words/s
Loss after epoch 396: 0.0
INFO - 20:55:24: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:24: worker t

INFO - 20:55:37: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:37: worker thread finished; awaiting finish of 2 more threads
INFO - 20:55:37: worker thread finished; awaiting finish of 1 more threads
INFO - 20:55:37: worker thread finished; awaiting finish of 0 more threads
INFO - 20:55:37: EPOCH - 415 : training on 2134975 raw words (1470435 effective words) took 0.7s, 2021598 effective words/s
Loss after epoch 414: 0.0
INFO - 20:55:38: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:38: worker thread finished; awaiting finish of 2 more threads
INFO - 20:55:38: worker thread finished; awaiting finish of 1 more threads
INFO - 20:55:38: worker thread finished; awaiting finish of 0 more threads
INFO - 20:55:38: EPOCH - 416 : training on 2134975 raw words (1470306 effective words) took 0.7s, 2009237 effective words/s
Loss after epoch 415: 0.0
INFO - 20:55:39: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:39: worker t

INFO - 20:55:51: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:51: worker thread finished; awaiting finish of 2 more threads
INFO - 20:55:51: worker thread finished; awaiting finish of 1 more threads
INFO - 20:55:51: worker thread finished; awaiting finish of 0 more threads
INFO - 20:55:51: EPOCH - 434 : training on 2134975 raw words (1470198 effective words) took 0.8s, 1943762 effective words/s
Loss after epoch 433: 0.0
INFO - 20:55:52: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:52: worker thread finished; awaiting finish of 2 more threads
INFO - 20:55:52: worker thread finished; awaiting finish of 1 more threads
INFO - 20:55:52: worker thread finished; awaiting finish of 0 more threads
INFO - 20:55:52: EPOCH - 435 : training on 2134975 raw words (1469537 effective words) took 0.7s, 1984053 effective words/s
Loss after epoch 434: 0.0
INFO - 20:55:53: worker thread finished; awaiting finish of 3 more threads
INFO - 20:55:53: worker t

INFO - 20:56:05: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:05: worker thread finished; awaiting finish of 2 more threads
INFO - 20:56:05: worker thread finished; awaiting finish of 1 more threads
INFO - 20:56:05: worker thread finished; awaiting finish of 0 more threads
INFO - 20:56:05: EPOCH - 453 : training on 2134975 raw words (1470550 effective words) took 0.7s, 2001457 effective words/s
Loss after epoch 452: 0.0
INFO - 20:56:06: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:06: worker thread finished; awaiting finish of 2 more threads
INFO - 20:56:06: worker thread finished; awaiting finish of 1 more threads
INFO - 20:56:06: worker thread finished; awaiting finish of 0 more threads
INFO - 20:56:06: EPOCH - 454 : training on 2134975 raw words (1470521 effective words) took 0.7s, 1999237 effective words/s
Loss after epoch 453: 0.0
INFO - 20:56:07: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:07: worker t

INFO - 20:56:19: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:19: worker thread finished; awaiting finish of 2 more threads
INFO - 20:56:19: worker thread finished; awaiting finish of 1 more threads
INFO - 20:56:19: worker thread finished; awaiting finish of 0 more threads
INFO - 20:56:19: EPOCH - 472 : training on 2134975 raw words (1470485 effective words) took 0.8s, 1952483 effective words/s
Loss after epoch 471: 0.0
INFO - 20:56:20: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:20: worker thread finished; awaiting finish of 2 more threads
INFO - 20:56:20: worker thread finished; awaiting finish of 1 more threads
INFO - 20:56:20: worker thread finished; awaiting finish of 0 more threads
INFO - 20:56:20: EPOCH - 473 : training on 2134975 raw words (1470542 effective words) took 0.7s, 1998801 effective words/s
Loss after epoch 472: 0.0
INFO - 20:56:21: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:21: worker t

INFO - 20:56:33: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:33: worker thread finished; awaiting finish of 2 more threads
INFO - 20:56:33: worker thread finished; awaiting finish of 1 more threads
INFO - 20:56:33: worker thread finished; awaiting finish of 0 more threads
INFO - 20:56:33: EPOCH - 491 : training on 2134975 raw words (1470502 effective words) took 0.7s, 2032055 effective words/s
Loss after epoch 490: 0.0
INFO - 20:56:34: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:34: worker thread finished; awaiting finish of 2 more threads
INFO - 20:56:34: worker thread finished; awaiting finish of 1 more threads
INFO - 20:56:34: worker thread finished; awaiting finish of 0 more threads
INFO - 20:56:34: EPOCH - 492 : training on 2134975 raw words (1470315 effective words) took 0.7s, 1971696 effective words/s
Loss after epoch 491: 0.0
INFO - 20:56:35: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:35: worker t

INFO - 20:56:47: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:47: worker thread finished; awaiting finish of 2 more threads
INFO - 20:56:47: worker thread finished; awaiting finish of 1 more threads
INFO - 20:56:47: worker thread finished; awaiting finish of 0 more threads
INFO - 20:56:47: EPOCH - 510 : training on 2134975 raw words (1470691 effective words) took 0.8s, 1941208 effective words/s
Loss after epoch 509: 0.0
INFO - 20:56:48: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:48: worker thread finished; awaiting finish of 2 more threads
INFO - 20:56:48: worker thread finished; awaiting finish of 1 more threads
INFO - 20:56:48: worker thread finished; awaiting finish of 0 more threads
INFO - 20:56:48: EPOCH - 511 : training on 2134975 raw words (1470060 effective words) took 0.7s, 2010449 effective words/s
Loss after epoch 510: 0.0
INFO - 20:56:49: worker thread finished; awaiting finish of 3 more threads
INFO - 20:56:49: worker t

INFO - 20:57:01: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:01: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:01: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:01: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:01: EPOCH - 529 : training on 2134975 raw words (1469836 effective words) took 0.7s, 2000876 effective words/s
Loss after epoch 528: 0.0
INFO - 20:57:02: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:02: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:02: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:02: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:02: EPOCH - 530 : training on 2134975 raw words (1470275 effective words) took 0.7s, 2058688 effective words/s
Loss after epoch 529: 0.0
INFO - 20:57:03: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:03: worker t

INFO - 20:57:15: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:15: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:15: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:15: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:15: EPOCH - 548 : training on 2134975 raw words (1469818 effective words) took 0.7s, 2047637 effective words/s
Loss after epoch 547: 0.0
INFO - 20:57:16: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:16: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:16: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:16: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:16: EPOCH - 549 : training on 2134975 raw words (1469659 effective words) took 0.7s, 2017587 effective words/s
Loss after epoch 548: 0.0
INFO - 20:57:17: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:17: worker t

INFO - 20:57:30: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:30: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:30: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:30: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:30: EPOCH - 566 : training on 2134975 raw words (1470064 effective words) took 0.7s, 1964018 effective words/s
Loss after epoch 565: 0.0
INFO - 20:57:31: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:31: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:31: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:31: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:31: EPOCH - 567 : training on 2134975 raw words (1470593 effective words) took 0.7s, 2020901 effective words/s
Loss after epoch 566: 0.0
INFO - 20:57:31: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:31: worker t

INFO - 20:57:44: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:44: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:44: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:44: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:44: EPOCH - 585 : training on 2134975 raw words (1470140 effective words) took 0.7s, 2044501 effective words/s
Loss after epoch 584: 0.0
INFO - 20:57:45: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:45: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:45: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:45: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:45: EPOCH - 586 : training on 2134975 raw words (1469739 effective words) took 0.7s, 1991644 effective words/s
Loss after epoch 585: 0.0
INFO - 20:57:46: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:46: worker t

INFO - 20:57:59: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:59: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:59: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:59: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:59: EPOCH - 604 : training on 2134975 raw words (1469979 effective words) took 0.7s, 1960992 effective words/s
Loss after epoch 603: 0.0
INFO - 20:57:59: worker thread finished; awaiting finish of 3 more threads
INFO - 20:57:59: worker thread finished; awaiting finish of 2 more threads
INFO - 20:57:59: worker thread finished; awaiting finish of 1 more threads
INFO - 20:57:59: worker thread finished; awaiting finish of 0 more threads
INFO - 20:57:59: EPOCH - 605 : training on 2134975 raw words (1470295 effective words) took 0.7s, 2056543 effective words/s
Loss after epoch 604: 0.0
INFO - 20:58:00: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:00: worker t

INFO - 20:58:13: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:13: worker thread finished; awaiting finish of 2 more threads
INFO - 20:58:13: worker thread finished; awaiting finish of 1 more threads
INFO - 20:58:13: worker thread finished; awaiting finish of 0 more threads
INFO - 20:58:13: EPOCH - 623 : training on 2134975 raw words (1469693 effective words) took 0.7s, 2038792 effective words/s
Loss after epoch 622: 0.0
INFO - 20:58:13: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:13: worker thread finished; awaiting finish of 2 more threads
INFO - 20:58:13: worker thread finished; awaiting finish of 1 more threads
INFO - 20:58:13: worker thread finished; awaiting finish of 0 more threads
INFO - 20:58:13: EPOCH - 624 : training on 2134975 raw words (1470532 effective words) took 0.7s, 2019996 effective words/s
Loss after epoch 623: 0.0
INFO - 20:58:14: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:14: worker t

INFO - 20:58:27: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:27: worker thread finished; awaiting finish of 2 more threads
INFO - 20:58:27: worker thread finished; awaiting finish of 1 more threads
INFO - 20:58:27: worker thread finished; awaiting finish of 0 more threads
INFO - 20:58:27: EPOCH - 642 : training on 2134975 raw words (1470234 effective words) took 0.8s, 1948013 effective words/s
Loss after epoch 641: 0.0
INFO - 20:58:28: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:28: worker thread finished; awaiting finish of 2 more threads
INFO - 20:58:28: worker thread finished; awaiting finish of 1 more threads
INFO - 20:58:28: worker thread finished; awaiting finish of 0 more threads
INFO - 20:58:28: EPOCH - 643 : training on 2134975 raw words (1470278 effective words) took 0.8s, 1793276 effective words/s
Loss after epoch 642: 0.0
INFO - 20:58:29: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:29: worker t

INFO - 20:58:42: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:42: worker thread finished; awaiting finish of 2 more threads
INFO - 20:58:42: worker thread finished; awaiting finish of 1 more threads
INFO - 20:58:42: worker thread finished; awaiting finish of 0 more threads
INFO - 20:58:42: EPOCH - 661 : training on 2134975 raw words (1470779 effective words) took 0.7s, 2014225 effective words/s
Loss after epoch 660: 0.0
INFO - 20:58:43: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:43: worker thread finished; awaiting finish of 2 more threads
INFO - 20:58:43: worker thread finished; awaiting finish of 1 more threads
INFO - 20:58:43: worker thread finished; awaiting finish of 0 more threads
INFO - 20:58:43: EPOCH - 662 : training on 2134975 raw words (1469957 effective words) took 0.7s, 1971794 effective words/s
Loss after epoch 661: 0.0
INFO - 20:58:44: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:44: worker t

INFO - 20:58:57: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:57: worker thread finished; awaiting finish of 2 more threads
INFO - 20:58:57: worker thread finished; awaiting finish of 1 more threads
INFO - 20:58:57: worker thread finished; awaiting finish of 0 more threads
INFO - 20:58:57: EPOCH - 680 : training on 2134975 raw words (1469880 effective words) took 0.7s, 2047400 effective words/s
Loss after epoch 679: 0.0
INFO - 20:58:58: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:58: worker thread finished; awaiting finish of 2 more threads
INFO - 20:58:58: worker thread finished; awaiting finish of 1 more threads
INFO - 20:58:58: worker thread finished; awaiting finish of 0 more threads
INFO - 20:58:58: EPOCH - 681 : training on 2134975 raw words (1469784 effective words) took 0.8s, 1942783 effective words/s
Loss after epoch 680: 0.0
INFO - 20:58:58: worker thread finished; awaiting finish of 3 more threads
INFO - 20:58:58: worker t

INFO - 20:59:12: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:12: worker thread finished; awaiting finish of 2 more threads
INFO - 20:59:12: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:12: worker thread finished; awaiting finish of 0 more threads
INFO - 20:59:12: EPOCH - 699 : training on 2134975 raw words (1470496 effective words) took 0.8s, 1954448 effective words/s
Loss after epoch 698: 0.0
INFO - 20:59:12: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:12: worker thread finished; awaiting finish of 2 more threads
INFO - 20:59:12: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:12: worker thread finished; awaiting finish of 0 more threads
INFO - 20:59:12: EPOCH - 700 : training on 2134975 raw words (1470090 effective words) took 0.8s, 1940730 effective words/s
Loss after epoch 699: 0.0
INFO - 20:59:13: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:13: worker t

INFO - 20:59:27: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:27: worker thread finished; awaiting finish of 2 more threads
INFO - 20:59:27: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:27: worker thread finished; awaiting finish of 0 more threads
INFO - 20:59:27: EPOCH - 718 : training on 2134975 raw words (1469993 effective words) took 0.8s, 1894141 effective words/s
Loss after epoch 717: 0.0
INFO - 20:59:27: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:27: worker thread finished; awaiting finish of 2 more threads
INFO - 20:59:27: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:27: worker thread finished; awaiting finish of 0 more threads
INFO - 20:59:27: EPOCH - 719 : training on 2134975 raw words (1470707 effective words) took 0.8s, 1896910 effective words/s
Loss after epoch 718: 0.0
INFO - 20:59:28: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:28: worker t

INFO - 20:59:42: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:42: worker thread finished; awaiting finish of 2 more threads
INFO - 20:59:42: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:42: worker thread finished; awaiting finish of 0 more threads
INFO - 20:59:42: EPOCH - 737 : training on 2134975 raw words (1470277 effective words) took 0.7s, 2049602 effective words/s
Loss after epoch 736: 0.0
INFO - 20:59:42: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:42: worker thread finished; awaiting finish of 2 more threads
INFO - 20:59:42: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:42: worker thread finished; awaiting finish of 0 more threads
INFO - 20:59:42: EPOCH - 738 : training on 2134975 raw words (1468952 effective words) took 0.7s, 1970963 effective words/s
Loss after epoch 737: 0.0
INFO - 20:59:43: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:43: worker t

INFO - 20:59:56: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:56: worker thread finished; awaiting finish of 0 more threads
INFO - 20:59:56: EPOCH - 755 : training on 2134975 raw words (1470059 effective words) took 0.8s, 1867438 effective words/s
Loss after epoch 754: 0.0
INFO - 20:59:57: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:57: worker thread finished; awaiting finish of 2 more threads
INFO - 20:59:57: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:57: worker thread finished; awaiting finish of 0 more threads
INFO - 20:59:57: EPOCH - 756 : training on 2134975 raw words (1470096 effective words) took 0.9s, 1557980 effective words/s
Loss after epoch 755: 0.0
INFO - 20:59:58: worker thread finished; awaiting finish of 3 more threads
INFO - 20:59:58: worker thread finished; awaiting finish of 2 more threads
INFO - 20:59:58: worker thread finished; awaiting finish of 1 more threads
INFO - 20:59:58: worker t

INFO - 21:00:12: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:12: worker thread finished; awaiting finish of 2 more threads
INFO - 21:00:12: worker thread finished; awaiting finish of 1 more threads
INFO - 21:00:12: worker thread finished; awaiting finish of 0 more threads
INFO - 21:00:12: EPOCH - 774 : training on 2134975 raw words (1470341 effective words) took 0.8s, 1781733 effective words/s
Loss after epoch 773: 0.0
INFO - 21:00:13: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:13: worker thread finished; awaiting finish of 2 more threads
INFO - 21:00:13: worker thread finished; awaiting finish of 1 more threads
INFO - 21:00:13: worker thread finished; awaiting finish of 0 more threads
INFO - 21:00:13: EPOCH - 775 : training on 2134975 raw words (1469566 effective words) took 0.8s, 1744930 effective words/s
Loss after epoch 774: 0.0
INFO - 21:00:14: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:14: worker t

INFO - 21:00:27: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:27: worker thread finished; awaiting finish of 2 more threads
INFO - 21:00:27: worker thread finished; awaiting finish of 1 more threads
INFO - 21:00:27: worker thread finished; awaiting finish of 0 more threads
INFO - 21:00:27: EPOCH - 793 : training on 2134975 raw words (1469367 effective words) took 0.8s, 1848502 effective words/s
Loss after epoch 792: 0.0
INFO - 21:00:28: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:28: worker thread finished; awaiting finish of 2 more threads
INFO - 21:00:28: worker thread finished; awaiting finish of 1 more threads
INFO - 21:00:28: worker thread finished; awaiting finish of 0 more threads
INFO - 21:00:28: EPOCH - 794 : training on 2134975 raw words (1470649 effective words) took 0.9s, 1710359 effective words/s
Loss after epoch 793: 0.0
INFO - 21:00:29: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:29: worker t

INFO - 21:00:42: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:42: worker thread finished; awaiting finish of 2 more threads
INFO - 21:00:42: worker thread finished; awaiting finish of 1 more threads
INFO - 21:00:42: worker thread finished; awaiting finish of 0 more threads
INFO - 21:00:42: EPOCH - 812 : training on 2134975 raw words (1471009 effective words) took 0.7s, 2020868 effective words/s
Loss after epoch 811: 0.0
INFO - 21:00:43: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:43: worker thread finished; awaiting finish of 2 more threads
INFO - 21:00:43: worker thread finished; awaiting finish of 1 more threads
INFO - 21:00:43: worker thread finished; awaiting finish of 0 more threads
INFO - 21:00:43: EPOCH - 813 : training on 2134975 raw words (1470775 effective words) took 0.7s, 2061890 effective words/s
Loss after epoch 812: 0.0
INFO - 21:00:44: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:44: worker t

INFO - 21:00:57: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:57: worker thread finished; awaiting finish of 2 more threads
INFO - 21:00:57: worker thread finished; awaiting finish of 1 more threads
INFO - 21:00:57: worker thread finished; awaiting finish of 0 more threads
INFO - 21:00:57: EPOCH - 831 : training on 2134975 raw words (1469850 effective words) took 0.8s, 1949481 effective words/s
Loss after epoch 830: 0.0
INFO - 21:00:58: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:58: worker thread finished; awaiting finish of 2 more threads
INFO - 21:00:58: worker thread finished; awaiting finish of 1 more threads
INFO - 21:00:58: worker thread finished; awaiting finish of 0 more threads
INFO - 21:00:58: EPOCH - 832 : training on 2134975 raw words (1470202 effective words) took 0.7s, 2045894 effective words/s
Loss after epoch 831: 0.0
INFO - 21:00:59: worker thread finished; awaiting finish of 3 more threads
INFO - 21:00:59: worker t

INFO - 21:01:12: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:12: worker thread finished; awaiting finish of 2 more threads
INFO - 21:01:12: worker thread finished; awaiting finish of 1 more threads
INFO - 21:01:12: worker thread finished; awaiting finish of 0 more threads
INFO - 21:01:12: EPOCH - 850 : training on 2134975 raw words (1470620 effective words) took 0.7s, 2047849 effective words/s
Loss after epoch 849: 0.0
INFO - 21:01:12: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:12: worker thread finished; awaiting finish of 2 more threads
INFO - 21:01:12: worker thread finished; awaiting finish of 1 more threads
INFO - 21:01:12: worker thread finished; awaiting finish of 0 more threads
INFO - 21:01:12: EPOCH - 851 : training on 2134975 raw words (1470800 effective words) took 0.7s, 1968292 effective words/s
Loss after epoch 850: 0.0
INFO - 21:01:13: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:13: worker t

INFO - 21:01:26: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:26: worker thread finished; awaiting finish of 2 more threads
INFO - 21:01:26: worker thread finished; awaiting finish of 1 more threads
INFO - 21:01:26: worker thread finished; awaiting finish of 0 more threads
INFO - 21:01:26: EPOCH - 869 : training on 2134975 raw words (1469458 effective words) took 0.8s, 1819326 effective words/s
Loss after epoch 868: 0.0
INFO - 21:01:27: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:27: worker thread finished; awaiting finish of 2 more threads
INFO - 21:01:27: worker thread finished; awaiting finish of 1 more threads
INFO - 21:01:27: worker thread finished; awaiting finish of 0 more threads
INFO - 21:01:27: EPOCH - 870 : training on 2134975 raw words (1470077 effective words) took 0.9s, 1698927 effective words/s
Loss after epoch 869: 0.0
INFO - 21:01:28: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:28: worker t

INFO - 21:01:42: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:42: worker thread finished; awaiting finish of 2 more threads
INFO - 21:01:42: worker thread finished; awaiting finish of 1 more threads
INFO - 21:01:42: worker thread finished; awaiting finish of 0 more threads
INFO - 21:01:42: EPOCH - 888 : training on 2134975 raw words (1470300 effective words) took 0.8s, 1917058 effective words/s
Loss after epoch 887: 0.0
INFO - 21:01:43: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:43: worker thread finished; awaiting finish of 2 more threads
INFO - 21:01:43: worker thread finished; awaiting finish of 1 more threads
INFO - 21:01:43: worker thread finished; awaiting finish of 0 more threads
INFO - 21:01:43: EPOCH - 889 : training on 2134975 raw words (1469914 effective words) took 0.8s, 1806408 effective words/s
Loss after epoch 888: 0.0
INFO - 21:01:43: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:43: worker t

INFO - 21:01:57: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:57: worker thread finished; awaiting finish of 2 more threads
INFO - 21:01:57: worker thread finished; awaiting finish of 1 more threads
INFO - 21:01:57: worker thread finished; awaiting finish of 0 more threads
INFO - 21:01:57: EPOCH - 907 : training on 2134975 raw words (1469856 effective words) took 0.9s, 1716836 effective words/s
Loss after epoch 906: 0.0
INFO - 21:01:58: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:58: worker thread finished; awaiting finish of 2 more threads
INFO - 21:01:58: worker thread finished; awaiting finish of 1 more threads
INFO - 21:01:58: worker thread finished; awaiting finish of 0 more threads
INFO - 21:01:58: EPOCH - 908 : training on 2134975 raw words (1471091 effective words) took 0.8s, 1950256 effective words/s
Loss after epoch 907: 0.0
INFO - 21:01:59: worker thread finished; awaiting finish of 3 more threads
INFO - 21:01:59: worker t

INFO - 21:02:13: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:13: worker thread finished; awaiting finish of 2 more threads
INFO - 21:02:13: worker thread finished; awaiting finish of 1 more threads
INFO - 21:02:13: worker thread finished; awaiting finish of 0 more threads
INFO - 21:02:13: EPOCH - 926 : training on 2134975 raw words (1470441 effective words) took 0.8s, 1742974 effective words/s
Loss after epoch 925: 0.0
INFO - 21:02:14: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:14: worker thread finished; awaiting finish of 2 more threads
INFO - 21:02:14: worker thread finished; awaiting finish of 1 more threads
INFO - 21:02:14: worker thread finished; awaiting finish of 0 more threads
INFO - 21:02:14: EPOCH - 927 : training on 2134975 raw words (1470546 effective words) took 0.8s, 1793413 effective words/s
Loss after epoch 926: 0.0
INFO - 21:02:14: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:14: worker t

INFO - 21:02:28: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:28: worker thread finished; awaiting finish of 2 more threads
INFO - 21:02:28: worker thread finished; awaiting finish of 1 more threads
INFO - 21:02:28: worker thread finished; awaiting finish of 0 more threads
INFO - 21:02:28: EPOCH - 945 : training on 2134975 raw words (1470221 effective words) took 0.7s, 1966001 effective words/s
Loss after epoch 944: 0.0
INFO - 21:02:29: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:29: worker thread finished; awaiting finish of 2 more threads
INFO - 21:02:29: worker thread finished; awaiting finish of 1 more threads
INFO - 21:02:29: worker thread finished; awaiting finish of 0 more threads
INFO - 21:02:29: EPOCH - 946 : training on 2134975 raw words (1470918 effective words) took 0.8s, 1927808 effective words/s
Loss after epoch 945: 0.0
INFO - 21:02:30: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:30: worker t

INFO - 21:02:43: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:43: worker thread finished; awaiting finish of 2 more threads
INFO - 21:02:44: worker thread finished; awaiting finish of 1 more threads
INFO - 21:02:44: worker thread finished; awaiting finish of 0 more threads
INFO - 21:02:44: EPOCH - 964 : training on 2134975 raw words (1470655 effective words) took 0.9s, 1589602 effective words/s
Loss after epoch 963: 0.0
INFO - 21:02:44: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:44: worker thread finished; awaiting finish of 2 more threads
INFO - 21:02:44: worker thread finished; awaiting finish of 1 more threads
INFO - 21:02:44: worker thread finished; awaiting finish of 0 more threads
INFO - 21:02:44: EPOCH - 965 : training on 2134975 raw words (1470276 effective words) took 0.8s, 1743224 effective words/s
Loss after epoch 964: 0.0
INFO - 21:02:45: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:45: worker t

INFO - 21:02:59: worker thread finished; awaiting finish of 3 more threads
INFO - 21:02:59: worker thread finished; awaiting finish of 2 more threads
INFO - 21:02:59: worker thread finished; awaiting finish of 1 more threads
INFO - 21:02:59: worker thread finished; awaiting finish of 0 more threads
INFO - 21:02:59: EPOCH - 983 : training on 2134975 raw words (1470089 effective words) took 0.7s, 1987601 effective words/s
Loss after epoch 982: 0.0
INFO - 21:03:00: worker thread finished; awaiting finish of 3 more threads
INFO - 21:03:00: worker thread finished; awaiting finish of 2 more threads
INFO - 21:03:00: worker thread finished; awaiting finish of 1 more threads
INFO - 21:03:00: worker thread finished; awaiting finish of 0 more threads
INFO - 21:03:00: EPOCH - 984 : training on 2134975 raw words (1470103 effective words) took 0.8s, 1787349 effective words/s
Loss after epoch 983: 0.0
INFO - 21:03:00: worker thread finished; awaiting finish of 3 more threads
INFO - 21:03:00: worker t

INFO - 21:03:12: saved gutenberg_w2v.100d.model


In [19]:
# Similarity 

from w2v_train import W2VLossLogger
from gensim.models import Word2Vec

model = Word2Vec.load("gutenberg_w2v.100d.model")

for word in ['bible', 'book', 'bank', 'water']:
    sims = model.wv.most_similar(positive=word,topn=5)
    print("5 most similar words to {} are {}".format(word,sims))

INFO - 21:51:02: loading Word2Vec object from gutenberg_w2v.100d.model
INFO - 21:51:02: loading wv recursively from gutenberg_w2v.100d.model.wv.* with mmap=None
INFO - 21:51:02: setting ignored attribute vectors_norm to None
INFO - 21:51:02: loading vocabulary recursively from gutenberg_w2v.100d.model.vocabulary.* with mmap=None
INFO - 21:51:02: loading trainables recursively from gutenberg_w2v.100d.model.trainables.* with mmap=None
INFO - 21:51:02: setting ignored attribute cum_table to None
INFO - 21:51:02: loaded gutenberg_w2v.100d.model
INFO - 21:51:02: precomputing L2-norms of word weight vectors


5 most similar words to bible are [('respects', 0.38803136348724365), ('island', 0.3760893940925598), ('poetry', 0.360975444316864), ('official', 0.35624533891677856), ('ointment', 0.32751742005348206)]
5 most similar words to book are [('written', 0.4902263283729553), ('letter', 0.47883158922195435), ('note', 0.46313685178756714), ('temple', 0.45116883516311646), ('chronicles', 0.4450681209564209)]
5 most similar words to bank are [('table', 0.5371992588043213), ('wall', 0.5297440886497498), ('pool', 0.5177308917045593), ('river', 0.492403119802475), ('top', 0.4804553985595703)]
5 most similar words to water are [('waters', 0.5834245085716248), ('blood', 0.5388904809951782), ('wine', 0.5218234658241272), ('wood', 0.5205837488174438), ('oil', 0.5113111138343811)]


In [20]:
# word-analogy

from numpy import dot,asarray
from numpy.linalg import norm

'''
A = asarray(model.wv['kingdom'])
B = asarray(model.wv['king'] - model.wv['queen'] + model.wv['girls'])
cosine = dot(A,B)/(norm(A)*norm(B))
print(cosine)

'''

print(model.wv.most_similar(positive = ['king','girls'],negative = ['queen'],topn=1))
print(model.wv.most_similar(positive = ['taller','good'],negative = ['tall'],topn=1))
print(model.wv.most_similar(positive = ['france','london'],negative = ['paris'],topn=1))


[('people', 0.45547324419021606)]
[('useful', 0.4046720862388611)]
[('school', 0.4081120491027832)]


In [14]:
# Same analysis for google's w2v model

from gensim.models import KeyedVectors

NUM_W2V_TO_LOAD = 1000000

G_model = KeyedVectors.load_word2vec_format('./GoogleNews-vectors-negative300.bin.gz', binary=True,
limit=NUM_W2V_TO_LOAD)

INFO - 11:58:05: loading projection weights from ./GoogleNews-vectors-negative300.bin.gz
INFO - 11:58:17: loaded (1000000, 300) matrix from ./GoogleNews-vectors-negative300.bin.gz


In [16]:
for word in ['bible', 'book', 'bank', 'water']:
    sims = G_model.wv.most_similar(positive=word,topn=5)
    print("5 most similar words to {} are {}".format(word,sims))

  sims = G_model.wv.most_similar(positive=word,topn=5)


5 most similar words to bible are [('Bible', 0.7367781400680542), ('bibles', 0.6052597761154175), ('Holy_Bible', 0.5989601612091064), ('scriptures', 0.574568510055542), ('scripture', 0.5697901248931885)]
5 most similar words to book are [('tome', 0.7485830783843994), ('books', 0.7379178404808044), ('memoir', 0.7302927374839783), ('paperback_edition', 0.6868364810943604), ('autobiography', 0.6741527915000916)]
5 most similar words to bank are [('banks', 0.7440759539604187), ('banking', 0.690161406993866), ('Bank', 0.6698698997497559), ('lender', 0.6342284679412842), ('banker', 0.6092953681945801)]
5 most similar words to water are [('potable_water', 0.6799106597900391), ('Water', 0.6706870794296265), ('sewage', 0.6619377136230469), ('groundwater', 0.6588345766067505), ('Floridan_aquifer', 0.6422533988952637)]


In [19]:
print(G_model.wv.most_similar(positive = ['king','girls'],negative = ['queen'],topn=1))
print(G_model.wv.most_similar(positive = ['taller','good'],negative = ['tall'],topn=1))
print(G_model.wv.most_similar(positive = ['france','london'],negative = ['paris'],topn=1))

  print(G_model.wv.most_similar(positive = ['king','girls'],negative = ['queen'],topn=1))
  print(G_model.wv.most_similar(positive = ['taller','good'],negative = ['tall'],topn=1))
  print(G_model.wv.most_similar(positive = ['france','london'],negative = ['paris'],topn=1))


[('boys', 0.7518433332443237)]
[('better', 0.7245721220970154)]
[('england', 0.5836853384971619)]


In [22]:
# Step 13 

# .tsv files for Embedding projector

with open('./data/embeddings.tsv','w') as file:
    with open('./data/metadata.tsv','w') as f:
        for word in model.wv.vocab.keys():
            for attr in model.wv[word]:
                file.write('{}\t'.format(attr))
            file.write('\n')
            f.write('{}\n'.format(word))

In [17]:
# Step 14

import tarfile

with tarfile.open("./data/aclImdb_v1.tar.gz", "r") as tf:
    i=0
    for member in tf.getmembers():
        tf.extract(member=member, path="./data")


In [46]:
!python w2v_sentiment_analysis.py

INFO - 22:49:27: loading projection weights from ./data/GoogleNews-vectors-negative300.bin.gz
INFO - 22:49:39: loaded (1000000, 300) matrix from ./data/GoogleNews-vectors-negative300.bin.gz
  if word not in model.wv.vocab.keys():
  nbow = nbow + model.wv[word]
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
/n
acc of clf: 0.61575
