# ECIS 2021 - Needmining
## Training Notebook for word2vec embeddings

***
This is the Jupyter notebook holding the code for training and saving word2vec embeddings.

The notebook has the following structure:

1. Import
2. Load Data
3. Training
4. Saving embeddings as file

In [1]:
import logging  # gensim reports its progress through the standard logging module
# Show INFO-level records as "<LEVEL> - <HH:MM:SS>: <message>".
logging.basicConfig(
    level=logging.INFO,
    datefmt='%H:%M:%S',
    format="%(levelname)s - %(asctime)s: %(message)s",
)

from pathlib import Path
import pandas as pd  # For data handling
import csv
import re
import numpy as np

import gensim
from gensim.test.utils import get_tmpfile
from gensim.models import Word2Vec

import sys
import multiprocessing

# Count the number of cores available
cores = multiprocessing.cpu_count()

# The csv file might contain very large fields, so raise the parser's field size limit.
# sys.maxsize does not fit into a C long on every platform (e.g. 64-bit Windows), in which
# case csv.field_size_limit raises OverflowError; shrink the candidate until it is accepted.
field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(field_limit)
        break
    except OverflowError:
        field_limit //= 10

INFO - 11:22:49: adding document #0 to Dictionary(0 unique tokens: [])
INFO - 11:22:49: built Dictionary(12 unique tokens: ['computer', 'human', 'interface', 'response', 'survey']...) from 9 documents (total 29 corpus positions)


131072

In [2]:
# Path under which the trained word vectors are written.
# NOTE(review): the original comment mentions a `save_model` flag, but no such flag is
# defined in this cell -- confirm whether it exists elsewhere.
save_path = '../models/w2v-amz.bin'

# Directory that holds the CSV file with the training data.
# NOTE(review): the name says "preprocessed" while the path points at '../data/raw' -- confirm.
DATA_PREPROCESSED = Path('../data/raw')

# Boolean switch for appending a suffix to tokens.
# NOTE(review): the original comment ("suffix of the publication to trump") looks carried
# over from another project -- confirm whether this switch is still needed.
add_suffix = False

# Training / model parameters.
# NOTE(review): the original comment mentions a `load_model` flag that is not defined in
# this cell, and the Word2Vec call in the training cell passes its own literal values
# (vector size 20, window 5, negative 10) -- reconcile the two.
min_count = 5
window = 10
vector_size = 300
sample = 6e-5
negative = 5
# Leave one core free for the rest of the system, but never drop to zero worker threads
# (cores - 1 would be 0 on a single-core machine).
workers = max(1, cores - 1)
# NOTE(review): `dm` and `dbow_words` are Doc2Vec option names, not Word2Vec ones -- confirm.
dm = 0
dbow_words = 1
alpha = 0.03
min_alpha = 0.0007

epochs = 5

# Seed NumPy's global random number generator.
# NOTE(review): gensim's Word2Vec takes its own `seed` argument -- confirm whether seeding
# NumPy alone gives the intended reproducibility.
np.random.seed(0)

In [3]:
class Corpus(object):
    """Re-iterable stream of tokenised sentences read from one column of a CSV file.

    The file is re-opened on every call to ``iter()``, so one instance can be
    iterated several times (for example once per training pass).

    Parameters
    ----------
    file_path : str or path-like
        CSV file to read. Its first row is treated as a header and skipped.
    text_column : int, optional
        Zero-based index of the column that holds the text (default ``2``).
    encoding : str or None, optional
        Encoding handed to ``open``; ``None`` keeps the platform default.
    """

    def __init__(self, file_path, text_column=2, encoding=None):
        self.file_path = file_path
        self.text_column = text_column
        self.encoding = encoding

    def __iter__(self):
        """Yield one sentence (a list of str tokens) per data row.

        Rows that are too short to contain the text column are skipped.
        """
        # newline='' is what the csv module asks for, so that it can do its own newline
        # handling (including newlines embedded in quoted fields).
        with open(self.file_path, newline='', encoding=self.encoding) as csv_file:
            csv_reader = csv.reader(csv_file)
            next(csv_reader, None)  # skip the header row; an empty file simply yields nothing
            for row in csv_reader:
                try:
                    text = row[self.text_column]
                except IndexError:
                    continue  # malformed / short row without a text column
                yield gensim.utils.simple_preprocess(text)

In [4]:
# Build the sentence stream over the training CSV inside the data directory.
corpus = Corpus(file_path=DATA_PREPROCESSED.joinpath('needs_sentences_filtered.csv'))

In [5]:
# gensim 4.0 renamed Word2Vec's `size` keyword to `vector_size`; passing the old name to a
# 4.x release raises TypeError. Pick the keyword the installed release understands so this
# cell runs on both 3.x and 4.x.
size_keyword = 'vector_size' if int(gensim.__version__.split('.')[0]) >= 4 else 'size'

# Train the embeddings on the streamed corpus. The literal values are kept as they were
# so the resulting vectors stay 20-dimensional.
model = Word2Vec(
    sentences=corpus,
    window=5,
    negative=10,
    sg=1,  # 1 selects the skip-gram training algorithm
    **{size_keyword: 20},
)

INFO - 11:22:49: collecting all words and their counts
INFO - 11:22:49: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
INFO - 11:22:51: PROGRESS: at sentence #10000, processed 1105922 words, keeping 6609 word types
INFO - 11:22:52: PROGRESS: at sentence #20000, processed 1575295 words, keeping 7977 word types
INFO - 11:22:53: PROGRESS: at sentence #30000, processed 2130525 words, keeping 9199 word types
INFO - 11:22:53: PROGRESS: at sentence #40000, processed 2629710 words, keeping 10159 word types
INFO - 11:22:54: PROGRESS: at sentence #50000, processed 3019211 words, keeping 11309 word types
INFO - 11:22:55: PROGRESS: at sentence #60000, processed 3829862 words, keeping 12440 word types
INFO - 11:22:56: PROGRESS: at sentence #70000, processed 4270707 words, keeping 13434 word types
INFO - 11:22:56: PROGRESS: at sentence #80000, processed 4563781 words, keeping 14573 word types
INFO - 11:22:57: PROGRESS: at sentence #90000, processed 5052566 words, keeping 16013 wor

INFO - 11:23:59: PROGRESS: at sentence #830000, processed 44808293 words, keeping 49659 word types
INFO - 11:24:00: PROGRESS: at sentence #840000, processed 45431403 words, keeping 49993 word types
INFO - 11:24:02: PROGRESS: at sentence #850000, processed 46456091 words, keeping 50462 word types
INFO - 11:24:02: PROGRESS: at sentence #860000, processed 47085186 words, keeping 50747 word types
INFO - 11:24:03: PROGRESS: at sentence #870000, processed 47601685 words, keeping 50989 word types
INFO - 11:24:04: PROGRESS: at sentence #880000, processed 48105935 words, keeping 51259 word types
INFO - 11:24:04: PROGRESS: at sentence #890000, processed 48360930 words, keeping 51451 word types
INFO - 11:24:04: collected 51451 word types from a corpus of 48361223 raw words and 890028 sentences
INFO - 11:24:04: Loading a fresh vocabulary
INFO - 11:24:05: effective_min_count=5 retains 34556 unique words (67% of original 51451, drops 16895)
INFO - 11:24:05: effective_min_count=5 leaves 48322077 word

INFO - 11:25:25: EPOCH 1 - PROGRESS: at 31.69% examples, 151320 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:26: EPOCH 1 - PROGRESS: at 32.02% examples, 151034 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:27: EPOCH 1 - PROGRESS: at 32.13% examples, 150804 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:28: EPOCH 1 - PROGRESS: at 32.29% examples, 150901 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:29: EPOCH 1 - PROGRESS: at 32.48% examples, 151095 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:30: EPOCH 1 - PROGRESS: at 32.72% examples, 151402 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:31: EPOCH 1 - PROGRESS: at 32.97% examples, 151529 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:32: EPOCH 1 - PROGRESS: at 33.23% examples, 151492 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:33: EPOCH 1 - PROGRESS: at 33.47% examples, 151108 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:34: EPOCH 1 - PROGRESS: at 33.77% examples, 151045 words/s, in_qsize 5, out_qsize 0
INFO - 11:25:35: EPOCH 1 - PRO

INFO - 11:26:53: EPOCH 1 - PROGRESS: at 66.66% examples, 148377 words/s, in_qsize 5, out_qsize 0
INFO - 11:26:54: EPOCH 1 - PROGRESS: at 67.09% examples, 148320 words/s, in_qsize 5, out_qsize 0
INFO - 11:26:55: EPOCH 1 - PROGRESS: at 67.70% examples, 148263 words/s, in_qsize 6, out_qsize 0
INFO - 11:26:56: EPOCH 1 - PROGRESS: at 68.34% examples, 148217 words/s, in_qsize 5, out_qsize 0
INFO - 11:26:57: EPOCH 1 - PROGRESS: at 68.59% examples, 148333 words/s, in_qsize 6, out_qsize 0
INFO - 11:26:58: EPOCH 1 - PROGRESS: at 69.12% examples, 148339 words/s, in_qsize 5, out_qsize 0
INFO - 11:26:59: EPOCH 1 - PROGRESS: at 69.42% examples, 148226 words/s, in_qsize 5, out_qsize 0
INFO - 11:27:00: EPOCH 1 - PROGRESS: at 69.82% examples, 148115 words/s, in_qsize 5, out_qsize 0
INFO - 11:27:01: EPOCH 1 - PROGRESS: at 70.32% examples, 148074 words/s, in_qsize 6, out_qsize 0
INFO - 11:27:02: EPOCH 1 - PROGRESS: at 70.69% examples, 148037 words/s, in_qsize 6, out_qsize 0
INFO - 11:27:03: EPOCH 1 - PRO

INFO - 11:28:20: worker thread finished; awaiting finish of 2 more threads
INFO - 11:28:21: worker thread finished; awaiting finish of 1 more threads
INFO - 11:28:21: worker thread finished; awaiting finish of 0 more threads
INFO - 11:28:21: EPOCH - 1 : training on 48361223 raw words (36637299 effective words) took 248.9s, 147172 effective words/s
INFO - 11:28:22: EPOCH 2 - PROGRESS: at 0.13% examples, 129994 words/s, in_qsize 5, out_qsize 0
INFO - 11:28:23: EPOCH 2 - PROGRESS: at 0.26% examples, 131519 words/s, in_qsize 5, out_qsize 0
INFO - 11:28:24: EPOCH 2 - PROGRESS: at 0.41% examples, 132312 words/s, in_qsize 4, out_qsize 0
INFO - 11:28:25: EPOCH 2 - PROGRESS: at 0.59% examples, 129727 words/s, in_qsize 5, out_qsize 0
INFO - 11:28:26: EPOCH 2 - PROGRESS: at 0.81% examples, 130574 words/s, in_qsize 4, out_qsize 0
INFO - 11:28:27: EPOCH 2 - PROGRESS: at 1.07% examples, 131187 words/s, in_qsize 5, out_qsize 0
INFO - 11:28:28: EPOCH 2 - PROGRESS: at 1.39% examples, 129573 words/s, in

INFO - 11:29:47: EPOCH 2 - PROGRESS: at 33.47% examples, 141838 words/s, in_qsize 5, out_qsize 0
INFO - 11:29:48: EPOCH 2 - PROGRESS: at 33.74% examples, 141744 words/s, in_qsize 5, out_qsize 0
INFO - 11:29:49: EPOCH 2 - PROGRESS: at 34.05% examples, 141696 words/s, in_qsize 5, out_qsize 0
INFO - 11:29:50: EPOCH 2 - PROGRESS: at 34.40% examples, 141586 words/s, in_qsize 5, out_qsize 1
INFO - 11:29:51: EPOCH 2 - PROGRESS: at 34.81% examples, 141665 words/s, in_qsize 5, out_qsize 0
INFO - 11:29:52: EPOCH 2 - PROGRESS: at 35.28% examples, 141631 words/s, in_qsize 6, out_qsize 0
INFO - 11:29:53: EPOCH 2 - PROGRESS: at 35.81% examples, 141580 words/s, in_qsize 5, out_qsize 0
INFO - 11:29:54: EPOCH 2 - PROGRESS: at 36.02% examples, 141531 words/s, in_qsize 6, out_qsize 0
INFO - 11:29:55: EPOCH 2 - PROGRESS: at 36.14% examples, 141434 words/s, in_qsize 6, out_qsize 0
INFO - 11:29:57: EPOCH 2 - PROGRESS: at 36.28% examples, 141382 words/s, in_qsize 6, out_qsize 0
INFO - 11:29:58: EPOCH 2 - PRO

INFO - 11:31:15: EPOCH 2 - PROGRESS: at 72.66% examples, 149360 words/s, in_qsize 5, out_qsize 0
INFO - 11:31:16: EPOCH 2 - PROGRESS: at 73.05% examples, 149658 words/s, in_qsize 5, out_qsize 0
INFO - 11:31:17: EPOCH 2 - PROGRESS: at 73.59% examples, 149976 words/s, in_qsize 5, out_qsize 0
INFO - 11:31:18: EPOCH 2 - PROGRESS: at 74.31% examples, 150186 words/s, in_qsize 6, out_qsize 0
INFO - 11:31:19: EPOCH 2 - PROGRESS: at 75.20% examples, 150988 words/s, in_qsize 4, out_qsize 1
INFO - 11:31:20: EPOCH 2 - PROGRESS: at 75.53% examples, 151775 words/s, in_qsize 5, out_qsize 0
INFO - 11:31:21: EPOCH 2 - PROGRESS: at 76.01% examples, 152679 words/s, in_qsize 2, out_qsize 0
INFO - 11:31:22: EPOCH 2 - PROGRESS: at 76.65% examples, 153511 words/s, in_qsize 6, out_qsize 0
INFO - 11:31:23: EPOCH 2 - PROGRESS: at 77.32% examples, 153945 words/s, in_qsize 4, out_qsize 1
INFO - 11:31:24: EPOCH 2 - PROGRESS: at 78.01% examples, 154059 words/s, in_qsize 5, out_qsize 0
INFO - 11:31:25: EPOCH 2 - PRO

INFO - 11:32:39: EPOCH 3 - PROGRESS: at 14.61% examples, 157667 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:40: EPOCH 3 - PROGRESS: at 15.24% examples, 157696 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:42: EPOCH 3 - PROGRESS: at 16.06% examples, 157331 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:43: EPOCH 3 - PROGRESS: at 16.67% examples, 156750 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:44: EPOCH 3 - PROGRESS: at 17.26% examples, 156449 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:45: EPOCH 3 - PROGRESS: at 17.89% examples, 155764 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:46: EPOCH 3 - PROGRESS: at 18.32% examples, 155353 words/s, in_qsize 6, out_qsize 0
INFO - 11:32:47: EPOCH 3 - PROGRESS: at 18.51% examples, 154640 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:48: EPOCH 3 - PROGRESS: at 18.81% examples, 154502 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:49: EPOCH 3 - PROGRESS: at 19.17% examples, 154094 words/s, in_qsize 5, out_qsize 0
INFO - 11:32:50: EPOCH 3 - PRO

INFO - 11:34:07: EPOCH 3 - PROGRESS: at 55.02% examples, 162839 words/s, in_qsize 5, out_qsize 0
INFO - 11:34:08: EPOCH 3 - PROGRESS: at 55.84% examples, 163273 words/s, in_qsize 4, out_qsize 1
INFO - 11:34:09: EPOCH 3 - PROGRESS: at 56.77% examples, 163762 words/s, in_qsize 5, out_qsize 0
INFO - 11:34:10: EPOCH 3 - PROGRESS: at 57.40% examples, 164104 words/s, in_qsize 4, out_qsize 1
INFO - 11:34:11: EPOCH 3 - PROGRESS: at 58.18% examples, 164383 words/s, in_qsize 5, out_qsize 0
INFO - 11:34:12: EPOCH 3 - PROGRESS: at 58.76% examples, 164800 words/s, in_qsize 6, out_qsize 0
INFO - 11:34:13: EPOCH 3 - PROGRESS: at 58.96% examples, 165159 words/s, in_qsize 5, out_qsize 0
INFO - 11:34:14: EPOCH 3 - PROGRESS: at 59.20% examples, 165495 words/s, in_qsize 5, out_qsize 0
INFO - 11:34:15: EPOCH 3 - PROGRESS: at 59.49% examples, 165856 words/s, in_qsize 5, out_qsize 0
INFO - 11:34:16: EPOCH 3 - PROGRESS: at 59.86% examples, 166184 words/s, in_qsize 5, out_qsize 0
INFO - 11:34:17: EPOCH 3 - PRO

INFO - 11:35:35: EPOCH 3 - PROGRESS: at 96.47% examples, 171243 words/s, in_qsize 5, out_qsize 0
INFO - 11:35:36: EPOCH 3 - PROGRESS: at 96.91% examples, 171124 words/s, in_qsize 5, out_qsize 0
INFO - 11:35:37: EPOCH 3 - PROGRESS: at 97.33% examples, 170942 words/s, in_qsize 5, out_qsize 0
INFO - 11:35:38: EPOCH 3 - PROGRESS: at 97.74% examples, 170767 words/s, in_qsize 6, out_qsize 0
INFO - 11:35:39: EPOCH 3 - PROGRESS: at 97.99% examples, 170652 words/s, in_qsize 5, out_qsize 0
INFO - 11:35:40: EPOCH 3 - PROGRESS: at 98.46% examples, 170560 words/s, in_qsize 6, out_qsize 0
INFO - 11:35:41: EPOCH 3 - PROGRESS: at 99.20% examples, 170503 words/s, in_qsize 4, out_qsize 0
INFO - 11:35:42: worker thread finished; awaiting finish of 2 more threads
INFO - 11:35:42: worker thread finished; awaiting finish of 1 more threads
INFO - 11:35:42: worker thread finished; awaiting finish of 0 more threads
INFO - 11:35:42: EPOCH - 3 : training on 48361223 raw words (36637771 effective words) took 214.

INFO - 11:37:01: EPOCH 4 - PROGRESS: at 31.82% examples, 140232 words/s, in_qsize 5, out_qsize 0
INFO - 11:37:02: EPOCH 4 - PROGRESS: at 32.05% examples, 140350 words/s, in_qsize 5, out_qsize 0
INFO - 11:37:03: EPOCH 4 - PROGRESS: at 32.20% examples, 140545 words/s, in_qsize 5, out_qsize 0
INFO - 11:37:04: EPOCH 4 - PROGRESS: at 32.38% examples, 140933 words/s, in_qsize 6, out_qsize 0
INFO - 11:37:05: EPOCH 4 - PROGRESS: at 32.59% examples, 141196 words/s, in_qsize 5, out_qsize 0
INFO - 11:37:06: EPOCH 4 - PROGRESS: at 32.81% examples, 141378 words/s, in_qsize 5, out_qsize 0
INFO - 11:37:07: EPOCH 4 - PROGRESS: at 33.04% examples, 141521 words/s, in_qsize 6, out_qsize 0
INFO - 11:37:08: EPOCH 4 - PROGRESS: at 33.34% examples, 141861 words/s, in_qsize 5, out_qsize 0
INFO - 11:37:09: EPOCH 4 - PROGRESS: at 33.64% examples, 141948 words/s, in_qsize 5, out_qsize 0
INFO - 11:37:10: EPOCH 4 - PROGRESS: at 34.02% examples, 142296 words/s, in_qsize 6, out_qsize 0
INFO - 11:37:11: EPOCH 4 - PRO

INFO - 11:38:28: EPOCH 4 - PROGRESS: at 70.10% examples, 150417 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:29: EPOCH 4 - PROGRESS: at 70.60% examples, 150438 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:30: EPOCH 4 - PROGRESS: at 71.36% examples, 150536 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:32: EPOCH 4 - PROGRESS: at 72.07% examples, 150614 words/s, in_qsize 4, out_qsize 1
INFO - 11:38:33: EPOCH 4 - PROGRESS: at 72.22% examples, 150699 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:34: EPOCH 4 - PROGRESS: at 72.44% examples, 150777 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:35: EPOCH 4 - PROGRESS: at 72.70% examples, 150852 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:36: EPOCH 4 - PROGRESS: at 73.20% examples, 151391 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:37: EPOCH 4 - PROGRESS: at 74.12% examples, 152188 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:38: EPOCH 4 - PROGRESS: at 75.15% examples, 152985 words/s, in_qsize 5, out_qsize 0
INFO - 11:38:39: EPOCH 4 - PRO

INFO - 11:39:54: EPOCH 5 - PROGRESS: at 6.26% examples, 136648 words/s, in_qsize 5, out_qsize 0
INFO - 11:39:55: EPOCH 5 - PROGRESS: at 6.63% examples, 137390 words/s, in_qsize 6, out_qsize 0
INFO - 11:39:56: EPOCH 5 - PROGRESS: at 7.14% examples, 137321 words/s, in_qsize 4, out_qsize 2
INFO - 11:39:57: EPOCH 5 - PROGRESS: at 7.64% examples, 136856 words/s, in_qsize 5, out_qsize 0
INFO - 11:39:58: EPOCH 5 - PROGRESS: at 8.05% examples, 136655 words/s, in_qsize 5, out_qsize 0
INFO - 11:39:59: EPOCH 5 - PROGRESS: at 8.80% examples, 136758 words/s, in_qsize 4, out_qsize 1
INFO - 11:40:00: EPOCH 5 - PROGRESS: at 9.34% examples, 136751 words/s, in_qsize 6, out_qsize 0
INFO - 11:40:01: EPOCH 5 - PROGRESS: at 9.67% examples, 136364 words/s, in_qsize 5, out_qsize 0
INFO - 11:40:02: EPOCH 5 - PROGRESS: at 10.18% examples, 136297 words/s, in_qsize 5, out_qsize 0
INFO - 11:40:03: EPOCH 5 - PROGRESS: at 11.04% examples, 136800 words/s, in_qsize 5, out_qsize 0
INFO - 11:40:04: EPOCH 5 - PROGRESS: a

INFO - 11:41:22: EPOCH 5 - PROGRESS: at 37.72% examples, 133844 words/s, in_qsize 5, out_qsize 0
INFO - 11:41:23: EPOCH 5 - PROGRESS: at 37.98% examples, 133549 words/s, in_qsize 5, out_qsize 0
INFO - 11:41:24: EPOCH 5 - PROGRESS: at 38.24% examples, 133208 words/s, in_qsize 5, out_qsize 0
INFO - 11:41:25: EPOCH 5 - PROGRESS: at 38.53% examples, 132998 words/s, in_qsize 5, out_qsize 0
INFO - 11:41:27: EPOCH 5 - PROGRESS: at 38.85% examples, 132803 words/s, in_qsize 5, out_qsize 0
INFO - 11:41:28: EPOCH 5 - PROGRESS: at 39.20% examples, 132593 words/s, in_qsize 5, out_qsize 0
INFO - 11:41:29: EPOCH 5 - PROGRESS: at 39.67% examples, 132409 words/s, in_qsize 6, out_qsize 0
INFO - 11:41:30: EPOCH 5 - PROGRESS: at 40.03% examples, 132260 words/s, in_qsize 6, out_qsize 0
INFO - 11:41:31: EPOCH 5 - PROGRESS: at 40.92% examples, 132180 words/s, in_qsize 5, out_qsize 0
INFO - 11:41:32: EPOCH 5 - PROGRESS: at 41.45% examples, 132107 words/s, in_qsize 5, out_qsize 0
INFO - 11:41:33: EPOCH 5 - PRO

INFO - 11:42:51: EPOCH 5 - PROGRESS: at 71.40% examples, 128827 words/s, in_qsize 5, out_qsize 0
INFO - 11:42:52: EPOCH 5 - PROGRESS: at 72.06% examples, 128874 words/s, in_qsize 5, out_qsize 0
INFO - 11:42:53: EPOCH 5 - PROGRESS: at 72.18% examples, 128806 words/s, in_qsize 5, out_qsize 0
INFO - 11:42:55: EPOCH 5 - PROGRESS: at 72.33% examples, 128754 words/s, in_qsize 6, out_qsize 0
INFO - 11:42:56: EPOCH 5 - PROGRESS: at 72.50% examples, 128708 words/s, in_qsize 5, out_qsize 0
INFO - 11:42:57: EPOCH 5 - PROGRESS: at 72.69% examples, 128654 words/s, in_qsize 5, out_qsize 0
INFO - 11:42:58: EPOCH 5 - PROGRESS: at 72.91% examples, 128534 words/s, in_qsize 6, out_qsize 0
INFO - 11:42:59: EPOCH 5 - PROGRESS: at 73.15% examples, 128454 words/s, in_qsize 6, out_qsize 0
INFO - 11:43:00: EPOCH 5 - PROGRESS: at 73.46% examples, 128409 words/s, in_qsize 5, out_qsize 0
INFO - 11:43:01: EPOCH 5 - PROGRESS: at 73.87% examples, 128361 words/s, in_qsize 6, out_qsize 0
INFO - 11:43:02: EPOCH 5 - PRO

INFO - 11:44:20: EPOCH 5 - PROGRESS: at 97.71% examples, 125792 words/s, in_qsize 5, out_qsize 0
INFO - 11:44:21: EPOCH 5 - PROGRESS: at 97.84% examples, 125693 words/s, in_qsize 5, out_qsize 0
INFO - 11:44:22: EPOCH 5 - PROGRESS: at 98.09% examples, 125632 words/s, in_qsize 4, out_qsize 1
INFO - 11:44:23: EPOCH 5 - PROGRESS: at 98.52% examples, 125644 words/s, in_qsize 5, out_qsize 0
INFO - 11:44:24: EPOCH 5 - PROGRESS: at 99.20% examples, 125709 words/s, in_qsize 6, out_qsize 0
INFO - 11:44:25: worker thread finished; awaiting finish of 2 more threads
INFO - 11:44:25: worker thread finished; awaiting finish of 1 more threads
INFO - 11:44:25: worker thread finished; awaiting finish of 0 more threads
INFO - 11:44:25: EPOCH - 5 : training on 48361223 raw words (36637258 effective words) took 291.3s, 125789 effective words/s
INFO - 11:44:25: training on a 241806115 raw words (183181277 effective words) took 1213.5s, 150947 effective words/s


In [8]:
# Create the target directory first so the export cannot fail with FileNotFoundError
# after the long training run.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)

# Write the word vectors in the binary word2vec format.
model.wv.save_word2vec_format(save_path, binary=True)

INFO - 11:48:02: storing 34556x20 projection weights into ../models/w2v-amz.bin
