# Word2Vec in Python 

In [18]:
# Load IPython's autoreload extension, which provides the %autoreload magic.
%load_ext autoreload

In [29]:
%autoreload 2
from gensim.models.word2vec import Word2Vec 
import logging
from utils_parallel import *
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

## Model Hyperparameters

In [48]:
# Hyperparameters consumed by the Word2Vec constructor and the training call
# in the cells further down.
SIZE, WINDOW, MIN_COUNT = 300, 20, 10  # passed as size=, window=, min_count=
WORKERS, EPOCH = 16, 4                 # passed as workers= and as epochs= to train()
# NOTE(review): gensim documents that a fixed seed alone does not make runs
# fully reproducible when more than one worker thread is used; confirm whether
# exact reproducibility matters here.
SEED = 42

In [15]:
!mkdir ~/models

mkdir: cannot create directory ‘/home/kaspar/models’: File exists


In [23]:
!mkdir ~/processed

mkdir: cannot create directory ‘/home/kaspar/processed’: File exists


In [42]:
# List the current working directory (sanity check that the data and output
# directories are present).
!ls

Anaconda3-2019.10-Linux-x86_64.sh  ResearchDrive  models     processed
KB-RiR				   anaconda3	  nltk_data


## Data Parameters

In [44]:
# Year range of the corpus slice; passed to SentIterator as date_range and
# also used to name the saved model file.
START_YEAR, END_YEAR = 1860, 1870

# Root directory handed to SentIterator as the data location.
ROOT = "/home/kaspar/ResearchDrive"

# Path the trained model is saved to, named after the selected year range.
OUTPUT = "/home/kaspar/models/{}-{}.w2v.model".format(START_YEAR, END_YEAR)

## Preparing the corpus and training a model

In [45]:
sentences = SentIterator(ROOT,date_range=(START_YEAR,END_YEAR),processed_path='/home/kaspar/processed',tokenized=False,n_jobs=-1)
%time sentences.prepareLines()

Processing zip files
{'/home/kaspar/ResearchDrive/1870-1879.zip', '/home/kaspar/ResearchDrive/1860-1869.zip'}
32651
728315
Zip files processed and stored in /home/kaspar/processed/1860-1870.txt
CPU times: user 1min 43s, sys: 9.95 s, total: 1min 53s
Wall time: 2min 28s


In [49]:
model = Word2Vec(size=SIZE, window=WINDOW, min_count=MIN_COUNT, workers=WORKERS, seed=SEED)
%time model.build_vocab(sentences=sentences)
total_examples = model.corpus_count
print(total_examples)

2019-11-22 16:33:02,285 : INFO : collecting all words and their counts
2019-11-22 16:33:02,285 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2019-11-22 16:33:03,130 : INFO : PROGRESS: at sentence #10000, processed 4811089 words, keeping 259368 word types
2019-11-22 16:33:04,145 : INFO : PROGRESS: at sentence #20000, processed 10381032 words, keeping 470004 word types
2019-11-22 16:33:05,280 : INFO : PROGRESS: at sentence #30000, processed 16556085 words, keeping 655947 word types
2019-11-22 16:33:06,284 : INFO : PROGRESS: at sentence #40000, processed 21949117 words, keeping 796887 word types
2019-11-22 16:33:07,197 : INFO : PROGRESS: at sentence #50000, processed 26987925 words, keeping 905294 word types
2019-11-22 16:33:08,134 : INFO : PROGRESS: at sentence #60000, processed 32085974 words, keeping 1016361 word types
2019-11-22 16:33:09,091 : INFO : PROGRESS: at sentence #70000, processed 37291675 words, keeping 1116129 word types
2019-11-22 16:33:10,144 

2019-11-22 16:34:18,154 : INFO : PROGRESS: at sentence #700000, processed 400414317 words, keeping 6012235 word types
2019-11-22 16:34:19,303 : INFO : PROGRESS: at sentence #710000, processed 406482053 words, keeping 6084291 word types
2019-11-22 16:34:20,488 : INFO : PROGRESS: at sentence #720000, processed 412705160 words, keeping 6156661 word types
2019-11-22 16:34:21,643 : INFO : PROGRESS: at sentence #730000, processed 418779590 words, keeping 6238327 word types
2019-11-22 16:34:22,803 : INFO : PROGRESS: at sentence #740000, processed 424902388 words, keeping 6309736 word types
2019-11-22 16:34:23,952 : INFO : PROGRESS: at sentence #750000, processed 430908418 words, keeping 6380232 word types
2019-11-22 16:34:25,200 : INFO : PROGRESS: at sentence #760000, processed 437482851 words, keeping 6450518 word types
2019-11-22 16:34:25,292 : INFO : collected 6454185 word types from a corpus of 437966730 raw words and 760966 sentences
2019-11-22 16:34:25,293 : INFO : Loading a fresh vocab

CPU times: user 2min 18s, sys: 1.3 s, total: 2min 19s
Wall time: 2min 19s
760966


In [50]:
# Train for EPOCH passes over the corpus (timed); total_examples is the
# sentence count taken from the model after build_vocab.
%time model.train(sentences=sentences, total_examples=total_examples, epochs=EPOCH)

2019-11-22 16:35:21,997 : INFO : training model with 16 workers on 350091 vocabulary and 300 features, using sg=0 hs=0 sample=0.001 negative=5 window=20
2019-11-22 16:35:23,011 : INFO : EPOCH 1 - PROGRESS: at 0.29% examples, 831676 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:35:24,043 : INFO : EPOCH 1 - PROGRESS: at 0.68% examples, 889381 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:35:25,058 : INFO : EPOCH 1 - PROGRESS: at 1.16% examples, 926365 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:35:26,063 : INFO : EPOCH 1 - PROGRESS: at 1.62% examples, 928607 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:35:27,067 : INFO : EPOCH 1 - PROGRESS: at 2.08% examples, 941257 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:35:28,068 : INFO : EPOCH 1 - PROGRESS: at 2.39% examples, 948895 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:35:29,072 : INFO : EPOCH 1 - PROGRESS: at 2.64% examples, 948403 words/s, in_qsize 27, out_qsize 4
2019-11-22 16:35:30,082 : INFO : EPOCH 1 - PROGRESS: at 

2019-11-22 16:36:34,512 : INFO : EPOCH 1 - PROGRESS: at 26.01% examples, 974214 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:36:35,514 : INFO : EPOCH 1 - PROGRESS: at 26.41% examples, 974640 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:36:36,528 : INFO : EPOCH 1 - PROGRESS: at 26.72% examples, 974551 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:36:37,536 : INFO : EPOCH 1 - PROGRESS: at 27.10% examples, 974632 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:36:38,552 : INFO : EPOCH 1 - PROGRESS: at 27.42% examples, 974536 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:36:39,553 : INFO : EPOCH 1 - PROGRESS: at 27.79% examples, 974637 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:36:40,560 : INFO : EPOCH 1 - PROGRESS: at 28.13% examples, 974723 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:36:41,562 : INFO : EPOCH 1 - PROGRESS: at 28.37% examples, 974703 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:36:42,565 : INFO : EPOCH 1 - PROGRESS: at 28.72% examples, 974823 words/s,

2019-11-22 16:37:46,987 : INFO : EPOCH 1 - PROGRESS: at 50.79% examples, 974703 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:37:47,988 : INFO : EPOCH 1 - PROGRESS: at 51.18% examples, 974545 words/s, in_qsize 27, out_qsize 4
2019-11-22 16:37:48,994 : INFO : EPOCH 1 - PROGRESS: at 51.57% examples, 974758 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:37:50,019 : INFO : EPOCH 1 - PROGRESS: at 51.88% examples, 974518 words/s, in_qsize 25, out_qsize 6
2019-11-22 16:37:51,022 : INFO : EPOCH 1 - PROGRESS: at 52.35% examples, 974553 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:37:52,027 : INFO : EPOCH 1 - PROGRESS: at 52.67% examples, 974480 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:37:53,032 : INFO : EPOCH 1 - PROGRESS: at 53.11% examples, 974589 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:37:54,040 : INFO : EPOCH 1 - PROGRESS: at 53.45% examples, 974618 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:37:55,046 : INFO : EPOCH 1 - PROGRESS: at 53.88% examples, 974626 words/s,

2019-11-22 16:38:59,685 : INFO : EPOCH 1 - PROGRESS: at 75.98% examples, 974509 words/s, in_qsize 32, out_qsize 0
2019-11-22 16:39:00,688 : INFO : EPOCH 1 - PROGRESS: at 76.25% examples, 974642 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:39:01,702 : INFO : EPOCH 1 - PROGRESS: at 76.62% examples, 974583 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:39:02,702 : INFO : EPOCH 1 - PROGRESS: at 76.97% examples, 974570 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:39:03,706 : INFO : EPOCH 1 - PROGRESS: at 77.27% examples, 974595 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:39:04,708 : INFO : EPOCH 1 - PROGRESS: at 77.61% examples, 974613 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:39:05,709 : INFO : EPOCH 1 - PROGRESS: at 77.92% examples, 974684 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:39:06,717 : INFO : EPOCH 1 - PROGRESS: at 78.24% examples, 974548 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:39:07,727 : INFO : EPOCH 1 - PROGRESS: at 78.56% examples, 974600 words/s,

2019-11-22 16:40:12,278 : INFO : EPOCH 1 - PROGRESS: at 98.62% examples, 975244 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:40:13,278 : INFO : EPOCH 1 - PROGRESS: at 98.92% examples, 975300 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:40:14,286 : INFO : EPOCH 1 - PROGRESS: at 99.17% examples, 975346 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:40:15,298 : INFO : EPOCH 1 - PROGRESS: at 99.50% examples, 975383 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:40:16,303 : INFO : EPOCH 1 - PROGRESS: at 99.81% examples, 975334 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:40:16,721 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-11-22 16:40:16,731 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-11-22 16:40:16,742 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-11-22 16:40:16,749 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-11-22 16:40:16,750 : INFO : worker thread finished; await

2019-11-22 16:41:12,187 : INFO : EPOCH 2 - PROGRESS: at 19.94% examples, 974585 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:41:13,202 : INFO : EPOCH 2 - PROGRESS: at 20.28% examples, 974245 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:41:14,207 : INFO : EPOCH 2 - PROGRESS: at 20.70% examples, 974141 words/s, in_qsize 29, out_qsize 3
2019-11-22 16:41:15,217 : INFO : EPOCH 2 - PROGRESS: at 20.99% examples, 974246 words/s, in_qsize 31, out_qsize 2
2019-11-22 16:41:16,219 : INFO : EPOCH 2 - PROGRESS: at 21.40% examples, 973895 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:41:17,235 : INFO : EPOCH 2 - PROGRESS: at 21.72% examples, 974337 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:41:18,240 : INFO : EPOCH 2 - PROGRESS: at 22.05% examples, 974389 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:41:19,242 : INFO : EPOCH 2 - PROGRESS: at 22.40% examples, 974720 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:41:20,243 : INFO : EPOCH 2 - PROGRESS: at 22.69% examples, 975045 words/s,

2019-11-22 16:42:24,792 : INFO : EPOCH 2 - PROGRESS: at 44.85% examples, 973082 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:42:25,795 : INFO : EPOCH 2 - PROGRESS: at 45.19% examples, 973289 words/s, in_qsize 32, out_qsize 0
2019-11-22 16:42:26,806 : INFO : EPOCH 2 - PROGRESS: at 45.61% examples, 973383 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:42:27,806 : INFO : EPOCH 2 - PROGRESS: at 45.95% examples, 973384 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:42:28,813 : INFO : EPOCH 2 - PROGRESS: at 46.41% examples, 973367 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:42:29,818 : INFO : EPOCH 2 - PROGRESS: at 46.73% examples, 973327 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:42:30,826 : INFO : EPOCH 2 - PROGRESS: at 47.04% examples, 973393 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:42:31,836 : INFO : EPOCH 2 - PROGRESS: at 47.44% examples, 973344 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:42:32,836 : INFO : EPOCH 2 - PROGRESS: at 47.74% examples, 973207 words/s,

2019-11-22 16:43:37,363 : INFO : EPOCH 2 - PROGRESS: at 69.87% examples, 971557 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:43:38,364 : INFO : EPOCH 2 - PROGRESS: at 70.19% examples, 971653 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:43:39,365 : INFO : EPOCH 2 - PROGRESS: at 70.57% examples, 971584 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:43:40,385 : INFO : EPOCH 2 - PROGRESS: at 70.94% examples, 971589 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:43:41,387 : INFO : EPOCH 2 - PROGRESS: at 71.30% examples, 971687 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:43:42,388 : INFO : EPOCH 2 - PROGRESS: at 71.64% examples, 971526 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:43:43,403 : INFO : EPOCH 2 - PROGRESS: at 71.95% examples, 971470 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:43:44,422 : INFO : EPOCH 2 - PROGRESS: at 72.36% examples, 971473 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:43:45,429 : INFO : EPOCH 2 - PROGRESS: at 72.65% examples, 971440 words/s,

2019-11-22 16:44:49,961 : INFO : EPOCH 2 - PROGRESS: at 92.82% examples, 971607 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:44:50,969 : INFO : EPOCH 2 - PROGRESS: at 93.11% examples, 971582 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:44:51,981 : INFO : EPOCH 2 - PROGRESS: at 93.48% examples, 971642 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:44:52,993 : INFO : EPOCH 2 - PROGRESS: at 93.76% examples, 971769 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:44:54,017 : INFO : EPOCH 2 - PROGRESS: at 94.16% examples, 971794 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:44:55,022 : INFO : EPOCH 2 - PROGRESS: at 94.47% examples, 971810 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:44:56,031 : INFO : EPOCH 2 - PROGRESS: at 94.74% examples, 971738 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:44:57,037 : INFO : EPOCH 2 - PROGRESS: at 95.11% examples, 971870 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:44:58,038 : INFO : EPOCH 2 - PROGRESS: at 95.47% examples, 971850 words/s,

2019-11-22 16:45:49,849 : INFO : EPOCH 3 - PROGRESS: at 13.42% examples, 976515 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:45:50,855 : INFO : EPOCH 3 - PROGRESS: at 13.77% examples, 975600 words/s, in_qsize 27, out_qsize 4
2019-11-22 16:45:51,864 : INFO : EPOCH 3 - PROGRESS: at 14.13% examples, 976282 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:45:52,876 : INFO : EPOCH 3 - PROGRESS: at 14.49% examples, 975799 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:45:53,878 : INFO : EPOCH 3 - PROGRESS: at 14.83% examples, 975680 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:45:54,881 : INFO : EPOCH 3 - PROGRESS: at 15.21% examples, 975576 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:45:55,891 : INFO : EPOCH 3 - PROGRESS: at 15.56% examples, 975609 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:45:56,902 : INFO : EPOCH 3 - PROGRESS: at 15.94% examples, 975073 words/s, in_qsize 29, out_qsize 3
2019-11-22 16:45:57,906 : INFO : EPOCH 3 - PROGRESS: at 16.30% examples, 975750 words/s,

2019-11-22 16:47:02,496 : INFO : EPOCH 3 - PROGRESS: at 38.44% examples, 976840 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:47:03,537 : INFO : EPOCH 3 - PROGRESS: at 38.78% examples, 976687 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:47:04,564 : INFO : EPOCH 3 - PROGRESS: at 39.16% examples, 976490 words/s, in_qsize 27, out_qsize 4
2019-11-22 16:47:05,578 : INFO : EPOCH 3 - PROGRESS: at 39.49% examples, 976595 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:47:06,580 : INFO : EPOCH 3 - PROGRESS: at 39.82% examples, 976392 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:47:07,590 : INFO : EPOCH 3 - PROGRESS: at 40.18% examples, 976361 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:47:08,592 : INFO : EPOCH 3 - PROGRESS: at 40.47% examples, 976498 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:47:09,606 : INFO : EPOCH 3 - PROGRESS: at 40.83% examples, 976087 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:47:10,608 : INFO : EPOCH 3 - PROGRESS: at 41.14% examples, 976055 words/s,

2019-11-22 16:48:15,139 : INFO : EPOCH 3 - PROGRESS: at 63.88% examples, 974639 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:48:16,140 : INFO : EPOCH 3 - PROGRESS: at 64.25% examples, 974514 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:48:17,149 : INFO : EPOCH 3 - PROGRESS: at 64.52% examples, 974308 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:48:18,165 : INFO : EPOCH 3 - PROGRESS: at 64.83% examples, 974395 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:48:19,167 : INFO : EPOCH 3 - PROGRESS: at 65.20% examples, 974454 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:48:20,172 : INFO : EPOCH 3 - PROGRESS: at 65.50% examples, 974422 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:48:21,173 : INFO : EPOCH 3 - PROGRESS: at 65.92% examples, 974443 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:48:22,200 : INFO : EPOCH 3 - PROGRESS: at 66.30% examples, 974415 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:48:23,202 : INFO : EPOCH 3 - PROGRESS: at 66.69% examples, 974612 words/s,

2019-11-22 16:49:27,739 : INFO : EPOCH 3 - PROGRESS: at 87.31% examples, 974600 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:49:28,757 : INFO : EPOCH 3 - PROGRESS: at 87.57% examples, 974466 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:49:29,771 : INFO : EPOCH 3 - PROGRESS: at 87.84% examples, 974510 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:49:30,773 : INFO : EPOCH 3 - PROGRESS: at 88.16% examples, 974490 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:49:31,784 : INFO : EPOCH 3 - PROGRESS: at 88.36% examples, 974371 words/s, in_qsize 31, out_qsize 2
2019-11-22 16:49:32,797 : INFO : EPOCH 3 - PROGRESS: at 88.65% examples, 974374 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:49:33,810 : INFO : EPOCH 3 - PROGRESS: at 88.95% examples, 974455 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:49:34,812 : INFO : EPOCH 3 - PROGRESS: at 89.31% examples, 974471 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:49:35,835 : INFO : EPOCH 3 - PROGRESS: at 89.72% examples, 974486 words/s,

2019-11-22 16:50:26,537 : INFO : EPOCH 4 - PROGRESS: at 7.01% examples, 975567 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:50:27,545 : INFO : EPOCH 4 - PROGRESS: at 7.41% examples, 976190 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:50:28,560 : INFO : EPOCH 4 - PROGRESS: at 7.78% examples, 975865 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:50:29,562 : INFO : EPOCH 4 - PROGRESS: at 8.20% examples, 976401 words/s, in_qsize 32, out_qsize 0
2019-11-22 16:50:30,579 : INFO : EPOCH 4 - PROGRESS: at 8.56% examples, 976894 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:50:31,579 : INFO : EPOCH 4 - PROGRESS: at 8.95% examples, 977184 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:50:32,599 : INFO : EPOCH 4 - PROGRESS: at 9.27% examples, 976715 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:50:33,608 : INFO : EPOCH 4 - PROGRESS: at 9.60% examples, 977095 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:50:34,615 : INFO : EPOCH 4 - PROGRESS: at 9.99% examples, 977546 words/s, in_qsize

2019-11-22 16:51:39,122 : INFO : EPOCH 4 - PROGRESS: at 32.10% examples, 977133 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:51:40,122 : INFO : EPOCH 4 - PROGRESS: at 32.47% examples, 977462 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:51:41,133 : INFO : EPOCH 4 - PROGRESS: at 32.79% examples, 977126 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:51:42,138 : INFO : EPOCH 4 - PROGRESS: at 33.18% examples, 976883 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:51:43,151 : INFO : EPOCH 4 - PROGRESS: at 33.50% examples, 976684 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:51:44,154 : INFO : EPOCH 4 - PROGRESS: at 33.91% examples, 977018 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:51:45,160 : INFO : EPOCH 4 - PROGRESS: at 34.23% examples, 977051 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:51:46,163 : INFO : EPOCH 4 - PROGRESS: at 34.63% examples, 977068 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:51:47,169 : INFO : EPOCH 4 - PROGRESS: at 34.95% examples, 977015 words/s,

2019-11-22 16:52:51,712 : INFO : EPOCH 4 - PROGRESS: at 57.96% examples, 973653 words/s, in_qsize 27, out_qsize 5
2019-11-22 16:52:52,716 : INFO : EPOCH 4 - PROGRESS: at 58.24% examples, 973561 words/s, in_qsize 32, out_qsize 3
2019-11-22 16:52:53,716 : INFO : EPOCH 4 - PROGRESS: at 58.55% examples, 973498 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:52:54,736 : INFO : EPOCH 4 - PROGRESS: at 58.93% examples, 973521 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:52:55,761 : INFO : EPOCH 4 - PROGRESS: at 59.19% examples, 973196 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:52:56,769 : INFO : EPOCH 4 - PROGRESS: at 59.61% examples, 973257 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:52:57,793 : INFO : EPOCH 4 - PROGRESS: at 59.85% examples, 973185 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:52:58,820 : INFO : EPOCH 4 - PROGRESS: at 60.12% examples, 972949 words/s, in_qsize 28, out_qsize 3
2019-11-22 16:52:59,821 : INFO : EPOCH 4 - PROGRESS: at 60.51% examples, 973377 words/s,

2019-11-22 16:54:04,451 : INFO : EPOCH 4 - PROGRESS: at 81.85% examples, 972724 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:54:05,451 : INFO : EPOCH 4 - PROGRESS: at 82.12% examples, 972652 words/s, in_qsize 31, out_qsize 2
2019-11-22 16:54:06,453 : INFO : EPOCH 4 - PROGRESS: at 82.44% examples, 972703 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:54:07,455 : INFO : EPOCH 4 - PROGRESS: at 82.69% examples, 972812 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:54:08,455 : INFO : EPOCH 4 - PROGRESS: at 82.99% examples, 972566 words/s, in_qsize 30, out_qsize 2
2019-11-22 16:54:09,472 : INFO : EPOCH 4 - PROGRESS: at 83.24% examples, 972546 words/s, in_qsize 29, out_qsize 2
2019-11-22 16:54:10,486 : INFO : EPOCH 4 - PROGRESS: at 83.52% examples, 972460 words/s, in_qsize 30, out_qsize 1
2019-11-22 16:54:11,492 : INFO : EPOCH 4 - PROGRESS: at 83.86% examples, 972358 words/s, in_qsize 31, out_qsize 0
2019-11-22 16:54:12,502 : INFO : EPOCH 4 - PROGRESS: at 84.10% examples, 972295 words/s,

2019-11-22 16:55:03,130 : INFO : training on a 1751866920 raw words (1150256501 effective words) took 1181.1s, 973859 effective words/s


CPU times: user 2h 57min 19s, sys: 36.9 s, total: 2h 57min 56s
Wall time: 19min 41s


(1150256501, 1751866920)

## Save model

In [51]:
# Persist the trained model to OUTPUT; per the log, the large arrays are
# written to companion .npy files next to it.
model.save(OUTPUT)

2019-11-22 16:57:10,043 : INFO : saving Word2Vec object under /home/kaspar/models/1860-1870.w2v.model, separately None
2019-11-22 16:57:10,044 : INFO : storing np array 'vectors' to /home/kaspar/models/1860-1870.w2v.model.wv.vectors.npy
2019-11-22 16:57:10,277 : INFO : not storing attribute vectors_norm
2019-11-22 16:57:10,278 : INFO : storing np array 'syn1neg' to /home/kaspar/models/1860-1870.w2v.model.trainables.syn1neg.npy
2019-11-22 16:57:10,505 : INFO : not storing attribute cum_table
2019-11-22 16:57:11,043 : INFO : saved /home/kaspar/models/1860-1870.w2v.model
