In [1]:
import gensim
import logging
import os
import re
import string

In [2]:
# Basic logging setup: timestamped messages at INFO level and above, so the
# progress messages logged by later cells are displayed.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [3]:
# Root directory of the corpus (relative to the notebook's working directory).
# Also reused as the filename prefix when the trained model is saved.
base_dir = "accessible-v4.0-small"

In [4]:
class MyArticles(object):
    """Iterable over the articles of a corpus, one list of word tokens each.

    The corpus is expected to be laid out as
    ``base_dir/<paper>/<issue>/<article>``. Entries whose name starts with
    a dot are skipped at every level.

    Each call to ``__iter__`` starts a fresh pass over the directory tree,
    so one instance can be iterated several times.

    Parameters
    ----------
    base_dir : str
        Path of the corpus root directory.
    """

    # Tokens are separated by runs of non-word characters.
    _TOKEN_SPLIT = re.compile(r'\W+')

    def __init__(self, base_dir):
        self.base_dir = base_dir

    @staticmethod
    def _visible(path):
        """Return the entries of ``path`` whose name does not start with a dot."""
        return [name for name in os.listdir(path) if not name.startswith('.')]

    def __iter__(self):
        """Yield one list of lower-cased word tokens per article file.

        Single-character tokens and tokens containing a digit are dropped.
        """
        # Always resolve paths from self.base_dir (not a module-level global),
        # so the instance reads the directory it was constructed with.
        for paper in self._visible(self.base_dir):
            logging.info("Opening paper {0}".format(paper))
            paper_dir = os.path.join(self.base_dir, paper)

            for issue in self._visible(paper_dir):
                issue_dir = os.path.join(paper_dir, issue)

                for article in self._visible(issue_dir):
                    # The context manager closes the file even if read() fails.
                    with open(os.path.join(issue_dir, article), "r") as article_file:
                        article_text = article_file.read()

                    # One token list per article; could be refined to
                    # sentences later.
                    yield [
                        word.lower()
                        for word in self._TOKEN_SPLIT.split(article_text)
                        if len(word) > 1 and not any(ch.isdigit() for ch in word)
                    ]

In [5]:
# Wrap the corpus in an iterable that yields one list of word tokens per
# article (a "list of lists" of words, read from disk as it is iterated).

articles = MyArticles(base_dir)

In [6]:
# Build the vocabulary and train the model. The recorded log below shows the
# corpus being re-read for the vocabulary scan and again for each epoch.
model = gensim.models.Word2Vec(
    articles,
    min_count=5, # drop words seen fewer than 5 times (5 is also the library default); up to 100 is OK
    size=200, # dimensionality of the word vectors; default is 100; higher for larger corpora. NOTE(review): gensim 4.x renames this to vector_size -- confirm installed version
    workers=10) # number of worker threads for parallel training; needs Cython

# Save the trained model to "<base_dir>-w2v-model" in the working directory.
model.save(base_dir + "-w2v-model")

2019-01-16 22:28:18,520 : INFO : collecting all words and their counts
2019-01-16 22:28:18,521 : INFO : Opening paper FreedomsJournal
2019-01-16 22:28:18,524 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2019-01-16 22:28:19,962 : INFO : Opening paper FrederickDouglassPaper
2019-01-16 22:28:26,389 : INFO : PROGRESS: at sentence #10000, processed 5459992 words, keeping 61012 word types
2019-01-16 22:28:28,506 : INFO : Opening paper NationalAntiSlaveryStandard
2019-01-16 22:28:53,178 : INFO : PROGRESS: at sentence #20000, processed 24444322 words, keeping 126994 word types
2019-01-16 22:28:58,541 : INFO : Opening paper TheColoredAmerican
2019-01-16 22:29:05,850 : INFO : Opening paper TheNorthStar
2019-01-16 22:29:07,888 : INFO : PROGRESS: at sentence #30000, processed 32156832 words, keeping 142233 word types
2019-01-16 22:29:12,190 : INFO : Opening paper TheChristianRecorder
2019-01-16 22:29:16,836 : INFO : PROGRESS: at sentence #40000, processed 36327743 wor

2019-01-16 22:33:51,708 : INFO : EPOCH 1 - PROGRESS: at 8.01% examples, 458749 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:33:52,713 : INFO : EPOCH 1 - PROGRESS: at 8.14% examples, 458834 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:33:53,731 : INFO : EPOCH 1 - PROGRESS: at 8.25% examples, 458992 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:33:54,747 : INFO : EPOCH 1 - PROGRESS: at 8.37% examples, 459089 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:33:55,749 : INFO : EPOCH 1 - PROGRESS: at 8.48% examples, 459104 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:33:56,754 : INFO : EPOCH 1 - PROGRESS: at 8.61% examples, 459408 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:33:57,759 : INFO : EPOCH 1 - PROGRESS: at 8.72% examples, 459553 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:33:58,759 : INFO : EPOCH 1 - PROGRESS: at 8.83% examples, 459533 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:33:59,765 : INFO : EPOCH 1 - PROGRESS: at 8.96% examples, 459783 words/s, in_qsize 0, out_

2019-01-16 22:35:03,228 : INFO : EPOCH 1 - PROGRESS: at 35.85% examples, 455083 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:35:04,238 : INFO : EPOCH 1 - PROGRESS: at 36.21% examples, 455104 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:35:05,243 : INFO : EPOCH 1 - PROGRESS: at 36.55% examples, 455088 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:35:06,265 : INFO : EPOCH 1 - PROGRESS: at 36.92% examples, 455031 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:35:06,981 : INFO : Opening paper ProvincialFreeman
2019-01-16 22:35:07,275 : INFO : EPOCH 1 - PROGRESS: at 37.38% examples, 455010 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:35:08,276 : INFO : EPOCH 1 - PROGRESS: at 38.10% examples, 454909 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:35:09,289 : INFO : EPOCH 1 - PROGRESS: at 38.79% examples, 454806 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:35:10,062 : INFO : Opening paper GodeysLadysBook
2019-01-16 22:35:10,294 : INFO : EPOCH 1 - PROGRESS: at 39.44% examples, 454693 w

2019-01-16 22:36:14,781 : INFO : EPOCH 1 - PROGRESS: at 60.62% examples, 457161 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:36:15,791 : INFO : EPOCH 1 - PROGRESS: at 61.05% examples, 457031 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:36:16,798 : INFO : EPOCH 1 - PROGRESS: at 61.44% examples, 456968 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:36:17,804 : INFO : EPOCH 1 - PROGRESS: at 61.90% examples, 456913 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:36:18,809 : INFO : EPOCH 1 - PROGRESS: at 62.37% examples, 456803 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:36:19,822 : INFO : EPOCH 1 - PROGRESS: at 62.88% examples, 456680 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:36:20,832 : INFO : EPOCH 1 - PROGRESS: at 63.27% examples, 456669 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:36:21,847 : INFO : EPOCH 1 - PROGRESS: at 63.74% examples, 456579 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:36:22,848 : INFO : EPOCH 1 - PROGRESS: at 64.23% examples, 456481 words/s, in_qsiz

2019-01-16 22:37:26,434 : INFO : EPOCH 1 - PROGRESS: at 92.83% examples, 451357 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:37:27,438 : INFO : EPOCH 1 - PROGRESS: at 93.17% examples, 451201 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:37:28,440 : INFO : Opening paper FrankLesliesWeekly
2019-01-16 22:37:28,457 : INFO : EPOCH 1 - PROGRESS: at 93.56% examples, 451065 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:37:29,468 : INFO : EPOCH 1 - PROGRESS: at 93.79% examples, 451117 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:37:30,477 : INFO : EPOCH 1 - PROGRESS: at 94.01% examples, 451194 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:37:31,483 : INFO : EPOCH 1 - PROGRESS: at 94.24% examples, 451190 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:37:32,484 : INFO : EPOCH 1 - PROGRESS: at 94.44% examples, 451207 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:37:33,490 : INFO : EPOCH 1 - PROGRESS: at 94.66% examples, 451262 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:37:34,498 : INFO

2019-01-16 22:38:29,442 : INFO : EPOCH 2 - PROGRESS: at 8.02% examples, 460855 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:38:30,449 : INFO : EPOCH 2 - PROGRESS: at 8.15% examples, 461031 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:38:31,457 : INFO : EPOCH 2 - PROGRESS: at 8.26% examples, 461203 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:38:32,457 : INFO : EPOCH 2 - PROGRESS: at 8.38% examples, 461204 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:38:33,468 : INFO : EPOCH 2 - PROGRESS: at 8.49% examples, 461161 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:38:34,473 : INFO : EPOCH 2 - PROGRESS: at 8.62% examples, 461404 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:38:35,482 : INFO : EPOCH 2 - PROGRESS: at 8.74% examples, 461566 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:38:36,508 : INFO : EPOCH 2 - PROGRESS: at 8.84% examples, 461202 words/s, in_qsize 0, out_qsize 1
2019-01-16 22:38:37,510 : INFO : EPOCH 2 - PROGRESS: at 8.98% examples, 461817 words/s, in_qsize 0, out_

2019-01-16 22:39:40,977 : INFO : EPOCH 2 - PROGRESS: at 35.98% examples, 456835 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:39:41,993 : INFO : EPOCH 2 - PROGRESS: at 36.34% examples, 456817 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:39:43,005 : INFO : EPOCH 2 - PROGRESS: at 36.70% examples, 456803 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:39:44,018 : INFO : EPOCH 2 - PROGRESS: at 37.05% examples, 456784 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:39:44,345 : INFO : Opening paper ProvincialFreeman
2019-01-16 22:39:45,031 : INFO : EPOCH 2 - PROGRESS: at 37.64% examples, 456688 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:39:46,038 : INFO : EPOCH 2 - PROGRESS: at 38.35% examples, 456590 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:39:47,051 : INFO : EPOCH 2 - PROGRESS: at 39.12% examples, 456457 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:39:47,417 : INFO : Opening paper GodeysLadysBook
2019-01-16 22:39:48,057 : INFO : EPOCH 2 - PROGRESS: at 39.56% examples, 456489 w

2019-01-16 22:40:52,522 : INFO : EPOCH 2 - PROGRESS: at 60.87% examples, 458780 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:40:53,536 : INFO : EPOCH 2 - PROGRESS: at 61.28% examples, 458717 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:40:54,536 : INFO : EPOCH 2 - PROGRESS: at 61.75% examples, 458652 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:40:55,550 : INFO : EPOCH 2 - PROGRESS: at 62.16% examples, 458578 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:40:56,561 : INFO : EPOCH 2 - PROGRESS: at 62.72% examples, 458439 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:40:57,564 : INFO : EPOCH 2 - PROGRESS: at 63.15% examples, 458401 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:40:58,567 : INFO : EPOCH 2 - PROGRESS: at 63.55% examples, 458359 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:40:59,579 : INFO : EPOCH 2 - PROGRESS: at 64.06% examples, 458240 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:41:00,584 : INFO : EPOCH 2 - PROGRESS: at 64.51% examples, 458084 words/s, in_qsiz

2019-01-16 22:42:04,063 : INFO : EPOCH 2 - PROGRESS: at 93.26% examples, 453687 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:42:04,823 : INFO : Opening paper FrankLesliesWeekly
2019-01-16 22:42:05,069 : INFO : EPOCH 2 - PROGRESS: at 93.61% examples, 453584 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:42:06,078 : INFO : EPOCH 2 - PROGRESS: at 93.84% examples, 453662 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:42:07,089 : INFO : EPOCH 2 - PROGRESS: at 94.07% examples, 453734 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:42:08,102 : INFO : EPOCH 2 - PROGRESS: at 94.29% examples, 453797 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:42:09,102 : INFO : EPOCH 2 - PROGRESS: at 94.52% examples, 453842 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:42:10,115 : INFO : EPOCH 2 - PROGRESS: at 94.73% examples, 453923 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:42:11,124 : INFO : EPOCH 2 - PROGRESS: at 94.96% examples, 453991 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:42:12,137 : INFO

2019-01-16 22:43:07,389 : INFO : EPOCH 3 - PROGRESS: at 8.23% examples, 457049 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:43:08,397 : INFO : EPOCH 3 - PROGRESS: at 8.35% examples, 457078 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:43:09,400 : INFO : EPOCH 3 - PROGRESS: at 8.46% examples, 457159 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:43:10,404 : INFO : EPOCH 3 - PROGRESS: at 8.57% examples, 457342 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:43:11,405 : INFO : EPOCH 3 - PROGRESS: at 8.69% examples, 457621 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:43:12,411 : INFO : EPOCH 3 - PROGRESS: at 8.80% examples, 457715 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:43:13,417 : INFO : EPOCH 3 - PROGRESS: at 8.93% examples, 457741 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:43:14,436 : INFO : EPOCH 3 - PROGRESS: at 9.05% examples, 457918 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:43:15,455 : INFO : EPOCH 3 - PROGRESS: at 9.16% examples, 458245 words/s, in_qsize 0, out_

2019-01-16 22:44:19,000 : INFO : EPOCH 3 - PROGRESS: at 36.48% examples, 453975 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:44:20,008 : INFO : EPOCH 3 - PROGRESS: at 36.84% examples, 453964 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:44:20,984 : INFO : Opening paper ProvincialFreeman
2019-01-16 22:44:21,010 : INFO : EPOCH 3 - PROGRESS: at 37.17% examples, 453854 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:44:22,012 : INFO : EPOCH 3 - PROGRESS: at 37.90% examples, 453731 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:44:23,017 : INFO : EPOCH 3 - PROGRESS: at 38.60% examples, 453617 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:44:24,035 : INFO : EPOCH 3 - PROGRESS: at 39.34% examples, 453491 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:44:24,079 : INFO : Opening paper GodeysLadysBook
2019-01-16 22:44:25,048 : INFO : EPOCH 3 - PROGRESS: at 39.63% examples, 453661 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:44:26,049 : INFO : EPOCH 3 - PROGRESS: at 39.87% examples, 453888 w

2019-01-16 22:45:30,517 : INFO : EPOCH 3 - PROGRESS: at 61.25% examples, 455867 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:45:31,519 : INFO : EPOCH 3 - PROGRESS: at 61.71% examples, 455813 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:45:32,522 : INFO : EPOCH 3 - PROGRESS: at 62.14% examples, 455698 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:45:33,526 : INFO : EPOCH 3 - PROGRESS: at 62.63% examples, 455596 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:45:34,533 : INFO : EPOCH 3 - PROGRESS: at 63.10% examples, 455541 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:45:35,537 : INFO : EPOCH 3 - PROGRESS: at 63.50% examples, 455509 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:45:36,552 : INFO : EPOCH 3 - PROGRESS: at 63.99% examples, 455404 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:45:37,554 : INFO : EPOCH 3 - PROGRESS: at 64.48% examples, 455281 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:45:38,559 : INFO : EPOCH 3 - PROGRESS: at 64.95% examples, 455177 words/s, in_qsiz

2019-01-16 22:46:42,046 : INFO : EPOCH 3 - PROGRESS: at 93.45% examples, 450804 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:46:42,336 : INFO : Opening paper FrankLesliesWeekly
2019-01-16 22:46:43,048 : INFO : EPOCH 3 - PROGRESS: at 93.72% examples, 450801 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:46:44,051 : INFO : EPOCH 3 - PROGRESS: at 93.94% examples, 450857 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:46:45,066 : INFO : EPOCH 3 - PROGRESS: at 94.18% examples, 450956 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:46:46,083 : INFO : EPOCH 3 - PROGRESS: at 94.39% examples, 451007 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:46:47,093 : INFO : EPOCH 3 - PROGRESS: at 94.61% examples, 451075 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:46:48,107 : INFO : EPOCH 3 - PROGRESS: at 94.85% examples, 451150 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:46:49,107 : INFO : EPOCH 3 - PROGRESS: at 95.05% examples, 451219 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:46:50,117 : INFO

2019-01-16 22:47:45,111 : INFO : EPOCH 4 - PROGRESS: at 8.26% examples, 460643 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:47:46,128 : INFO : EPOCH 4 - PROGRESS: at 8.38% examples, 460528 words/s, in_qsize 0, out_qsize 1
2019-01-16 22:47:47,137 : INFO : EPOCH 4 - PROGRESS: at 8.49% examples, 460724 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:47:48,141 : INFO : EPOCH 4 - PROGRESS: at 8.62% examples, 461007 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:47:49,154 : INFO : EPOCH 4 - PROGRESS: at 8.74% examples, 461123 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:47:50,164 : INFO : EPOCH 4 - PROGRESS: at 8.84% examples, 461108 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:47:51,177 : INFO : EPOCH 4 - PROGRESS: at 8.98% examples, 461278 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:47:52,186 : INFO : EPOCH 4 - PROGRESS: at 9.09% examples, 461423 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:47:53,198 : INFO : EPOCH 4 - PROGRESS: at 9.20% examples, 461775 words/s, in_qsize 0, out_

2019-01-16 22:48:56,712 : INFO : EPOCH 4 - PROGRESS: at 36.74% examples, 457071 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:48:57,724 : INFO : EPOCH 4 - PROGRESS: at 37.10% examples, 457042 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:48:57,919 : INFO : Opening paper ProvincialFreeman
2019-01-16 22:48:58,728 : INFO : EPOCH 4 - PROGRESS: at 37.75% examples, 456940 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:48:59,732 : INFO : EPOCH 4 - PROGRESS: at 38.43% examples, 456821 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:49:00,744 : INFO : EPOCH 4 - PROGRESS: at 39.19% examples, 456592 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:49:01,007 : INFO : Opening paper GodeysLadysBook
2019-01-16 22:49:01,745 : INFO : EPOCH 4 - PROGRESS: at 39.58% examples, 456704 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:49:02,760 : INFO : EPOCH 4 - PROGRESS: at 39.82% examples, 456899 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:49:03,760 : INFO : EPOCH 4 - PROGRESS: at 40.06% examples, 457102 w

2019-01-16 22:50:08,266 : INFO : EPOCH 4 - PROGRESS: at 61.86% examples, 459069 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:50:09,274 : INFO : EPOCH 4 - PROGRESS: at 62.32% examples, 458969 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:50:10,288 : INFO : EPOCH 4 - PROGRESS: at 62.84% examples, 458844 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:50:11,296 : INFO : EPOCH 4 - PROGRESS: at 63.24% examples, 458837 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:50:12,299 : INFO : EPOCH 4 - PROGRESS: at 63.70% examples, 458749 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:50:13,301 : INFO : EPOCH 4 - PROGRESS: at 64.18% examples, 458670 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:50:14,302 : INFO : EPOCH 4 - PROGRESS: at 64.64% examples, 458544 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:50:15,304 : INFO : EPOCH 4 - PROGRESS: at 65.09% examples, 458467 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:50:16,317 : INFO : EPOCH 4 - PROGRESS: at 65.61% examples, 458363 words/s, in_qsiz

2019-01-16 22:51:19,873 : INFO : EPOCH 4 - PROGRESS: at 93.92% examples, 454047 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:51:20,887 : INFO : EPOCH 4 - PROGRESS: at 94.15% examples, 454097 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:51:21,896 : INFO : EPOCH 4 - PROGRESS: at 94.36% examples, 454153 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:51:22,906 : INFO : EPOCH 4 - PROGRESS: at 94.59% examples, 454222 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:51:23,914 : INFO : EPOCH 4 - PROGRESS: at 94.82% examples, 454285 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:51:24,927 : INFO : EPOCH 4 - PROGRESS: at 95.04% examples, 454368 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:51:25,929 : INFO : EPOCH 4 - PROGRESS: at 95.26% examples, 454440 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:51:26,942 : INFO : EPOCH 4 - PROGRESS: at 95.46% examples, 454498 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:51:27,956 : INFO : EPOCH 4 - PROGRESS: at 95.70% examples, 454585 words/s, in_qsiz

2019-01-16 22:52:23,183 : INFO : EPOCH 5 - PROGRESS: at 8.41% examples, 451700 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:52:24,188 : INFO : EPOCH 5 - PROGRESS: at 8.53% examples, 451698 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:52:25,190 : INFO : EPOCH 5 - PROGRESS: at 8.64% examples, 451805 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:52:26,192 : INFO : EPOCH 5 - PROGRESS: at 8.76% examples, 451986 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:52:27,207 : INFO : EPOCH 5 - PROGRESS: at 8.86% examples, 451505 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:52:28,208 : INFO : EPOCH 5 - PROGRESS: at 8.99% examples, 451681 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:52:29,214 : INFO : EPOCH 5 - PROGRESS: at 9.10% examples, 451950 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:52:30,230 : INFO : EPOCH 5 - PROGRESS: at 9.21% examples, 452122 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:52:31,242 : INFO : EPOCH 5 - PROGRESS: at 9.33% examples, 452166 words/s, in_qsize 0, out_

2019-01-16 22:53:34,779 : INFO : EPOCH 5 - PROGRESS: at 36.54% examples, 446142 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:53:35,780 : INFO : EPOCH 5 - PROGRESS: at 36.91% examples, 446091 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:53:36,595 : INFO : Opening paper ProvincialFreeman
2019-01-16 22:53:36,788 : INFO : EPOCH 5 - PROGRESS: at 37.31% examples, 445997 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:53:37,791 : INFO : EPOCH 5 - PROGRESS: at 37.99% examples, 445882 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:53:38,792 : INFO : EPOCH 5 - PROGRESS: at 38.67% examples, 445786 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:53:39,744 : INFO : Opening paper GodeysLadysBook
2019-01-16 22:53:39,801 : INFO : EPOCH 5 - PROGRESS: at 39.41% examples, 445638 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:53:40,810 : INFO : EPOCH 5 - PROGRESS: at 39.65% examples, 445833 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:53:41,822 : INFO : EPOCH 5 - PROGRESS: at 39.90% examples, 446082 w

2019-01-16 22:54:46,302 : INFO : EPOCH 5 - PROGRESS: at 60.87% examples, 448259 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:54:47,312 : INFO : EPOCH 5 - PROGRESS: at 61.26% examples, 448208 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:54:48,328 : INFO : EPOCH 5 - PROGRESS: at 61.72% examples, 448128 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:54:49,335 : INFO : EPOCH 5 - PROGRESS: at 62.14% examples, 448054 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:54:50,349 : INFO : EPOCH 5 - PROGRESS: at 62.64% examples, 447931 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:54:51,349 : INFO : EPOCH 5 - PROGRESS: at 63.10% examples, 447862 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:54:52,364 : INFO : EPOCH 5 - PROGRESS: at 63.49% examples, 447802 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:54:53,374 : INFO : EPOCH 5 - PROGRESS: at 63.97% examples, 447717 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:54:54,387 : INFO : EPOCH 5 - PROGRESS: at 64.44% examples, 447570 words/s, in_qsiz

2019-01-16 22:55:58,167 : INFO : Opening paper DouglassMonthly
2019-01-16 22:55:58,933 : INFO : EPOCH 5 - PROGRESS: at 93.08% examples, 443349 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:55:59,941 : INFO : EPOCH 5 - PROGRESS: at 93.43% examples, 443216 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:56:00,316 : INFO : Opening paper FrankLesliesWeekly
2019-01-16 22:56:00,944 : INFO : EPOCH 5 - PROGRESS: at 93.70% examples, 443204 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:56:01,960 : INFO : EPOCH 5 - PROGRESS: at 93.92% examples, 443262 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:56:02,971 : INFO : EPOCH 5 - PROGRESS: at 94.15% examples, 443326 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:56:03,984 : INFO : EPOCH 5 - PROGRESS: at 94.36% examples, 443395 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:56:04,987 : INFO : EPOCH 5 - PROGRESS: at 94.57% examples, 443443 words/s, in_qsize 0, out_qsize 0
2019-01-16 22:56:05,994 : INFO : EPOCH 5 - PROGRESS: at 94.79% examples, 443513 

In [20]:
# Sanity-check the trained model with a few basic queries.

# Basic similarity: the 50 words ranked closest to a single query word.
# NOTE(review): the recorded results contain fused tokens such as 'offreedom'
# and 'freedomand' -- presumably missing whitespace in the source text, which
# splitting on non-word characters cannot repair; verify against the corpus.
w1 = "freedom"
model.wv.most_similar(positive=w1, topn=50)

[('liberty', 0.8603097796440125),
 ('offreedom', 0.7068889141082764),
 ('ofliberty', 0.6963063478469849),
 ('freedomand', 0.6153182983398438),
 ('freedomin', 0.6114463806152344),
 ('libertyand', 0.6082854866981506),
 ('freedomof', 0.6009382009506226),
 ('thefreedom', 0.587365984916687),
 ('libertyin', 0.5841888189315796),
 ('libertyof', 0.5730873346328735),
 ('freedomto', 0.543325662612915),
 ('andfreedom', 0.5268338918685913),
 ('rights', 0.5245076417922974),
 ('freedomfor', 0.5214343070983887),
 ('universalfreedom', 0.5157692432403564),
 ('humanfreedom', 0.5073760747909546),
 ('emancipation', 0.4976997375488281),
 ('universalliberty', 0.49759191274642944),
 ('humanity', 0.4877506494522095),
 ('andliberty', 0.476218581199646),
 ('free', 0.46744900941848755),
 ('impartialliberty', 0.4630075991153717),
 ('enfranchisement', 0.45792388916015625),
 ('tofreedom', 0.4561663866043091),
 ('equality', 0.4560352861881256),
 ('libertyare', 0.45481932163238525),
 ('universalemancipation', 0.452693

In [8]:
# Similarity score between two individual words (here "freedom" vs "justice").

model.wv.similarity(w1="freedom",w2="justice")

0.3081880960772845

In [9]:
# Same pairwise comparison, "freedom" vs "abolition".
model.wv.similarity(w1="freedom",w2="abolition")

0.3881127546968688

In [26]:
# Same pairwise comparison, "freedom" vs "emancipation".
# NOTE(review): the recorded value (~0.9996) disagrees with the ~0.50 score
# recorded for the same pair in the most_similar("freedom") listing, and this
# cell's execution count is out of sequence -- possibly run against a different
# model state; re-run on a fresh kernel to confirm.
model.wv.similarity(w1="freedom",w2="emancipation")

0.9996274907535606

In [10]:
# Same pairwise comparison, "freedom" vs "liberation".
model.wv.similarity(w1="freedom",w2="liberation")

0.3910873099331539

In [22]:
# Analogy-style query rather than a lookup of opposites: rank the 20 words
# closest to "freedom" and "emancipation" combined while steering away from
# "slavery".
model.wv.most_similar(positive=["freedom","emancipation"], negative=["slavery"], topn=20)

[('liberty', 0.6121565103530884),
 ('liberation', 0.5771019458770752),
 ('emancipationto', 0.5716506242752075),
 ('offreedom', 0.5694626569747925),
 ('enfranchisement', 0.5659188032150269),
 ('universalemancipation', 0.531051754951477),
 ('ofliberty', 0.5227570533752441),
 ('theemancipation', 0.4975431263446808),
 ('freedomto', 0.4924303889274597),
 ('freedomand', 0.48114728927612305),
 ('emancipationof', 0.4767819046974182),
 ('freedomfor', 0.46657171845436096),
 ('deliverance', 0.46637940406799316),
 ('libertyand', 0.4616428017616272),
 ('extinction', 0.45941323041915894),
 ('abolitionof', 0.45894789695739746),
 ('thefreedom', 0.45332634449005127),
 ('freedomin', 0.44901221990585327),
 ('immediateemancipation', 0.44775301218032837),
 ('freedomof', 0.4473683536052704)]

In [23]:
# Same positive/negative query with "justice" as the second positive term.
model.wv.most_similar(positive=["freedom","justice"], negative=["slavery"], topn=20)

[('liberty', 0.5829455256462097),
 ('justiceand', 0.5654289722442627),
 ('ofliberty', 0.5484073162078857),
 ('freedomand', 0.5271247625350952),
 ('libertyand', 0.5262999534606934),
 ('andfreedom', 0.5092192888259888),
 ('ofjustice', 0.5062933564186096),
 ('righteousnessand', 0.4856838881969452),
 ('andjustice', 0.4853516221046448),
 ('equity', 0.48221224546432495),
 ('andhumanity', 0.47141093015670776),
 ('humanityand', 0.4654231369495392),
 ('offreedom', 0.45145225524902344),
 ('humanity', 0.44794246554374695),
 ('justiceof', 0.44688063859939575),
 ('andliberty', 0.43530911207199097),
 ('truthand', 0.432658314704895),
 ('honor', 0.41448870301246643),
 ('honour', 0.399139404296875),
 ('libertyof', 0.3989446461200714)]

In [24]:
# Same positive/negative query with "abolition" as the second positive term.
model.wv.most_similar(positive=["freedom","abolition"], negative=["slavery"],topn=20)

[('liberty', 0.6073888540267944),
 ('abolitionof', 0.5925824642181396),
 ('theabolition', 0.5789734125137329),
 ('theanti', 0.5541331768035889),
 ('anti', 0.5451400279998779),
 ('offreedom', 0.538105845451355),
 ('extinction', 0.5205190181732178),
 ('unconditionalabolition', 0.5107524394989014),
 ('extinctionof', 0.5060350298881531),
 ('freedomand', 0.5053471326828003),
 ('ofliberty', 0.5020107626914978),
 ('abolishment', 0.4903695285320282),
 ('emancipation', 0.4791562259197235),
 ('andfreedom', 0.47902804613113403),
 ('libertyand', 0.4777108132839203),
 ('immediateabolition', 0.46573418378829956),
 ('ofanti', 0.46157634258270264),
 ('theiranti', 0.459446519613266),
 ('uncompromisinganti', 0.45820844173431396),
 ('entireabolition', 0.4555511772632599)]

In [25]:
# Same positive/negative query with "liberation" as the second positive term.
model.wv.most_similar(positive=["freedom","liberation"], negative=["slavery"],topn=20)

[('enfranchisement', 0.5890267491340637),
 ('deliverance', 0.5835366249084473),
 ('deliveranceof', 0.528423011302948),
 ('release', 0.5161937475204468),
 ('liberationof', 0.5152287483215332),
 ('disenthralment', 0.5138409733772278),
 ('liberty', 0.5130751132965088),
 ('emancipationto', 0.5016998052597046),
 ('redemption', 0.5006049871444702),
 ('freedomto', 0.4893316924571991),
 ('theemancipation', 0.4638637900352478),
 ('emancipation', 0.45972001552581787),
 ('thefreedom', 0.45963573455810547),
 ('emancipationof', 0.459356427192688),
 ('liberating', 0.4565788805484772),
 ('restoration', 0.4383823871612549),
 ('extinction', 0.4346863031387329),
 ('freedomof', 0.43465563654899597),
 ('recovery', 0.43309032917022705),
 ('theliberation', 0.4280696511268616)]

In [15]:
# Pairwise similarity of each liberty-related term to "slavery", starting with "freedom".
model.wv.similarity(w1="freedom",w2="slavery")

0.2621654662714637

In [16]:
# "abolition" vs "slavery".
model.wv.similarity(w1="abolition",w2="slavery")

0.363795133449599

In [17]:
# "emancipation" vs "slavery".
model.wv.similarity(w1="emancipation",w2="slavery")


0.38278991852956423

In [18]:
# "liberation" vs "slavery".
model.wv.similarity(w1="liberation",w2="slavery")

0.12439727388881948

In [28]:
# Query with only a negative term: the 20 words ranked highest when "freedom"
# is pushed away from. The recorded scores are all low (about 0.3), so the
# list is presumably weak evidence of genuine "opposites" -- interpret with care.
model.wv.most_similar(negative=["freedom"],topn=20)

[('insubstance', 0.3397386372089386),
 ('reflectively', 0.3365861177444458),
 ('busses', 0.3242341876029968),
 ('librarian', 0.32376059889793396),
 ('doke', 0.32179272174835205),
 ('dentist', 0.3122277855873108),
 ('ruefully', 0.31117627024650574),
 ('casually', 0.30954819917678833),
 ('assessor', 0.30595290660858154),
 ('jocelin', 0.3016507029533386),
 ('clerks', 0.29672783613204956),
 ('relatedthe', 0.296588271856308),
 ('cruttenden', 0.29542914032936096),
 ('cutter', 0.29472848773002625),
 ('shipbroker', 0.2939457297325134),
 ('sexton', 0.29246434569358826),
 ('cator', 0.2922428846359253),
 ('matriculated', 0.29216068983078003),
 ('tring', 0.29096826910972595),
 ('detective', 0.2903852164745331)]

In [29]:
# Same negative-only query for "slavery"; recorded scores are again about 0.3.
model.wv.most_similar(negative=["slavery"],topn=20)

[('theodosia', 0.329913854598999),
 ('porters', 0.31303250789642334),
 ('trousseaus', 0.3130304217338562),
 ('apolonia', 0.3126440644264221),
 ('silvie', 0.30968037247657776),
 ('unluckily', 0.30761244893074036),
 ('askedif', 0.3062184751033783),
 ('yearbefore', 0.3047272562980652),
 ('skaters', 0.30349600315093994),
 ('connectient', 0.3015727996826172),
 ('alle', 0.30115562677383423),
 ('lasses', 0.3006308078765869),
 ('caboose', 0.298845112323761),
 ('notthere', 0.2979559302330017),
 ('stevedore', 0.29785168170928955),
 ('snipe', 0.2968602776527405),
 ('waggin', 0.29603156447410583),
 ('blithesome', 0.2958426773548126),
 ('whileon', 0.29552996158599854),
 ('bohemian', 0.29411572217941284)]

In [30]:
# Gendered probe: words near "freedom" shifted towards "he" and away from "she".
model.wv.most_similar(positive=["he","freedom"], negative=["she"], topn=20)

[('liberty', 0.7371569871902466),
 ('offreedom', 0.5458797216415405),
 ('ofliberty', 0.5371274352073669),
 ('freedomof', 0.5196456909179688),
 ('freedomto', 0.48756200075149536),
 ('libertyof', 0.48633280396461487),
 ('libertyand', 0.4763438403606415),
 ('thefreedom', 0.46641239523887634),
 ('freedomin', 0.46458011865615845),
 ('emancipation', 0.4557468891143799),
 ('freedomand', 0.448272705078125),
 ('rights', 0.42606255412101746),
 ('universalemancipation', 0.42320185899734497),
 ('freedomfor', 0.42207109928131104),
 ('libertyin', 0.4208998382091522),
 ('humanfreedom', 0.4175874590873718),
 ('hisrights', 0.4087570309638977),
 ('universalliberty', 0.40544384717941284),
 ('impartialliberty', 0.40467363595962524),
 ('andliberty', 0.4039745330810547)]

In [31]:
# Same "he" minus "she" probe for "liberation".
model.wv.most_similar(positive=["he","liberation"], negative=["she"], topn=20)

[('emancipation', 0.5923379063606262),
 ('deliveranceof', 0.5842519402503967),
 ('theemancipation', 0.5834643840789795),
 ('liberationof', 0.5771814584732056),
 ('deliverance', 0.5641262531280518),
 ('release', 0.556685745716095),
 ('enfranchisement', 0.5537338256835938),
 ('liberating', 0.5305815935134888),
 ('emancipationto', 0.5175020694732666),
 ('emancipating', 0.5121076107025146),
 ('manumission', 0.503220796585083),
 ('enfranchisementof', 0.4996788799762726),
 ('recapture', 0.4977020025253296),
 ('disenthralment', 0.4936557412147522),
 ('unconditionalemancipation', 0.49298954010009766),
 ('theliberation', 0.48650068044662476),
 ('emancipationof', 0.4766657054424286),
 ('expulsion', 0.47166115045547485),
 ('removal', 0.4691636264324188),
 ('liberate', 0.46584033966064453)]

In [32]:
# Same "he" minus "she" probe for "emancipation".
model.wv.most_similar(positive=["he","emancipation"], negative=["she"], topn=20)

[('abolition', 0.5840641260147095),
 ('theemancipation', 0.5838963389396667),
 ('ofemancipation', 0.5768318772315979),
 ('immediateemancipation', 0.5758593082427979),
 ('universalemancipation', 0.5648214817047119),
 ('emancipationto', 0.563896656036377),
 ('liberation', 0.5592828392982483),
 ('unconditionalemancipation', 0.5522013902664185),
 ('emancipationis', 0.5457920432090759),
 ('emancipationwhich', 0.5090665817260742),
 ('colonization', 0.48641684651374817),
 ('emancipating', 0.47732865810394287),
 ('enfranchisement', 0.47141650319099426),
 ('manumission', 0.46744468808174133),
 ('emancipationin', 0.46457362174987793),
 ('emancipationof', 0.45806407928466797),
 ('emancipationand', 0.45672309398651123),
 ('emancipationists', 0.45611512660980225),
 ('annexation', 0.4423949718475342),
 ('unconditionaland', 0.442057341337204)]

In [33]:
# Same "he" minus "she" probe for "abolition".
model.wv.most_similar(positive=["he","abolition"], negative=["she"], topn=20)

[('emancipation', 0.5396702289581299),
 ('theabolition', 0.5352801084518433),
 ('abolishment', 0.5181679129600525),
 ('abolitionof', 0.49431654810905457),
 ('colonization', 0.48735764622688293),
 ('repeal', 0.4782942533493042),
 ('emancipationists', 0.476290762424469),
 ('abolitionism', 0.4751638174057007),
 ('unconditionalabolition', 0.4729675054550171),
 ('anti', 0.4658471643924713),
 ('abolitions', 0.463434636592865),
 ('annexation', 0.44923684000968933),
 ('dissolution', 0.4454294443130493),
 ('disunion', 0.43925976753234863),
 ('abolitionists', 0.4383278787136078),
 ('immediateabolition', 0.4357284903526306),
 ('extinction', 0.432897686958313),
 ('unconditionalemancipation', 0.4324530065059662),
 ('antislavery', 0.42531269788742065),
 ('immediateemancipation', 0.4252984821796417)]

In [36]:
def mascify(y, topn=1000, margin=0.1):
    """Return the closest neighbour of ``y`` that leans towards "he".

    Walks the ``topn`` most similar words to ``y`` in ``model.wv``, in the
    order ``most_similar`` returns them, and returns the first word whose
    similarity to "she" is lower than its similarity to "he" by more than
    ``margin``.

    Parameters
    ----------
    y : str
        Query word passed to ``model.wv.most_similar``.
    topn : int, optional
        Number of neighbours of ``y`` to consider (default 1000).
    margin : float, optional
        Gap by which the similarity to "he" must exceed the similarity
        to "she" for a neighbour to qualify (default 0.1).

    Returns
    -------
    str
        The first qualifying neighbour.

    Raises
    ------
    IndexError
        If none of the ``topn`` neighbours qualifies.
    """
    vectors = model.wv
    # Stop at the first hit instead of scoring every neighbour: only the
    # first qualifying word is ever returned.
    for candidate in vectors.most_similar(y, topn=topn):
        word = candidate[0]
        if vectors.similarity(word, "she") < vectors.similarity(word, "he") - margin:
            return word
    # IndexError is kept (it is what the former ``[...][0]`` raised), but now
    # with a message that says what actually went wrong.
    raise IndexError(
        "no neighbour of {0!r} among the top {1} is closer to 'he' than to "
        "'she' by more than {2}".format(y, topn, margin)
    )

In [46]:
def femify(y, topn=1000, margin=0.1):
    """Return the closest neighbour of ``y`` that leans towards "she".

    Walks the ``topn`` most similar words to ``y`` in ``model.wv``, in the
    order ``most_similar`` returns them, and returns the first word whose
    similarity to "he" is lower than its similarity to "she" by more than
    ``margin``.

    Parameters
    ----------
    y : str
        Query word passed to ``model.wv.most_similar``.
    topn : int, optional
        Number of neighbours of ``y`` to consider (default 1000).
    margin : float, optional
        Gap by which the similarity to "she" must exceed the similarity
        to "he" for a neighbour to qualify (default 0.1).

    Returns
    -------
    str
        The first qualifying neighbour.

    Raises
    ------
    IndexError
        If none of the ``topn`` neighbours qualifies.
    """
    vectors = model.wv
    # Stop at the first hit instead of scoring every neighbour: only the
    # first qualifying word is ever returned.
    for candidate in vectors.most_similar(y, topn=topn):
        word = candidate[0]
        if vectors.similarity(word, "he") < vectors.similarity(word, "she") - margin:
            return word
    # IndexError is kept (it is what the former ``[...][0]`` raised), but now
    # with a message that says what actually went wrong.
    raise IndexError(
        "no neighbour of {0!r} among the top {1} is closer to 'she' than to "
        "'he' by more than {2}".format(y, topn, margin)
    )

In [38]:
# Top-20 most similar words for the query with "he" and "labor"
# as positive terms and "she" as the negative term.
model.wv.most_similar(
    positive=["he", "labor"],
    negative=["she"],
    topn=20,
)

[('labour', 0.7706866264343262),
 ('labourer', 0.5732357501983643),
 ('laborer', 0.5581916570663452),
 ('laborof', 0.511107325553894),
 ('toil', 0.46996334195137024),
 ('laborers', 0.46411335468292236),
 ('slavelabor', 0.4521053731441498),
 ('theirlabor', 0.44290855526924133),
 ('employer', 0.4421233534812927),
 ('producer', 0.4371755123138428),
 ('consumer', 0.4351278245449066),
 ('wages', 0.4347209334373474),
 ('laboris', 0.4282694458961487),
 ('labors', 0.4220976233482361),
 ('earnings', 0.42161640524864197),
 ('employment', 0.42066994309425354),
 ('oflabor', 0.4178470969200134),
 ('industry', 0.41507506370544434),
 ('capitalist', 0.4117794632911682),
 ('unrequited', 0.411433607339859)]

In [39]:
# Top-20 most similar words for the query with "he" and "work"
# as positive terms and "she" as the negative term.
model.wv.most_similar(
    positive=["he", "work"],
    negative=["she"],
    topn=20,
)

[('thework', 0.5660370588302612),
 ('workwhich', 0.5006560683250427),
 ('workof', 0.4714723825454712),
 ('task', 0.4606185555458069),
 ('works', 0.4528791606426239),
 ('goodwork', 0.4388577938079834),
 ('job', 0.43531692028045654),
 ('undertaking', 0.42167627811431885),
 ('awork', 0.42004063725471497),
 ('hiswork', 0.4127237796783447),
 ('business', 0.4082722067832947),
 ('workman', 0.4017372131347656),
 ('working', 0.39673173427581787),
 ('labors', 0.3839649260044098),
 ('enterprise', 0.38079559803009033),
 ('greatwork', 0.3752608895301819),
 ('workto', 0.3739856481552124),
 ('book', 0.3676258325576782),
 ('thiswork', 0.36592012643814087),
 ('plan', 0.35875606536865234)]

In [40]:
# Top-20 most similar words for the query with "he" and "editor"
# as positive terms and "she" as the negative term.
model.wv.most_similar(
    positive=["he", "editor"],
    negative=["she"],
    topn=20,
)

[('editorof', 0.6315087080001831),
 ('theeditor', 0.5635166168212891),
 ('brownson', 0.5495580434799194),
 ('writer', 0.5347850322723389),
 ('editors', 0.5207881331443787),
 ('liberator', 0.5183323621749878),
 ('edit', 0.5115718841552734),
 ('reporter', 0.5109946727752686),
 ('gurley', 0.5103272199630737),
 ('leavitt', 0.5067963600158691),
 ('heinzen', 0.5040760040283203),
 ('investigator', 0.5009287595748901),
 ('danforth', 0.5006861686706543),
 ('gyurman', 0.49994897842407227),
 ('ditor', 0.49953410029411316),
 ('recorder', 0.4988257884979248),
 ('publisher', 0.49578240513801575),
 ('editorsof', 0.49338364601135254),
 ('correspondent', 0.48941710591316223),
 ('emancipator', 0.48862364888191223)]

In [41]:
# Top-20 most similar words for the query with "he" and "editing"
# as positive terms and "she" as the negative term.
model.wv.most_similar(
    positive=["he", "editing"],
    negative=["she"],
    topn=20,
)

[('edit', 0.6577107310295105),
 ('publishing', 0.5382227301597595),
 ('printing', 0.5317913889884949),
 ('editor', 0.5210978388786316),
 ('edits', 0.5191155672073364),
 ('theeditor', 0.5181102752685547),
 ('printer', 0.5165809392929077),
 ('editorof', 0.4942779839038849),
 ('editorship', 0.4846949875354767),
 ('frederickdouglass', 0.479436993598938),
 ('emancipator', 0.47362810373306274),
 ('awhig', 0.4681946039199829),
 ('publisher', 0.4645629823207855),
 ('newspaper', 0.463682621717453),
 ('compiling', 0.4606969356536865),
 ('publication', 0.45931971073150635),
 ('columnsof', 0.4541594386100769),
 ('editorially', 0.44720980525016785),
 ('fortnightly', 0.4462085962295532),
 ('editorial', 0.4421541094779968)]

In [42]:
# Top-20 most similar words for the query with "he" and "writing"
# as positive terms and "she" as the negative term.
model.wv.most_similar(
    positive=["he", "writing"],
    negative=["she"],
    topn=20,
)

[('hisletter', 0.48643049597740173),
 ('inditing', 0.47988414764404297),
 ('manuscript', 0.4755515456199646),
 ('penning', 0.47041988372802734),
 ('copyist', 0.4646728038787842),
 ('copying', 0.46283119916915894),
 ('quoting', 0.46192842721939087),
 ('memorandum', 0.45745450258255005),
 ('written', 0.4565618634223938),
 ('wrote', 0.4520462155342102),
 ('write', 0.4514564275741577),
 ('writer', 0.4483950734138489),
 ('letterto', 0.44291388988494873),
 ('printer', 0.4269883930683136),
 ('speakingand', 0.4204513728618622),
 ('publishing', 0.4171189069747925),
 ('ofwriting', 0.4142105281352997),
 ('punctuation', 0.4140129089355469),
 ('verbally', 0.41308721899986267),
 ('reading', 0.4120013117790222)]

In [43]:
# Reverse direction of the neighbouring cells: "she" and "cooking" are the
# positive terms and "he" is the negative term (top 20 results).
model.wv.most_similar(
    positive=["she", "cooking"],
    negative=["he"],
    topn=20,
)

[('ironing', 0.7112582325935364),
 ('baking', 0.6435930728912354),
 ('pastry', 0.6146605610847473),
 ('laundry', 0.6019372344017029),
 ('scrubbing', 0.5969020128250122),
 ('washing', 0.5954079627990723),
 ('dusting', 0.5862024426460266),
 ('churning', 0.5839918851852417),
 ('housekeeping', 0.5804911851882935),
 ('stoves', 0.5721753835678101),
 ('bedding', 0.5687488913536072),
 ('heating', 0.5620366334915161),
 ('kneading', 0.5588648319244385),
 ('nursery', 0.5553981065750122),
 ('utensils', 0.5551400184631348),
 ('dairy', 0.5549020171165466),
 ('drying', 0.5536801815032959),
 ('kitchen', 0.5531803369522095),
 ('mending', 0.5508454442024231),
 ('pans', 0.547383189201355)]

In [44]:
# Top-20 most similar words for the query with "he" and "working"
# as positive terms and "she" as the negative term.
model.wv.most_similar(
    positive=["he", "working"],
    negative=["she"],
    topn=20,
)

[('laboring', 0.5146450400352478),
 ('theworking', 0.49840494990348816),
 ('worked', 0.4883202910423279),
 ('labouring', 0.46637803316116333),
 ('drilling', 0.40460607409477234),
 ('employing', 0.37732332944869995),
 ('employed', 0.37593182921409607),
 ('thelaboring', 0.3727867007255554),
 ('atwork', 0.36835944652557373),
 ('boring', 0.3665507435798645),
 ('work', 0.3643168807029724),
 ('workman', 0.36309903860092163),
 ('studying', 0.36209022998809814),
 ('ofteaching', 0.3578157424926758),
 ('digging', 0.3577783405780792),
 ('towork', 0.3512488603591919),
 ('hiring', 0.34543249011039734),
 ('workin', 0.34477779269218445),
 ('seafaring', 0.3431152105331421),
 ('operating', 0.34307771921157837)]

In [47]:
# First of the 1000 most similar words to "editing" whose similarity to "he"
# exceeds its similarity to "she" by more than 0.1.
mascify("editing")

'printing'

In [48]:
# First of the 1000 most similar words to "editing" whose similarity to "she"
# exceeds its similarity to "he" by more than 0.1.
femify("editing")

'nonpareil'

In [49]:
# First of the 1000 most similar words to "labor" whose similarity to "he"
# exceeds its similarity to "she" by more than 0.1.
mascify("labor")

'employer'

In [50]:
# First of the 1000 most similar words to "labor" whose similarity to "she"
# exceeds its similarity to "he" by more than 0.1.
femify("labor")

'laborto'