In [1]:
import gzip
import gensim 
import logging

# настройка параметров журналирования
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [2]:
data_file="reviews_data.txt.gz"

with gzip.open ('reviews_data.txt.gz', 'rb') as f:
    for i,line in enumerate (f):
        print(line)
        break

b"Oct 12 2009 \tNice trendy hotel location not too bad.\tI stayed in this hotel for one night. As this is a fairly new place some of the taxi drivers did not know where it was and/or did not want to drive there. Once I have eventually arrived at the hotel, I was very pleasantly surprised with the decor of the lobby/ground floor area. It was very stylish and modern. I found the reception's staff geeting me with 'Aloha' a bit out of place, but I guess they are briefed to say that to keep up the coroporate image.As I have a Starwood Preferred Guest member, I was given a small gift upon-check in. It was only a couple of fridge magnets in a gift box, but nevertheless a nice gesture.My room was nice and roomy, there are tea and coffee facilities in each room and you get two complimentary bottles of water plus some toiletries by 'bliss'.The location is not great. It is at the last metro stop and you then need to take a taxi, but if you are not planning on going to see the historic sites in Be

In [3]:
def read_input(input_file):
    """This method reads the input file which is in gzip format"""
    
    logging.info("reading file {0}...this may take a while".format(input_file))
    
    with gzip.open (input_file, 'rb') as f:
        for i, line in enumerate (f): 

            if (i%10000==0):
                logging.info ("read {0} reviews".format (i))
            # do some pre-processing and return a list of words for each review text
            yield gensim.utils.simple_preprocess (line)
            
# read the tokenized reviews into a list
# each review item becomes a serries of words
# so this becomes a list of lists
documents = list (read_input (data_file))
logging.info ("Done reading data file")

2019-01-24 18:19:25,955 : INFO : reading file reviews_data.txt.gz...this may take a while
2019-01-24 18:19:25,959 : INFO : read 0 reviews
2019-01-24 18:19:30,559 : INFO : read 10000 reviews
2019-01-24 18:19:33,406 : INFO : read 20000 reviews
2019-01-24 18:19:37,855 : INFO : read 30000 reviews
2019-01-24 18:19:42,286 : INFO : read 40000 reviews
2019-01-24 18:19:50,387 : INFO : read 50000 reviews
2019-01-24 18:19:55,609 : INFO : read 60000 reviews
2019-01-24 18:19:59,092 : INFO : read 70000 reviews
2019-01-24 18:20:02,310 : INFO : read 80000 reviews
2019-01-24 18:20:06,435 : INFO : read 90000 reviews
2019-01-24 18:20:10,150 : INFO : read 100000 reviews
2019-01-24 18:20:13,818 : INFO : read 110000 reviews
2019-01-24 18:20:17,023 : INFO : read 120000 reviews
2019-01-24 18:20:20,222 : INFO : read 130000 reviews
2019-01-24 18:20:24,225 : INFO : read 140000 reviews
2019-01-24 18:20:27,482 : INFO : read 150000 reviews
2019-01-24 18:20:30,041 : INFO : read 160000 reviews
2019-01-24 18:20:32,719

In [8]:
len(documents)

255404

In [9]:
documents[0]

['oct',
 'nice',
 'trendy',
 'hotel',
 'location',
 'not',
 'too',
 'bad',
 'stayed',
 'in',
 'this',
 'hotel',
 'for',
 'one',
 'night',
 'as',
 'this',
 'is',
 'fairly',
 'new',
 'place',
 'some',
 'of',
 'the',
 'taxi',
 'drivers',
 'did',
 'not',
 'know',
 'where',
 'it',
 'was',
 'and',
 'or',
 'did',
 'not',
 'want',
 'to',
 'drive',
 'there',
 'once',
 'have',
 'eventually',
 'arrived',
 'at',
 'the',
 'hotel',
 'was',
 'very',
 'pleasantly',
 'surprised',
 'with',
 'the',
 'decor',
 'of',
 'the',
 'lobby',
 'ground',
 'floor',
 'area',
 'it',
 'was',
 'very',
 'stylish',
 'and',
 'modern',
 'found',
 'the',
 'reception',
 'staff',
 'geeting',
 'me',
 'with',
 'aloha',
 'bit',
 'out',
 'of',
 'place',
 'but',
 'guess',
 'they',
 'are',
 'briefed',
 'to',
 'say',
 'that',
 'to',
 'keep',
 'up',
 'the',
 'coroporate',
 'image',
 'as',
 'have',
 'starwood',
 'preferred',
 'guest',
 'member',
 'was',
 'given',
 'small',
 'gift',
 'upon',
 'check',
 'in',
 'it',
 'was',
 'only',
 'co

In [4]:
model = gensim.models.Word2Vec (documents, size=150, window=10, min_count=2, workers=10)
model.train(documents,total_examples=len(documents),epochs=10)

2019-01-24 18:21:00,452 : INFO : collecting all words and their counts
2019-01-24 18:21:00,454 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2019-01-24 18:21:00,942 : INFO : PROGRESS: at sentence #10000, processed 1655714 words, keeping 25777 word types
2019-01-24 18:21:01,419 : INFO : PROGRESS: at sentence #20000, processed 3317863 words, keeping 35016 word types
2019-01-24 18:21:02,010 : INFO : PROGRESS: at sentence #30000, processed 5264072 words, keeping 47518 word types
2019-01-24 18:21:02,517 : INFO : PROGRESS: at sentence #40000, processed 7081746 words, keeping 56675 word types
2019-01-24 18:21:03,125 : INFO : PROGRESS: at sentence #50000, processed 9089491 words, keeping 63744 word types
2019-01-24 18:21:03,723 : INFO : PROGRESS: at sentence #60000, processed 11013723 words, keeping 76781 word types
2019-01-24 18:21:04,187 : INFO : PROGRESS: at sentence #70000, processed 12637525 words, keeping 83194 word types
2019-01-24 18:21:04,620 : INFO : PROG

2019-01-24 18:21:53,739 : INFO : EPOCH 1 - PROGRESS: at 74.09% examples, 602221 words/s, in_qsize 17, out_qsize 2
2019-01-24 18:21:54,743 : INFO : EPOCH 1 - PROGRESS: at 75.76% examples, 600395 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:21:55,752 : INFO : EPOCH 1 - PROGRESS: at 77.41% examples, 598531 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:21:56,753 : INFO : EPOCH 1 - PROGRESS: at 79.29% examples, 597948 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:21:57,801 : INFO : EPOCH 1 - PROGRESS: at 81.12% examples, 596727 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:21:58,804 : INFO : EPOCH 1 - PROGRESS: at 83.26% examples, 597406 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:21:59,807 : INFO : EPOCH 1 - PROGRESS: at 85.16% examples, 597654 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:22:00,852 : INFO : EPOCH 1 - PROGRESS: at 87.38% examples, 597700 words/s, in_qsize 20, out_qsize 0
2019-01-24 18:22:01,853 : INFO : EPOCH 1 - PROGRESS: at 89.63% examples, 598364 words/s,

2019-01-24 18:22:58,333 : INFO : EPOCH 2 - PROGRESS: at 96.71% examples, 574661 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:22:59,362 : INFO : EPOCH 2 - PROGRESS: at 98.87% examples, 575071 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:22:59,823 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-01-24 18:22:59,844 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-01-24 18:22:59,850 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-01-24 18:22:59,878 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-01-24 18:22:59,896 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-01-24 18:22:59,902 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-01-24 18:22:59,920 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-01-24 18:22:59,929 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-01-24 18:22:59,944 : INFO : worker thre

2019-01-24 18:23:54,939 : INFO : EPOCH 4 - PROGRESS: at 9.88% examples, 611726 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:23:55,983 : INFO : EPOCH 4 - PROGRESS: at 11.54% examples, 607175 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:23:57,035 : INFO : EPOCH 4 - PROGRESS: at 13.30% examples, 605409 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:23:58,052 : INFO : EPOCH 4 - PROGRESS: at 14.87% examples, 598096 words/s, in_qsize 16, out_qsize 3
2019-01-24 18:23:59,058 : INFO : EPOCH 4 - PROGRESS: at 16.75% examples, 603964 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:24:00,059 : INFO : EPOCH 4 - PROGRESS: at 18.45% examples, 605417 words/s, in_qsize 20, out_qsize 0
2019-01-24 18:24:01,064 : INFO : EPOCH 4 - PROGRESS: at 20.08% examples, 607893 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:24:02,086 : INFO : EPOCH 4 - PROGRESS: at 21.86% examples, 603927 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:24:03,136 : INFO : EPOCH 4 - PROGRESS: at 23.41% examples, 601725 words/s, 

2019-01-24 18:24:59,274 : INFO : EPOCH 5 - PROGRESS: at 42.28% examples, 627364 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:25:00,276 : INFO : EPOCH 5 - PROGRESS: at 44.73% examples, 629253 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:25:01,284 : INFO : EPOCH 5 - PROGRESS: at 46.81% examples, 627676 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:25:02,289 : INFO : EPOCH 5 - PROGRESS: at 48.90% examples, 626394 words/s, in_qsize 20, out_qsize 0
2019-01-24 18:25:03,293 : INFO : EPOCH 5 - PROGRESS: at 51.16% examples, 627673 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:25:04,343 : INFO : EPOCH 5 - PROGRESS: at 53.38% examples, 628833 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:25:05,345 : INFO : EPOCH 5 - PROGRESS: at 55.87% examples, 631222 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:25:06,352 : INFO : EPOCH 5 - PROGRESS: at 58.02% examples, 631203 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:25:07,360 : INFO : EPOCH 5 - PROGRESS: at 60.21% examples, 631237 words/s,

2019-01-24 18:26:00,297 : INFO : EPOCH 1 - PROGRESS: at 68.02% examples, 602227 words/s, in_qsize 17, out_qsize 2
2019-01-24 18:26:01,303 : INFO : EPOCH 1 - PROGRESS: at 70.23% examples, 604604 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:26:02,357 : INFO : EPOCH 1 - PROGRESS: at 72.53% examples, 605954 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:26:03,377 : INFO : EPOCH 1 - PROGRESS: at 75.07% examples, 609000 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:26:04,414 : INFO : EPOCH 1 - PROGRESS: at 77.19% examples, 610366 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:26:05,414 : INFO : EPOCH 1 - PROGRESS: at 79.36% examples, 611849 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:26:06,416 : INFO : EPOCH 1 - PROGRESS: at 81.52% examples, 613419 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:26:07,460 : INFO : EPOCH 1 - PROGRESS: at 83.84% examples, 614650 words/s, in_qsize 20, out_qsize 0
2019-01-24 18:26:08,465 : INFO : EPOCH 1 - PROGRESS: at 86.04% examples, 616149 words/s,

2019-01-24 18:27:01,666 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-01-24 18:27:01,674 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-01-24 18:27:01,681 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-01-24 18:27:01,689 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-01-24 18:27:01,694 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-01-24 18:27:01,720 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-01-24 18:27:01,722 : INFO : EPOCH - 2 : training on 41519355 raw words (30350993 effective words) took 47.2s, 642981 effective words/s
2019-01-24 18:27:02,741 : INFO : EPOCH 3 - PROGRESS: at 1.97% examples, 615723 words/s, in_qsize 20, out_qsize 0
2019-01-24 18:27:03,744 : INFO : EPOCH 3 - PROGRESS: at 4.30% examples, 656338 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:27:04,748 : INFO : EPOCH 3 - PROGRESS: at 6.57% examples, 671703 words/s, 

2019-01-24 18:28:01,407 : INFO : EPOCH 4 - PROGRESS: at 23.38% examples, 657800 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:28:02,416 : INFO : EPOCH 4 - PROGRESS: at 25.26% examples, 656499 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:28:03,418 : INFO : EPOCH 4 - PROGRESS: at 27.76% examples, 657008 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:28:04,427 : INFO : EPOCH 4 - PROGRESS: at 30.09% examples, 657465 words/s, in_qsize 19, out_qsize 5
2019-01-24 18:28:05,451 : INFO : EPOCH 4 - PROGRESS: at 32.71% examples, 659995 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:28:06,457 : INFO : EPOCH 4 - PROGRESS: at 35.00% examples, 660925 words/s, in_qsize 20, out_qsize 4
2019-01-24 18:28:07,459 : INFO : EPOCH 4 - PROGRESS: at 37.38% examples, 661832 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:28:08,467 : INFO : EPOCH 4 - PROGRESS: at 39.64% examples, 660466 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:28:09,498 : INFO : EPOCH 4 - PROGRESS: at 41.79% examples, 654619 words/s,

2019-01-24 18:29:06,324 : INFO : EPOCH 5 - PROGRESS: at 65.05% examples, 651571 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:29:07,337 : INFO : EPOCH 5 - PROGRESS: at 67.05% examples, 650405 words/s, in_qsize 14, out_qsize 5
2019-01-24 18:29:08,353 : INFO : EPOCH 5 - PROGRESS: at 69.41% examples, 651763 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:29:09,379 : INFO : EPOCH 5 - PROGRESS: at 71.59% examples, 651845 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:29:10,383 : INFO : EPOCH 5 - PROGRESS: at 73.95% examples, 652075 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:29:11,399 : INFO : EPOCH 5 - PROGRESS: at 76.03% examples, 652065 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:29:12,432 : INFO : EPOCH 5 - PROGRESS: at 78.25% examples, 653142 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:29:13,454 : INFO : EPOCH 5 - PROGRESS: at 80.47% examples, 653397 words/s, in_qsize 20, out_qsize 0
2019-01-24 18:29:14,463 : INFO : EPOCH 5 - PROGRESS: at 82.68% examples, 653459 words/s,

2019-01-24 18:30:11,434 : INFO : EPOCH 6 - PROGRESS: at 98.64% examples, 613628 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:30:11,981 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-01-24 18:30:12,023 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-01-24 18:30:12,031 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-01-24 18:30:12,034 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-01-24 18:30:12,036 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-01-24 18:30:12,042 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-01-24 18:30:12,045 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-01-24 18:30:12,060 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-01-24 18:30:12,066 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-01-24 18:30:12,068 : INFO : worker thread finished; awaiting f

2019-01-24 18:31:08,160 : INFO : EPOCH 8 - PROGRESS: at 15.02% examples, 600539 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:31:09,187 : INFO : EPOCH 8 - PROGRESS: at 16.97% examples, 607122 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:31:10,198 : INFO : EPOCH 8 - PROGRESS: at 18.57% examples, 604939 words/s, in_qsize 17, out_qsize 2
2019-01-24 18:31:11,254 : INFO : EPOCH 8 - PROGRESS: at 20.08% examples, 600894 words/s, in_qsize 20, out_qsize 0
2019-01-24 18:31:12,259 : INFO : EPOCH 8 - PROGRESS: at 22.04% examples, 604147 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:31:13,260 : INFO : EPOCH 8 - PROGRESS: at 23.49% examples, 600974 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:31:14,271 : INFO : EPOCH 8 - PROGRESS: at 25.08% examples, 597198 words/s, in_qsize 19, out_qsize 1
2019-01-24 18:31:15,271 : INFO : EPOCH 8 - PROGRESS: at 27.21% examples, 595904 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:31:16,282 : INFO : EPOCH 8 - PROGRESS: at 29.63% examples, 599969 words/s,

2019-01-24 18:32:12,557 : INFO : EPOCH 9 - PROGRESS: at 52.80% examples, 644173 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:32:13,564 : INFO : EPOCH 9 - PROGRESS: at 55.16% examples, 645851 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:32:14,582 : INFO : EPOCH 9 - PROGRESS: at 57.41% examples, 645109 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:32:15,583 : INFO : EPOCH 9 - PROGRESS: at 59.59% examples, 644729 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:32:16,587 : INFO : EPOCH 9 - PROGRESS: at 61.93% examples, 646145 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:32:17,608 : INFO : EPOCH 9 - PROGRESS: at 64.36% examples, 645574 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:32:18,622 : INFO : EPOCH 9 - PROGRESS: at 66.42% examples, 644851 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:32:19,680 : INFO : EPOCH 9 - PROGRESS: at 68.89% examples, 645981 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:32:20,708 : INFO : EPOCH 9 - PROGRESS: at 70.90% examples, 645487 words/s,

2019-01-24 18:33:17,475 : INFO : EPOCH 10 - PROGRESS: at 80.64% examples, 576927 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:33:18,479 : INFO : EPOCH 10 - PROGRESS: at 83.05% examples, 579980 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:33:19,480 : INFO : EPOCH 10 - PROGRESS: at 85.15% examples, 581921 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:33:20,512 : INFO : EPOCH 10 - PROGRESS: at 87.66% examples, 584229 words/s, in_qsize 20, out_qsize 2
2019-01-24 18:33:21,514 : INFO : EPOCH 10 - PROGRESS: at 89.91% examples, 585293 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:33:22,536 : INFO : EPOCH 10 - PROGRESS: at 92.27% examples, 586677 words/s, in_qsize 18, out_qsize 1
2019-01-24 18:33:23,540 : INFO : EPOCH 10 - PROGRESS: at 94.66% examples, 589382 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:33:24,546 : INFO : EPOCH 10 - PROGRESS: at 96.84% examples, 590207 words/s, in_qsize 19, out_qsize 0
2019-01-24 18:33:25,566 : INFO : EPOCH 10 - PROGRESS: at 99.11% examples, 591114

(303493179, 415193550)

In [6]:
#path = get_tmpfile("word2vec_rev.model")
model.save("word2vec.model")

2019-01-24 18:36:04,669 : INFO : saving Word2Vec object under word2vec.model, separately None
2019-01-24 18:36:04,671 : INFO : storing np array 'vectors' to word2vec.model.wv.vectors.npy
2019-01-24 18:36:04,814 : INFO : not storing attribute vectors_norm
2019-01-24 18:36:04,815 : INFO : storing np array 'syn1neg' to word2vec.model.trainables.syn1neg.npy
2019-01-24 18:36:04,960 : INFO : not storing attribute cum_table
2019-01-24 18:36:05,253 : INFO : saved word2vec.model


In [11]:
model_new = gensim.models.Word2Vec.load("word2vec.model")

2019-01-24 19:02:41,309 : INFO : loading Word2Vec object from word2vec.model
2019-01-24 19:02:41,591 : INFO : loading wv recursively from word2vec.model.wv.* with mmap=None
2019-01-24 19:02:41,592 : INFO : loading vectors from word2vec.model.wv.vectors.npy with mmap=None
2019-01-24 19:02:41,751 : INFO : setting ignored attribute vectors_norm to None
2019-01-24 19:02:41,753 : INFO : loading vocabulary recursively from word2vec.model.vocabulary.* with mmap=None
2019-01-24 19:02:41,755 : INFO : loading trainables recursively from word2vec.model.trainables.* with mmap=None
2019-01-24 19:02:41,756 : INFO : loading syn1neg from word2vec.model.trainables.syn1neg.npy with mmap=None
2019-01-24 19:02:41,890 : INFO : setting ignored attribute cum_table to None
2019-01-24 19:02:41,891 : INFO : loaded word2vec.model


In [12]:
w1 = "dirty"
model.wv.most_similar (positive=w1)

[('filthy', 0.8692750930786133),
 ('stained', 0.7698962092399597),
 ('unclean', 0.7692201137542725),
 ('dusty', 0.7628558874130249),
 ('smelly', 0.7557368278503418),
 ('grubby', 0.7452136278152466),
 ('dingy', 0.7277590036392212),
 ('disgusting', 0.7223472595214844),
 ('soiled', 0.7223440408706665),
 ('grimy', 0.7183251976966858)]

In [13]:
w1 = ["polite"]
model.wv.most_similar (positive=w1,topn=6)

[('courteous', 0.9189587831497192),
 ('friendly', 0.8329201936721802),
 ('cordial', 0.8024827837944031),
 ('professional', 0.7817876935005188),
 ('attentive', 0.7681726217269897),
 ('curteous', 0.7582568526268005)]

In [14]:
w1 = ["france"]
model.wv.most_similar (positive=w1,topn=6)

[('canada', 0.6476145386695862),
 ('spain', 0.6432772874832153),
 ('germany', 0.6296396255493164),
 ('barcelona', 0.6212668418884277),
 ('gaulle', 0.6191176772117615),
 ('england', 0.6107905507087708)]

In [15]:
w1 = ["shocked"]
model.wv.most_similar (positive=w1,topn=6)

[('horrified', 0.8068848848342896),
 ('amazed', 0.7898840308189392),
 ('stunned', 0.7681757211685181),
 ('astonished', 0.7673320174217224),
 ('appalled', 0.76075679063797),
 ('dismayed', 0.7502361536026001)]

In [16]:
w1 = ["bed",'sheet','pillow']
w2 = ['couch']
model.wv.most_similar (positive=w1,negative=w2,topn=10)

[('duvet', 0.6956197619438171),
 ('blanket', 0.6931217908859253),
 ('mattress', 0.6816356778144836),
 ('quilt', 0.6740881204605103),
 ('matress', 0.6473985910415649),
 ('pillowcase', 0.6471307277679443),
 ('coverlet', 0.6345054507255554),
 ('pillows', 0.6297971606254578),
 ('sheets', 0.6192577481269836),
 ('foam', 0.6144356727600098)]

In [17]:
model.wv.similarity(w1="dirty",w2="smelly")

0.75573677

In [18]:
model.wv.similarity(w1="dirty",w2="dirty")

1.0

In [19]:
model.wv.similarity(w1="dirty",w2="clean")

0.25410885

In [20]:
model.wv.doesnt_match(["cat","dog","france"])

'france'

In [21]:
model.wv.doesnt_match(["bed","pillow","duvet","shower"])

'shower'

In [24]:
model.predict_output_word(["cat","dog"])

[('cat', 0.40784225),
 ('barking', 0.29624984),
 ('swing', 0.19342239),
 ('matilda', 0.07314546),
 ('dog', 0.011240242),
 ('kennel', 0.009119239),
 ('dogs', 0.0014198059),
 ('peeve', 0.0008128989),
 ('cats', 0.0006184085),
 ('poop', 0.0004392112)]

In [27]:
vector = model.wv['pillow']  # numpy vector of a word
print(vector.shape)
vector

(150,)


array([-0.56824565,  0.6685691 ,  0.14243403,  1.1476588 ,  1.0893593 ,
        2.6202507 ,  0.9725516 , -0.9167326 ,  3.76899   , -0.11449307,
        0.32539964, -5.6854806 ,  0.7073057 , -2.8831625 , -0.09475641,
       -0.7659395 , -0.99904543, -3.8276749 ,  1.3647859 ,  3.1056736 ,
       -2.8734605 ,  2.4335964 ,  1.5020852 , -7.214404  , -1.48709   ,
       -1.7010206 , -3.4045622 , -2.2163882 ,  0.1604887 , -6.512992  ,
        2.818291  , -2.065619  ,  0.03880703,  1.8291322 ,  6.7831035 ,
       -0.7760299 , -2.7290337 , -3.174662  , -2.65267   , -1.1406963 ,
       -0.41191527,  0.22097304, -3.9633777 , -1.4580814 ,  0.21903144,
        1.671294  ,  0.6285273 ,  1.903788  ,  1.7688664 , -1.110936  ,
        2.5103934 , -1.2374666 , -3.3466825 ,  5.34845   ,  4.0317664 ,
        0.2728421 , -2.8600705 ,  2.2491212 , -3.1623528 ,  0.45840135,
        2.290354  , -3.4075727 , -0.87489134, -2.2255137 , -2.1741323 ,
       -1.5789785 , -2.4309692 ,  5.885373  , -0.6189368 ,  0.09