In [1]:
import pandas as pd
import numpy as np
import re
from tqdm import tqdm

from nltk.corpus import stopwords

from gensim.models import Word2Vec
from gensim.models import Phrases
from gensim.models.phrases import Phraser

from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

%matplotlib inline

In [5]:
ls

[0m[01;34mhotel-reviews[0m/     [01;31mhotel-reviews.csv.zip[0m  train_word2vec.ipynb
hotel-reviews.csv  [01;31mhotel-reviews.zip[0m


In [6]:
data = pd.read_csv("hotel-reviews.csv")

In [7]:
data.head()

Unnamed: 0,User_ID,Description,Browser_Used,Device_Used,Is_Response
0,id10326,The room was kind of clean but had a VERY stro...,Edge,Mobile,not happy
1,id10327,I stayed at the Crown Plaza April -- - April -...,Internet Explorer,Mobile,not happy
2,id10328,I booked this hotel through Hotwire at the low...,Mozilla,Tablet,not happy
3,id10329,Stayed here with husband and sons on the way t...,InternetExplorer,Desktop,happy
4,id10330,My girlfriends and I stayed here to celebrate ...,Edge,Tablet,not happy


In [9]:
stopWords = stopwords.words("english")

# pre processing data
def cleanData(sentence):
    processedList = ""
    
    # convert to lowercase, ignore all special characters - keep only alpha-numericals and spaces (not removing full-stop here)
    sentence = re.sub(r'[^A-Za-z0-9\s.]',r'',str(sentence).lower())
    sentence = re.sub(r'\n',r' ',sentence)
    
    # remove stop words
    sentence = " ".join([word for word in sentence.split() if word not in stopWords])
    
    return sentence

In [10]:
cleanData(data['Description'][2])

'booked hotel hotwire lowest price could find. got front desk manager gave us smoking room. argued little baby would booked room known smoking. manager would hear anything told hotwire books cheapest rooms available. get go unhappy. great deal persuasion discussion got nonsmoking room. thereafter room minimal amenities. besides great location near dupont circle much say overpriced hotel. room small ok condition. bathroom small tub. bathroom amenities also minimal. fridge microwave rent fridge staff keep baby things. parking costs per day best drive here. also breakfast included. lobby small feels old. thing available coffee lobby decent. poor service minimal amenities small rooms small bathrooms view great location. distance metro either mcpherson station dupont station. try look better available.'

In [20]:
data["Description"] = data["Description"].map(lambda x : cleanData(x))

# Word2Vec

Going to use **gensim library** to train word2vec model. Gensim accepts input in form of list of lists, where each internal list consists of review sentence.

Each review in our data may have more than one sentence. We'll split each sentence and create a list of sentences to pass it to gensim.

In [21]:
tmp_corpus = data["Description"].map(lambda x : x.split("."))

In [24]:
tmp_corpus[2]

['booked hotel hotwire lowest price could find',
 ' got front desk manager gave us smoking room',
 ' argued little baby would booked room known smoking',
 ' manager would hear anything told hotwire books cheapest rooms available',
 ' get go unhappy',
 ' great deal persuasion discussion got nonsmoking room',
 ' thereafter room minimal amenities',
 ' besides great location near dupont circle much say overpriced hotel',
 ' room small ok condition',
 ' bathroom small tub',
 ' bathroom amenities also minimal',
 ' fridge microwave rent fridge staff keep baby things',
 ' parking costs per day best drive here',
 ' also breakfast included',
 ' lobby small feels old',
 ' thing available coffee lobby decent',
 ' poor service minimal amenities small rooms small bathrooms view great location',
 ' distance metro either mcpherson station dupont station',
 ' try look better available',
 '']

In [25]:
# corpus [[w1,w2,w3..],[..]]   #tqdm shows progress bar
corpus = []
for i in tqdm(range(len(tmp_corpus))):
    for line in tmp_corpus[i]:
        words = [x for x in line.split()]
        corpus.append(words)

100%|██████████| 38932/38932 [00:01<00:00, 24199.55it/s]


In [26]:
num_of_sentences = len(corpus)
num_of_words = 0
for line in corpus:
    num_of_words += len(line)

print('Num of sentences - %s'%(num_of_sentences))
print('Num of words - %s'%(num_of_words))

Num of sentences - 444271
Num of words - 3111121


**We'll do a little more preprocessing here by extracting phrases from the corpus. For example, new york -> new_york, etc. We'll limit to only bigrams.** 

In [27]:
phrases = Phrases(sentences=corpus,min_count=25,threshold=50)
bigram = Phraser(phrases)

2018-09-22 13:01:41,874 : INFO : collecting all words and their counts
2018-09-22 13:01:41,879 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2018-09-22 13:01:42,034 : INFO : PROGRESS: at sentence #10000, processed 70801 words and 55905 word types
2018-09-22 13:01:42,159 : INFO : PROGRESS: at sentence #20000, processed 141274 words and 98827 word types
2018-09-22 13:01:42,284 : INFO : PROGRESS: at sentence #30000, processed 212516 words and 137942 word types
2018-09-22 13:01:42,409 : INFO : PROGRESS: at sentence #40000, processed 282042 words and 173306 word types
2018-09-22 13:01:42,540 : INFO : PROGRESS: at sentence #50000, processed 351277 words and 206238 word types
2018-09-22 13:01:42,670 : INFO : PROGRESS: at sentence #60000, processed 421236 words and 238626 word types
2018-09-22 13:01:42,805 : INFO : PROGRESS: at sentence #70000, processed 491663 words and 270170 word types
2018-09-22 13:01:42,945 : INFO : PROGRESS: at sentence #80000, processed 563363 wo

In [29]:
for index,sentence in tqdm(enumerate(corpus)):
    corpus[index] = bigram[sentence]

444271it [00:10, 43109.45it/s]


In [30]:
# shuffle corpus
def shuffle_corpus(sentences):
    shuffled = list(sentences)
    random.shuffle(shuffled)
    return shuffled

In [31]:
# sg - skip gram |  window = size of the window | size = vector dimension
size = 100
window_size = 2 # sentences weren't too long, so
epochs = 100
min_count = 2
workers = 4

# train word2vec model using gensim
# model = Word2Vec(corpus, sg=1,window=window_size,size=size,
#                  min_count=min_count,workers=workers,iter=epochs,sample=0.01)

2018-09-22 13:05:01,569 : INFO : collecting all words and their counts
2018-09-22 13:05:01,573 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2018-09-22 13:05:01,602 : INFO : PROGRESS: at sentence #10000, processed 68251 words, keeping 8812 word types
2018-09-22 13:05:01,634 : INFO : PROGRESS: at sentence #20000, processed 136136 words, keeping 12451 word types
2018-09-22 13:05:01,656 : INFO : PROGRESS: at sentence #30000, processed 204641 words, keeping 15230 word types
2018-09-22 13:05:01,681 : INFO : PROGRESS: at sentence #40000, processed 271576 words, keeping 17587 word types
2018-09-22 13:05:01,710 : INFO : PROGRESS: at sentence #50000, processed 338157 words, keeping 19539 word types
2018-09-22 13:05:01,734 : INFO : PROGRESS: at sentence #60000, processed 405603 words, keeping 21363 word types
2018-09-22 13:05:01,760 : INFO : PROGRESS: at sentence #70000, processed 473392 words, keeping 23191 word types
2018-09-22 13:05:01,784 : INFO : PROGRESS: at se

2018-09-22 13:05:15,993 : INFO : EPOCH 3 - PROGRESS: at 63.95% examples, 628108 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:05:16,998 : INFO : EPOCH 3 - PROGRESS: at 87.78% examples, 645621 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:05:17,468 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:05:17,489 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:05:17,501 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:05:17,502 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:05:17,502 : INFO : EPOCH - 3 : training on 2995867 raw words (2963903 effective words) took 4.5s, 653960 effective words/s
2018-09-22 13:05:18,515 : INFO : EPOCH 4 - PROGRESS: at 14.34% examples, 424545 words/s, in_qsize 8, out_qsize 0
2018-09-22 13:05:19,522 : INFO : EPOCH 4 - PROGRESS: at 34.31% examples, 507140 words/s, in_qsize 7, out_qsize 1
2018-09-22 13:05:20,547 : INFO : EPOCH 4 - PROG

2018-09-22 13:05:56,780 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:05:56,794 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:05:56,813 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:05:56,823 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:05:56,825 : INFO : EPOCH - 11 : training on 2995867 raw words (2963903 effective words) took 4.6s, 637533 effective words/s
2018-09-22 13:05:57,844 : INFO : EPOCH 12 - PROGRESS: at 22.64% examples, 667121 words/s, in_qsize 8, out_qsize 0
2018-09-22 13:05:58,846 : INFO : EPOCH 12 - PROGRESS: at 46.00% examples, 678893 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:05:59,856 : INFO : EPOCH 12 - PROGRESS: at 69.75% examples, 684142 words/s, in_qsize 6, out_qsize 1
2018-09-22 13:06:00,876 : INFO : EPOCH 12 - PROGRESS: at 87.79% examples, 643640 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:06:01,540 : INFO : worker th

2018-09-22 13:06:36,967 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:06:36,968 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:06:36,979 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:06:36,979 : INFO : EPOCH - 19 : training on 2995867 raw words (2963903 effective words) took 5.6s, 531044 effective words/s
2018-09-22 13:06:38,009 : INFO : EPOCH 20 - PROGRESS: at 21.64% examples, 641695 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:06:39,015 : INFO : EPOCH 20 - PROGRESS: at 43.66% examples, 645031 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:06:40,043 : INFO : EPOCH 20 - PROGRESS: at 65.66% examples, 641693 words/s, in_qsize 8, out_qsize 1
2018-09-22 13:06:41,047 : INFO : EPOCH 20 - PROGRESS: at 89.75% examples, 658397 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:06:41,425 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:06:41,441 : INFO : worker th

2018-09-22 13:07:20,729 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:07:20,751 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:07:20,761 : INFO : EPOCH 27 - PROGRESS: at 99.76% examples, 581189 words/s, in_qsize 1, out_qsize 1
2018-09-22 13:07:20,761 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:07:20,771 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:07:20,771 : INFO : EPOCH - 27 : training on 2995867 raw words (2963903 effective words) took 5.1s, 581414 effective words/s
2018-09-22 13:07:21,779 : INFO : EPOCH 28 - PROGRESS: at 16.94% examples, 504217 words/s, in_qsize 8, out_qsize 0
2018-09-22 13:07:22,782 : INFO : EPOCH 28 - PROGRESS: at 32.33% examples, 478683 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:07:23,785 : INFO : EPOCH 28 - PROGRESS: at 44.67% examples, 440746 words/s, in_qsize 8, out_qsize 0
2018-09-22 13:07:24,797 : INFO : EPOCH 28 

2018-09-22 13:07:58,209 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:07:58,234 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:07:58,242 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:07:58,244 : INFO : EPOCH - 35 : training on 2995867 raw words (2963903 effective words) took 6.0s, 494476 effective words/s
2018-09-22 13:07:59,273 : INFO : EPOCH 36 - PROGRESS: at 15.94% examples, 466722 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:08:00,275 : INFO : EPOCH 36 - PROGRESS: at 39.35% examples, 577934 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:08:01,291 : INFO : EPOCH 36 - PROGRESS: at 63.27% examples, 619049 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:08:02,304 : INFO : EPOCH 36 - PROGRESS: at 87.78% examples, 642384 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:08:02,793 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:08:02,802 : INFO : worker th

2018-09-22 13:08:40,598 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:08:40,613 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:08:40,622 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:08:40,624 : INFO : EPOCH - 43 : training on 2995867 raw words (2963903 effective words) took 4.9s, 604965 effective words/s
2018-09-22 13:08:41,650 : INFO : EPOCH 44 - PROGRESS: at 15.32% examples, 450499 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:08:42,659 : INFO : EPOCH 44 - PROGRESS: at 38.71% examples, 568062 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:08:43,668 : INFO : EPOCH 44 - PROGRESS: at 61.93% examples, 607476 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:08:44,671 : INFO : EPOCH 44 - PROGRESS: at 86.15% examples, 632878 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:08:45,216 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:08:45,226 : INFO : worker th

2018-09-22 13:09:20,084 : INFO : EPOCH 52 - PROGRESS: at 71.50% examples, 696181 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:09:21,118 : INFO : EPOCH 52 - PROGRESS: at 85.46% examples, 621407 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:09:21,784 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:09:21,821 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:09:21,848 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:09:21,856 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:09:21,857 : INFO : EPOCH - 52 : training on 2995867 raw words (2963903 effective words) took 4.8s, 615836 effective words/s
2018-09-22 13:09:22,921 : INFO : EPOCH 53 - PROGRESS: at 13.99% examples, 401439 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:09:23,934 : INFO : EPOCH 53 - PROGRESS: at 37.35% examples, 540711 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:09:24,944 : INFO : EPOCH 53 

2018-09-22 13:10:01,176 : INFO : EPOCH 60 - PROGRESS: at 68.73% examples, 674019 words/s, in_qsize 8, out_qsize 0
2018-09-22 13:10:02,188 : INFO : EPOCH 60 - PROGRESS: at 93.05% examples, 683838 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:10:02,436 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:10:02,459 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:10:02,460 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:10:02,465 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:10:02,466 : INFO : EPOCH - 60 : training on 2995867 raw words (2963903 effective words) took 4.3s, 687318 effective words/s
2018-09-22 13:10:03,492 : INFO : EPOCH 61 - PROGRESS: at 23.65% examples, 692965 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:10:04,503 : INFO : EPOCH 61 - PROGRESS: at 48.01% examples, 703518 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:10:05,522 : INFO : EPOCH 61 

2018-09-22 13:10:38,502 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:10:38,504 : INFO : EPOCH - 68 : training on 2995867 raw words (2963903 effective words) took 4.2s, 700846 effective words/s
2018-09-22 13:10:39,526 : INFO : EPOCH 69 - PROGRESS: at 23.31% examples, 683025 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:10:40,537 : INFO : EPOCH 69 - PROGRESS: at 47.65% examples, 698637 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:10:41,547 : INFO : EPOCH 69 - PROGRESS: at 71.81% examples, 700615 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:10:42,568 : INFO : EPOCH 69 - PROGRESS: at 95.03% examples, 694917 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:10:42,728 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:10:42,738 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:10:42,759 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:10:42,765 : INFO : worker th

2018-09-22 13:11:18,655 : INFO : EPOCH 77 - PROGRESS: at 70.46% examples, 691913 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:11:19,665 : INFO : EPOCH 77 - PROGRESS: at 93.71% examples, 690313 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:11:20,024 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:11:20,033 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:11:20,043 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:11:20,049 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:11:20,050 : INFO : EPOCH - 77 : training on 2995867 raw words (2963903 effective words) took 4.4s, 672094 effective words/s
2018-09-22 13:11:21,078 : INFO : EPOCH 78 - PROGRESS: at 19.26% examples, 563842 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:11:22,081 : INFO : EPOCH 78 - PROGRESS: at 40.00% examples, 587404 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:11:23,093 : INFO : EPOCH 78 

2018-09-22 13:11:58,244 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:11:58,244 : INFO : EPOCH - 85 : training on 2995867 raw words (2963903 effective words) took 5.2s, 567153 effective words/s
2018-09-22 13:11:59,274 : INFO : EPOCH 86 - PROGRESS: at 19.91% examples, 585069 words/s, in_qsize 6, out_qsize 1
2018-09-22 13:12:00,277 : INFO : EPOCH 86 - PROGRESS: at 39.35% examples, 578468 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:12:01,292 : INFO : EPOCH 86 - PROGRESS: at 58.29% examples, 570655 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:12:02,303 : INFO : EPOCH 86 - PROGRESS: at 77.18% examples, 564948 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:12:03,219 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:12:03,233 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:12:03,260 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:12:03,261 : INFO : worker th

2018-09-22 13:12:38,766 : INFO : EPOCH 94 - PROGRESS: at 60.91% examples, 596591 words/s, in_qsize 8, out_qsize 0
2018-09-22 13:12:39,772 : INFO : EPOCH 94 - PROGRESS: at 81.08% examples, 594818 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:12:40,636 : INFO : worker thread finished; awaiting finish of 3 more threads
2018-09-22 13:12:40,647 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-09-22 13:12:40,651 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-09-22 13:12:40,653 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-09-22 13:12:40,654 : INFO : EPOCH - 94 : training on 2995867 raw words (2963903 effective words) took 4.9s, 602269 effective words/s
2018-09-22 13:12:41,673 : INFO : EPOCH 95 - PROGRESS: at 19.59% examples, 583230 words/s, in_qsize 6, out_qsize 1
2018-09-22 13:12:42,681 : INFO : EPOCH 95 - PROGRESS: at 39.03% examples, 576083 words/s, in_qsize 7, out_qsize 0
2018-09-22 13:12:43,686 : INFO : EPOCH 95 

In [33]:
# model.build_vocab(sentences=shuffle_corpus(corpus),update=True)

# for i in range(5):
#     model.train(sentences=shuffle_corpus(corpus),epochs=50,total_examples=model.corpus_count)

In [34]:
# save model
# model.save('hotel_reviews_w2v_model')

2018-09-22 13:13:54,818 : INFO : saving Word2Vec object under hotel_reviews_w2v_model, separately None
2018-09-22 13:13:54,823 : INFO : not storing attribute vectors_norm
2018-09-22 13:13:54,828 : INFO : not storing attribute cum_table
2018-09-22 13:13:55,092 : INFO : saved hotel_reviews_w2v_model


In [36]:
# load word2vec model
model = Word2Vec.load('hotel_reviews_w2v_model')

2018-09-22 13:14:21,830 : INFO : loading Word2Vec object from hotel_reviews_w2v_model
2018-09-22 13:14:22,067 : INFO : loading wv recursively from hotel_reviews_w2v_model.wv.* with mmap=None
2018-09-22 13:14:22,068 : INFO : setting ignored attribute vectors_norm to None
2018-09-22 13:14:22,068 : INFO : loading vocabulary recursively from hotel_reviews_w2v_model.vocabulary.* with mmap=None
2018-09-22 13:14:22,069 : INFO : loading trainables recursively from hotel_reviews_w2v_model.trainables.* with mmap=None
2018-09-22 13:14:22,070 : INFO : setting ignored attribute cum_table to None
2018-09-22 13:14:22,071 : INFO : loaded hotel_reviews_w2v_model


#  Using model

In [37]:
model.most_similar(positive=['woman', 'king'], negative=['man'], topn=1)

  """Entry point for launching an IPython kernel.
2018-09-22 13:14:41,754 : INFO : precomputing L2-norms of word weight vectors
  if np.issubdtype(vec.dtype, np.int):


[('queen', 0.7110299468040466)]

In [38]:
model.doesnt_match('hotel nice clean holiday'.split())

  """Entry point for launching an IPython kernel.
  if np.issubdtype(vec.dtype, np.int):


'holiday'

In [39]:
model.similarity('dinner','lunch')

  """Entry point for launching an IPython kernel.
  if np.issubdtype(vec.dtype, np.int):


0.76692426

In [41]:
model.most_similar('food')

  """Entry point for launching an IPython kernel.
  if np.issubdtype(vec.dtype, np.int):


[('meals', 0.6542510390281677),
 ('matto', 0.6398987770080566),
 ('foods', 0.6317042112350464),
 ('meal', 0.6284471750259399),
 ('tasty', 0.60737544298172),
 ('atoll', 0.592156171798706),
 ('pastas', 0.5831188559532166),
 ('nile', 0.5758324265480042),
 ('nutritious', 0.5757604241371155),
 ('staffgood', 0.5736162662506104)]

In [42]:
model.wv['hotel']

array([-0.21407582,  0.15739445, -0.10329814,  0.3981375 , -0.14881004,
        0.320674  , -0.16944902,  0.09859808, -0.26828048, -0.12704675,
        0.06020573, -0.05732861,  0.18150927, -0.1539523 , -0.47318745,
       -0.34647158,  0.04417315, -0.13315365, -0.21100152,  0.5085535 ,
       -0.01224163,  0.2504834 ,  0.10718042, -0.30854124,  0.38876066,
       -0.15223077, -0.31235757,  0.39786354,  0.30793688, -0.32732442,
       -0.3485975 ,  0.21008287,  0.10110498, -0.21678847,  0.05093117,
       -0.07201957, -0.28647166, -0.0811832 ,  0.21155277,  0.06419438,
       -0.30983937,  0.2392125 , -0.296849  , -0.0018551 ,  0.09599836,
       -0.27217454,  0.06335046, -0.39513707, -0.0400341 , -0.23345749,
        0.05474209, -0.3861522 , -0.33149615,  0.02692843,  0.26644826,
        0.1404996 ,  0.04800099,  0.37454945,  0.33229715,  0.17175405,
        0.07764574,  0.16153583, -0.37697798, -0.13939062,  0.2111367 ,
       -0.29558524,  0.2614005 ,  0.38711253,  0.35019726, -0.20