In [7]:
# https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/doc2vec-lee.ipynb
from collections import namedtuple

from gensim.summarization.textcleaner import get_sentences
from gensim.models.doc2vec import TaggedDocument, Doc2Vec
from gensim.utils import simple_preprocess
from gensim.test.utils import common_texts, get_tmpfile


class TaggedDocument(namedtuple('TaggedDocument', 'words tags original')):
    """ Based on gensim's TaggedDocument. "original" field is added. """
    # FIXME: Extend class to keep all methods.
    
    def __str__(self):
        return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.tags)


def read_corpus(fname):
    with open(fname) as f:
        text = f.read()
        text = text.replace('\n', '')
        for i, sentence in enumerate(get_sentences(text)):
            yield TaggedDocument(simple_preprocess(sentence), [i], sentence)

In [8]:
train_corpus = list(read_corpus('book.txt'))

In [16]:
model = Doc2Vec(train_corpus, vector_size=50, min_count=2, epochs=40, train_lbls=False)
model.train(train_corpus, total_examples=model.corpus_count, epochs=model.epochs)
path = get_tmpfile("word2vec.model")
model.save("word2vec.model")

In [17]:
model = Doc2Vec.load("word2vec.model")

target_id = 0
print('Target: {}'.format(train_corpus[target_id].original))

similarity = {}
for i, sentence in enumerate(train_corpus):
    similarity[i] = model.docvecs.similarity(target_id, i)
    
sorted_similarity = sorted(similarity.items(), key=lambda item: item[1], reverse=True)

for i, value in sorted_similarity[:10]:
    print('[{}] {}'.format(value, train_corpus[i].original))

Target: A rough sketch of the circumstances which led up to my being taken aprisoner of war are more or less indispensable.
[1.0] A rough sketch of the circumstances which led up to my being taken aprisoner of war are more or less indispensable.
[0.8225002884864807] My own nurse was especially attentive,and I shall be eternally grateful to her.
[0.7567362189292908] My exemplary behaviour!
[0.7565072774887085] Gad!
[0.7550219297409058] Such a speech from a German secondlieutenant to a British or French senior officer was of coursedisgraceful.
[0.7475894093513489] General Terms of Use and Redistributing Project Gutenberg-tmelectronic works1.A.
[0.7413011193275452] Seeparagraph 1.C below.
[0.7401611804962158] “My God!
[0.7326911091804504] If you wish to charge a fee or distribute a Project Gutenberg-tmelectronic work or group of works on different terms than are setforth in this agreement, you must obtain permission in writing fromboth the Project Gutenberg Literary Archive Foundation and

  if np.issubdtype(vec.dtype, np.int):
