ref:
https://github.com/RaRe-Technologies/gensim/blob/920c029ca97f961c8df264672c34936607876694/docs/notebooks/Poincare%20Tutorial.ipynb

In [2]:
import os
import logging
from tqdm import tqdm_notebook as tqdm
import numpy as np
from nltk.corpus import wordnet as wn
from gensim.models.poincare import PoincareModel, PoincareKeyedVectors, PoincareRelations
logging.basicConfig(level=logging.INFO)

In [3]:
PoincareModel(train_data=[('node.1', 'node.2'), ('node.2', 'node.3')])

INFO:gensim.models.poincare:Loading relations from train data..
INFO:gensim.models.poincare:Loaded 2 relations from train data, 3 nodes


<gensim.models.poincare.PoincareModel at 0x1148de7f0>

In [4]:
words = wn.words()

In [28]:
taxi_synets = wn.synsets("dog", pos="n")
taxi_synets

[Synset('dog.n.01'),
 Synset('frump.n.01'),
 Synset('dog.n.03'),
 Synset('cad.n.01'),
 Synset('frank.n.02'),
 Synset('pawl.n.01'),
 Synset('andiron.n.01')]

In [29]:
taxi_synets[0].hypernyms()

[Synset('canine.n.02'), Synset('domestic_animal.n.01')]

In [30]:
taxi_synets[0].hypernym_paths()

[[Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('living_thing.n.01'),
  Synset('organism.n.01'),
  Synset('animal.n.01'),
  Synset('chordate.n.01'),
  Synset('vertebrate.n.01'),
  Synset('mammal.n.01'),
  Synset('placental.n.01'),
  Synset('carnivore.n.01'),
  Synset('canine.n.02'),
  Synset('dog.n.01')],
 [Synset('entity.n.01'),
  Synset('physical_entity.n.01'),
  Synset('object.n.01'),
  Synset('whole.n.02'),
  Synset('living_thing.n.01'),
  Synset('organism.n.01'),
  Synset('animal.n.01'),
  Synset('domestic_animal.n.01'),
  Synset('dog.n.01')]]

In [8]:
nouns = set([])
for word in tqdm(words):
    nouns.update(wn.synsets(word, pos='n'))




In [26]:
len(nouns)

82115

In [69]:
hypernyms = set([])
for noun in nouns:
    for path in noun.hypernym_paths():
        hypernyms.update( (noun, hypernym) for hypernym in path[1:] if hypernym.pos() == 'n')
with open("hypernyms.tsv", 'w') as f:
        for n1, n2 in hypernyms:
            print(n1.name(), n2.name(), sep="\t", file=f)

In [58]:
mammals = []
mammal_tree = []
target = wn.synset("mammal.n.01")
for noun in nouns:
    for path in noun.hypernym_paths():
        if target in path:
            pos = path.index(target)
            for i in range(pos, len(path)-1):
                mammals.append((noun, path[i]))
                mammal_tree.append((noun.name(), path[i].name()))
with open("mammals.tsv", 'w') as f:
        for n1, n2 in mammals:
            print(n1.name(), n2.name(), sep="\t", file=f)

In [59]:
len(mammals)

6575

In [60]:
relations = PoincareRelations(file_path="mammals.tsv", delimiter='\t')

In [63]:
model = PoincareModel(train_data=relations, size=2, burn_in=0)
model.train(epochs=100, print_every=500)

INFO:gensim.models.poincare:Loading relations from train data..
INFO:gensim.models.poincare:Loaded 6575 relations from train data, 1182 nodes
INFO:gensim.models.poincare:training model of size 2 with 1 workers on 6575 relations for 100 epochs and 0 burn-in epochs, using lr=0.10000 burn-in lr=0.01000 negative=10
INFO:gensim.models.poincare:Starting training (100 epochs)----------------------------------------
INFO:gensim.models.poincare:Training on epoch 1, examples #4990-#5000, loss: 23.85
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.75 s, 6701.37 examples / s
INFO:gensim.models.poincare:Training on epoch 2, examples #4990-#5000, loss: 22.97
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.72 s, 6908.86 examples / s
INFO:gensim.models.poincare:Training on epoch 3, examples #4990-#5000, loss: 21.64
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.67 s, 7459.20 examples / s
INFO:gensim.models.poincare:Training on epoch 4, examples #4990-#5000, loss: 2

INFO:gensim.models.poincare:Training on epoch 47, examples #4990-#5000, loss: 10.12
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.68 s, 7385.45 examples / s
INFO:gensim.models.poincare:Training on epoch 48, examples #4990-#5000, loss: 10.00
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.72 s, 6917.05 examples / s
INFO:gensim.models.poincare:Training on epoch 49, examples #4990-#5000, loss: 9.97
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.72 s, 6956.67 examples / s
INFO:gensim.models.poincare:Training on epoch 50, examples #4990-#5000, loss: 9.98
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.80 s, 6268.14 examples / s
INFO:gensim.models.poincare:Training on epoch 51, examples #4990-#5000, loss: 9.83
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.79 s, 6357.50 examples / s
INFO:gensim.models.poincare:Training on epoch 52, examples #4990-#5000, loss: 9.68
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.73 s,

INFO:gensim.models.poincare:Time taken for 5000 examples: 0.66 s, 7550.89 examples / s
INFO:gensim.models.poincare:Training on epoch 96, examples #4990-#5000, loss: 6.95
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.76 s, 6612.96 examples / s
INFO:gensim.models.poincare:Training on epoch 97, examples #4990-#5000, loss: 7.21
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.76 s, 6604.55 examples / s
INFO:gensim.models.poincare:Training on epoch 98, examples #4990-#5000, loss: 6.89
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.76 s, 6591.73 examples / s
INFO:gensim.models.poincare:Training on epoch 99, examples #4990-#5000, loss: 6.92
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.85 s, 5873.63 examples / s
INFO:gensim.models.poincare:Training on epoch 100, examples #4990-#5000, loss: 6.86
INFO:gensim.models.poincare:Time taken for 5000 examples: 0.80 s, 6238.53 examples / s
INFO:gensim.models.poincare:Training finished


In [64]:
model.save('./mammal_test_model')
model=PoincareModel.load('./mammal_test_model')
model.kv.save_word2vec_format('./mammal_test_vectors')
PoincareKeyedVectors.load_word2vec_format('./mammal_test_vectors')

INFO:gensim.utils:saving PoincareModel object under ./mammal_test_model, separately None
INFO:gensim.utils:saved ./mammal_test_model
INFO:gensim.utils:loading PoincareModel object from ./mammal_test_model
INFO:gensim.utils:loading kv recursively from ./mammal_test_model.kv.* with mmap=None
INFO:gensim.utils:loaded ./mammal_test_model
INFO:gensim.models.keyedvectors:storing 1182x2 projection weights into ./mammal_test_vectors
INFO:gensim.models.keyedvectors:loading projection weights from ./mammal_test_vectors
INFO:gensim.models.keyedvectors:loaded (1182, 2) matrix from ./mammal_test_vectors


<gensim.models.poincare.PoincareKeyedVectors at 0x12be8b550>

In [65]:
model.kv.distance('dog.n.01', 'cat.n.01')

4.7709195692607285

In [66]:
model.kv.distance('dog.n.01', 'german_shepherd.n.01')

6.5684571435202974

In [67]:
#model.kv.most_similar('electricity.n.01')

In [68]:
model.kv.most_similar('dog.n.01')

[('canine.n.02', 0.1399973566916916),
 ('carnivore.n.01', 0.8615759159866268),
 ('lapdog.n.01', 0.9673234516391055),
 ('hunting_dog.n.01', 0.9739280759356185),
 ('basenji.n.01', 1.2982963811506512),
 ('unguiculata.n.01', 1.4572044659051688),
 ('cynocephalus_variegatus.n.01', 1.5106376485197455),
 ('female_mammal.n.01', 1.5403693234649667),
 ('unguiculate.n.01', 1.5710320351710816),
 ('plantigrade_mammal.n.01', 1.5715445774974381)]

In [69]:
import pickle
from plotly.offline import init_notebook_mode, iplot
from gensim.viz.poincare import poincare_2d_visualization, poincare_distance_heatmap

init_notebook_mode(connected=True)

show_node_labels = [
    'mammal.n.01', 'placental.n.01', 'ungulate.n.01', 'carnivore.n.01', 'rodent.n.01',
    'canine.n.02', 'even-toed_ungulate.n.01', 'odd-toed_ungulate.n.01', 'elephant.n.01',
    'rhinoceros.n.01', 'german_shepherd.n.01', 'feline.n.01', 'tiger.n.02', 'homo_sapiens.n.01']

In [70]:
figure_title = """
<b>2-D Visualization of model trained on mammals subtree</b><br>
"""
iplot(poincare_2d_visualization(model, mammal_tree, figure_title, show_node_labels=show_node_labels))