In [None]:
import nltk
from nltk.corpus import wordnet as wn

# Fetch the WordNet corpus into the local nltk_data directory; the recorded
# output shows this is a no-op when the package is already up to date.
nltk.download("wordnet")

# Simple sanity check: look up every synset for the lemma "dog" (no POS filter,
# so verb senses are included) and display the list as the cell's result.
dog_synsets = wn.synsets("dog")
dog_synsets


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ok\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


[Synset('dog.n.01'),
 Synset('frump.n.01'),
 Synset('dog.n.03'),
 Synset('cad.n.01'),
 Synset('frank.n.02'),
 Synset('pawl.n.01'),
 Synset('andiron.n.01'),
 Synset('chase.v.01')]

In [38]:
# Inspect the first "dog" synset (dog.n.01 in the recorded output).
# NOTE: `s` is a notebook-global that later cells rebind and read, so anything
# downstream depends on which cell assigned it last.
s = dog_synsets[0]
print("ID:", s.name())  # synset identifier, e.g. 'dog.n.01'
print("POS:", s.pos())  # part-of-speech tag ('n' here)
print("Definition:", s.definition())  # gloss text
print("Examples:", s.examples())  # list of usage example strings


ID: dog.n.01
POS: n
Definition: a member of the genus Canis (probably descended from the common wolf) that has been domesticated by man since prehistoric times; occurs in many breeds
Examples: ['the dog barked all night']


In [39]:
# Show the synsets linked to `s` through three WordNet relations.
print("Hypernyms:", s.hypernyms())  # more general concepts
print("Hyponyms (first 5):", s.hyponyms()[:5])  # more specific concepts, truncated to keep the output short
print("Similar-to:", s.similar_tos())  # empty for this synset in the recorded output


Hypernyms: [Synset('domestic_animal.n.01'), Synset('canine.n.02')]
Hyponyms (first 5): [Synset('corgi.n.01'), Synset('dalmatian.n.02'), Synset('cur.n.01'), Synset('pooch.n.01'), Synset('lapdog.n.01')]
Similar-to: []


In [44]:
# Same lookup for "cat": every synset for the lemma (no POS filter), displayed
# as the cell's result.
cat_synsets = wn.synsets("cat")
cat_synsets


[Synset('cat.n.01'),
 Synset('guy.n.01'),
 Synset('cat.n.03'),
 Synset('kat.n.01'),
 Synset('cat-o'-nine-tails.n.01'),
 Synset('caterpillar.n.02'),
 Synset('big_cat.n.01'),
 Synset('computerized_tomography.n.01'),
 Synset('cat.v.01'),
 Synset('vomit.v.01')]

In [45]:
# Rebind the shared name `s` to the first "cat" synset (cat.n.01). The Neo4j
# cells further down read `s`, so they operate on this synset when the notebook
# is run in the recorded order.
s = cat_synsets[0]
print("Hypernyms:", s.hypernyms())  # more general concepts
print("Hyponyms (first 5):", s.hyponyms()[:5])  # only two exist here, so the slice keeps them all
print("Similar-to:", s.similar_tos())  # empty for this synset in the recorded output


Hypernyms: [Synset('feline.n.01')]
Hyponyms (first 5): [Synset('wildcat.n.03'), Synset('domestic_cat.n.01')]
Similar-to: []


In [None]:
import os
from getpass import getpass

from neo4j import GraphDatabase

# Connection settings are read from the environment so that no secret is stored
# in the notebook. URI and user fall back to the local development defaults.
URI = os.environ.get("NEO4J_URI", "neo4j://127.0.0.1:7687")
USER = os.environ.get("NEO4J_USER", "neo4j")
# The password is never written into the notebook: use NEO4J_PASSWORD when it is
# set, otherwise prompt for it without echoing the input.
PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# Shared driver object; the later cells and helper functions open their
# sessions from this module-level `driver`.
driver = GraphDatabase.driver(URI, auth=(USER, PASSWORD))

# Round-trip a trivial query so that bad credentials or an unreachable server
# fail here rather than in a later cell.
with driver.session() as session:
    result = session.run("RETURN 'Connected!' AS msg").single()
    print(result["msg"])


Connected!


In [55]:
# Upsert a :Synset node for whatever synset `s` currently refers to
# (cat.n.01 when the cells are run in the recorded order).
with driver.session() as session:
    # MERGE matches or creates the node keyed on `id`; SET then (over)writes its
    # descriptive properties, so re-running the cell does not create duplicates.
    # Values are passed as query parameters rather than spliced into the text.
    session.run("""
        MERGE (n:Synset {id: $id})
        SET n.definition = $definition,
            n.pos = $pos
    """, id=s.name(), definition=s.definition(), pos=s.pos())


In [56]:
# Only the first hypernym of `s` is used; indexing with [0] raises IndexError
# for a synset that has no hypernyms.
hyper = s.hypernyms()[0]

# Create the hypernym node and the relation to `s`.
with driver.session() as session:
    # Both endpoints are MERGEd by id, so the cell works whether or not either
    # node already exists. The edge points from the more general synset (h) to
    # the more specific one (c), and MERGE keeps it from being duplicated.
    session.run("""
        MERGE (h:Synset {id: $h})
        MERGE (c:Synset {id: $c})
        MERGE (h)-[:HYPERNYM_OF]->(c)
    """, h=hyper.name(), c=s.name())


In [7]:
from neo4j import GraphDatabase
from nltk.corpus import wordnet as wn


In [8]:
def get_synset_ids(word, pos="n"):
    """List the WordNet synset identifiers for ``word``.

    Args:
        word: Lemma to look up.
        pos: Part-of-speech filter forwarded to ``wn.synsets``; nouns by default.

    Returns:
        A list of synset names such as ``'dog.n.01'``, in the order WordNet
        returns them. Empty when the word has no synset for that POS.
    """
    synset_ids = []
    for synset in wn.synsets(word, pos=pos):
        synset_ids.append(synset.name())
    return synset_ids


def shortest_path_length(id1, id2, max_len=10):
    """Return the hop count of the shortest path between two synsets in Neo4j.

    The search follows HYPERNYM_OF, HYPONYM_OF and SIMILAR_TO relationships in
    either direction, using the module-level ``driver``.

    Args:
        id1: Synset id of the first node (e.g. ``'dog.n.01'``).
        id2: Synset id of the second node.
        max_len: Maximum number of relationships a path may contain.

    Returns:
        The path length as an int (0 when both ids name the same existing
        node), or ``None`` when either node is missing or no path exists within
        ``max_len`` hops.

    Raises:
        ValueError: If ``max_len`` is not a positive integer.
    """
    # Cypher does not accept a query parameter as a variable-length bound, so
    # the limit is embedded in the query text. Coercing to int first guarantees
    # that only a number can ever be spliced in.
    hops = int(max_len)
    if hops < 1:
        raise ValueError("max_len must be a positive integer")

    if id1 == id2:
        # Neo4j's default configuration rejects shortestPath() when both
        # endpoints are the same node, so identical ids are answered without it:
        # distance 0 if the node exists, no row (-> None) otherwise.
        query = """
        MATCH (a:Synset {id:$id1})
        RETURN 0 AS d
        LIMIT 1
        """
        params = {"id1": id1}
    else:
        query = """
        MATCH (a:Synset {id:$id1}), (b:Synset {id:$id2}),
              p = shortestPath((a)-[:HYPERNYM_OF|HYPONYM_OF|SIMILAR_TO*..%d]-(b))
        RETURN length(p) AS d
        """ % hops
        params = {"id1": id1, "id2": id2}

    with driver.session() as session:
        res = session.run(query, **params).single()
        # single() yields None when the query produced no row.
        if res is not None and res["d"] is not None:
            return res["d"]
    return None


def path_similarity(word1, word2, pos="n"):
    """Score how close two words are in the Neo4j synset graph.

    Every synset of ``word1`` is paired with every synset of ``word2``; each
    pair is scored as ``1 / (1 + d)`` where ``d`` is the value returned by
    ``shortest_path_length``. The highest-scoring pair wins, and the first pair
    encountered is kept on ties.

    Args:
        word1: First word.
        word2: Second word.
        pos: Part of speech used to select the synsets of both words.

    Returns:
        ``(similarity, (synset_id1, synset_id2))`` for the best pair, or
        ``None`` when either word has no synsets or no pair is connected.
    """
    senses_a = get_synset_ids(word1, pos)
    senses_b = get_synset_ids(word2, pos)
    if not (senses_a and senses_b):
        return None

    top_score, top_pair = 0.0, None
    for sense_a in senses_a:
        for sense_b in senses_b:
            hops = shortest_path_length(sense_a, sense_b)
            if hops is None:
                # Unconnected pair (or node not loaded): nothing to score.
                continue
            score = 1.0 / (1.0 + hops)
            if score > top_score:
                top_score, top_pair = score, (sense_a, sense_b)

    return None if top_pair is None else (top_score, top_pair)


In [9]:

# Smoke test of get_synset_ids: `pos` defaults to "n", so only noun synsets are
# listed (the verb senses seen in the earlier unfiltered lookups are absent).
print(get_synset_ids("dog"))
print(get_synset_ids("cat"))


['dog.n.01', 'frump.n.01', 'dog.n.03', 'cad.n.01', 'frank.n.02', 'pawl.n.01', 'andiron.n.01']
['cat.n.01', 'guy.n.01', 'cat.n.03', 'kat.n.01', "cat-o'-nine-tails.n.01", 'caterpillar.n.02', 'big_cat.n.01', 'computerized_tomography.n.01']
