<a href="https://colab.research.google.com/github/GoldPapaya/synonym-pathfinder/blob/main/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
!pip install nltk
!python -m nltk.downloader wordnet

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [43]:
import nltk
from nltk.corpus import wordnet as wn
import pandas as pd
import networkx as nx

In [46]:
class WordNetGraph:
    """Minimal wrapper around a networkx graph that stores synsets as nodes.

    NOTE(review): a class with the same name is defined again further down in
    this notebook; once that cell runs, it shadows this definition.
    """

    def __init__(self) -> None:
        # Underlying undirected networkx graph; starts out empty.
        self.graph = nx.Graph()

    def add_synset_node(self, synset) -> None:
        """Add ``synset`` to the underlying graph as a node.

        Args:
            synset: Object to store as a node (presumably an NLTK ``Synset``
                such as the result of ``wn.synset(...)``).
        """
        self.graph.add_node(synset)

In [48]:
# Smoke test: put two unrelated synsets into a fresh graph and confirm that
# both show up as nodes.
graph = WordNetGraph()

synset1, synset2 = (wn.synset(name) for name in ('dog.n.01', 'bank.n.01'))
for node in (synset1, synset2):
    graph.add_synset_node(node)

print(f"Nodes in graph: {list(graph.graph.nodes())}")
print(f"Graph has {graph.graph.number_of_nodes()} nodes")

Nodes in graph: [Synset('dog.n.01'), Synset('bank.n.01')]
Graph has 2 nodes


In [54]:
class WordNetGraph:
    """Undirected graph of WordNet synsets joined by hypernym/hyponym links."""

    def __init__(self):
        # Nodes are synsets; an edge joins a synset to a direct hypernym or
        # hyponym that was reached while expanding.
        self.graph = nx.Graph()

    def add_synset_node(self, synset):
        """Add ``synset`` to the graph as a node."""
        self.graph.add_node(synset)

    def expand_from_word(self, word: str, max_degree: int, verbose: bool = True):
        """
        Rebuild the graph with all synsets within max_degree of a starting word.

        Any existing graph content is discarded first. Every synset of `word`
        becomes a degree-0 node; the graph is then grown breadth-first along
        hypernym/hyponym relations, one layer per degree. Each relation that is
        followed is stored as an edge, so the result can be used for path
        queries and not only for membership tests.

        Args:
            word: Starting word (e.g., 'bank')
            max_degree: Maximum degree to expand (0 = just starting synsets).
                Values below 1 add nothing beyond the starting synsets.
            verbose: If True (the default), print progress and the
                hypernyms/hyponyms of every expanded synset. The
                "No synsets found" notice is printed regardless, because the
                method has no other way of reporting that case.

        Returns:
            None. The result is stored in ``self.graph``.
        """
        log = print if verbose else (lambda *args, **kwargs: None)

        # Clear existing graph
        self.graph.clear()

        # Get all synsets for the starting word
        starting_synsets = wn.synsets(word)
        if not starting_synsets:
            print(f"No synsets found for '{word}'")
            return

        log(f"Starting with {len(starting_synsets)} synset(s) for '{word}':")
        for synset in starting_synsets:
            log(f"  - {synset.name()}: {synset.definition()[:50]}...")
            self.add_synset_node(synset)

        def neighbors(synset):
            # Query each relation once and reuse the result for both the
            # log line and the returned list.
            hypernyms = synset.hypernyms()
            hyponyms = synset.hyponyms()
            log('hyper:', hypernyms, 'hypo:', hyponyms)
            return list(hypernyms) + list(hyponyms)

        # Layers are ordered, de-duplicated lists rather than sets: set
        # iteration order depends on hash values and may differ between runs,
        # which would make node order and printed output unreproducible.
        current_layer = list(dict.fromkeys(starting_synsets))

        # range() is empty for max_degree < 1, so no expansion happens then.
        for degree in range(1, max_degree + 1):
            log(f"\nExpanding to degree {degree}...")
            next_layer = []

            for synset in current_layer:
                for neighbor in neighbors(synset):
                    if neighbor not in self.graph:
                        self.add_synset_node(neighbor)
                        next_layer.append(neighbor)
                        log(f"  Added: {neighbor.name()}")
                    # Record the relation even when the neighbor was already
                    # known, so links back to earlier layers are kept.
                    self.graph.add_edge(synset, neighbor)

            current_layer = next_layer
            if not current_layer:
                log(f"No more nodes at degree {degree}")
                break

In [55]:
# Create a fresh, empty graph wrapper for the demo below
graph = WordNetGraph()

# Alternative example (disabled): expand from 'bank' up to degree 2
#graph.expand_from_word('bank', max_degree=2)
#print(f"\nFinal graph:")
#print(f"Nodes in graph: {len(graph.graph.nodes())}")
#print(f"Sample nodes: {list(graph.graph.nodes())[:5]}")

# Expand from 'tome' up to degree 2, then report the node count and the
# first five nodes of the resulting graph
graph.expand_from_word('tome', max_degree=2)
print(f"\nFinal graph:")
print(f"Nodes in graph: {len(graph.graph.nodes())}")
print(f"Sample nodes: {list(graph.graph.nodes())[:5]}")

Starting with 1 synset(s) for 'tome':
  - tome.n.01: a (usually) large and scholarly book...

Expanding to degree 1...
hyper: [Synset('book.n.01')] hypo: []
  Added: book.n.01

Expanding to degree 2...
hyper: [Synset('publication.n.01')] hypo: [Synset('prayer_book.n.01'), Synset('playbook.n.02'), Synset('curiosa.n.01'), Synset('pop-up_book.n.01'), Synset('bestiary.n.01'), Synset('reference_book.n.01'), Synset('booklet.n.01'), Synset('catechism.n.02'), Synset('trade_book.n.01'), Synset('yearbook.n.01'), Synset('formulary.n.01'), Synset('catalog.n.01'), Synset('review_copy.n.01'), Synset('workbook.n.01'), Synset('tome.n.01'), Synset('copybook.n.01'), Synset('songbook.n.01'), Synset('textbook.n.01'), Synset('authority.n.07'), Synset('phrase_book.n.01'), Synset('storybook.n.01'), Synset('appointment_book.n.01')]
  Added: publication.n.01
  Added: prayer_book.n.01
  Added: playbook.n.02
  Added: curiosa.n.01
  Added: pop-up_book.n.01
  Added: bestiary.n.01
  Added: reference_book.n.01
  Add