<a href="https://colab.research.google.com/github/GoldPapaya/synonym-pathfinder/blob/main/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Environment setup: install NLTK, then fetch the WordNet corpus that the
# `nltk.corpus.wordnet` import in the next cell reads from.
!pip install nltk
!python -m nltk.downloader wordnet

[nltk_data] Downloading package wordnet to /root/nltk_data...


In [2]:
import nltk
from nltk.corpus import wordnet as wn
import pandas as pd
import networkx as nx

Below is code to test adding nodes to a graph (to be used later).

In [3]:
class WordNetGraph:
    """Thin wrapper around a NetworkX graph whose nodes are WordNet synsets."""

    def __init__(self) -> None:
        # The wrapped graph is exposed directly as `self.graph` so callers can
        # use the regular NetworkX API on it.
        self.graph = nx.Graph()

    def add_synset_node(self, synset) -> None:
        """
        Add a synset to the wrapped graph as a node.

        Args:
            synset: The object to store as a node (a WordNet synset in this notebook)
        """
        self.graph.add_node(synset)

In [4]:
# Smoke test: wrap an empty graph and register two unrelated noun synsets.
graph = WordNetGraph()

synset1, synset2 = (wn.synset(name) for name in ('dog.n.01', 'bank.n.01'))
for node in (synset1, synset2):
    graph.add_synset_node(node)

# Both synsets should now be reported as nodes.
print(f"Nodes in graph: {list(graph.graph.nodes())}")
print(f"Graph has {graph.graph.number_of_nodes()} nodes")

Nodes in graph: [Synset('dog.n.01'), Synset('bank.n.01')]
Graph has 2 nodes


The functions below collect synonym candidates for a given WordNet synset by following its lemmas out to a given depth.

In [None]:
def get_synset_synonyms(synset, depth):
    """
    Retrieve synonyms of a WordNet synset up to a specified depth.

    The synset graph is walked breadth-first, one level per unit of depth.
    Every synset is expanded at most once and every lemma name is looked up
    at most once, so the cost grows with the number of reachable synsets
    instead of exponentially with ``depth``.

    Args:
        synset: A WordNet synset object
        depth: Integer depth (0 for direct synonyms, 1 for synonyms of synonyms, etc.)

    Returns:
        Set of synonym lemma names (underscores replaced by spaces)
    """
    all_synonyms = set()
    visited = {synset}        # synsets already queued or expanded
    looked_up = set()         # lemma names already passed to wn.synsets
    frontier = [synset]
    level = 0

    while frontier:
        next_frontier = []
        for current in frontier:
            names = [lemma.name() for lemma in current.lemmas()]
            all_synonyms.update(names)

            # Synsets sitting exactly at the depth limit contribute their
            # lemmas but are not expanded any further.
            if level >= depth:
                continue

            for name in names:
                # A name seen before was looked up at an equal or shallower
                # level, so all of its synsets are already in `visited`.
                if name in looked_up:
                    continue
                looked_up.add(name)
                for neighbor in wn.synsets(name):
                    if neighbor not in visited:
                        visited.add(neighbor)
                        next_frontier.append(neighbor)
        frontier = next_frontier
        level += 1

    # Remove underscores and return the set
    return {name.replace('_', ' ') for name in all_synonyms}


In [None]:
def get_synset_synsets(synset, depth):
    """
    Retrieve a list of unique synsets related to a WordNet synset up to a specified depth.

    The synset graph is walked breadth-first, so every synset is expanded at
    most once and every lemma name is looked up at most once. The returned
    list is in discovery order (the input synset first, then depth-1 synsets,
    and so on), which makes the output reproducible between runs.

    Args:
        synset: A WordNet synset object
        depth: Integer depth (0 for the input synset, 1 for synsets of its lemmas, etc.)

    Returns:
        List of unique synset objects
    """
    ordered = [synset]        # discovery order, returned to the caller
    seen = {synset}           # fast membership test for `ordered`
    looked_up = set()         # lemma names already passed to wn.synsets
    frontier = [synset]
    level = 0

    # Only synsets strictly above the depth limit are expanded.
    while frontier and level < depth:
        next_frontier = []
        for current in frontier:
            for lemma in current.lemmas():
                name = lemma.name()
                # A repeated name cannot yield synsets we have not seen yet.
                if name in looked_up:
                    continue
                looked_up.add(name)
                for neighbor in wn.synsets(name):
                    if neighbor not in seen:
                        seen.add(neighbor)
                        ordered.append(neighbor)
                        next_frontier.append(neighbor)
        frontier = next_frontier
        level += 1

    return ordered


In [None]:
def get_synset_synonyms_list(synset, depth):
    """
    Retrieve a list of lists, each containing a synset name and its synonyms, up to a specified depth.

    The synset graph is walked breadth-first, so every synset is expanded at
    most once and every lemma name is looked up at most once. Rows are
    returned in discovery order (the input synset first), which makes the
    output reproducible between runs.

    Args:
        synset: A WordNet synset object
        depth: Integer depth (0 for the input synset, 1 for synsets of its lemmas, etc.)

    Returns:
        List of lists, each in the form [synset_name, synonym1, synonym2, ...]
    """
    ordered = [synset]        # discovery order of unique synsets
    seen = {synset}           # fast membership test for `ordered`
    looked_up = set()         # lemma names already passed to wn.synsets
    frontier = [synset]
    level = 0

    # Only synsets strictly above the depth limit are expanded.
    while frontier and level < depth:
        next_frontier = []
        for current in frontier:
            for lemma in current.lemmas():
                name = lemma.name()
                # A repeated name cannot yield synsets we have not seen yet.
                if name in looked_up:
                    continue
                looked_up.add(name)
                for neighbor in wn.synsets(name):
                    if neighbor not in seen:
                        seen.add(neighbor)
                        ordered.append(neighbor)
                        next_frontier.append(neighbor)
        frontier = next_frontier
        level += 1

    # Convert to list of [synset_name, synonym1, synonym2, ...]
    return [
        [found.name()] + [lemma.name().replace('_', ' ') for lemma in found.lemmas()]
        for found in ordered
    ]

In [None]:
# Smoke tests for the three traversal helpers defined above.
# NOTE: despite its name, dog_synset holds the 'bank.n.01' synset.
dog_synset = wn.synset('bank.n.01')

# --- Synonym sets at depths 0, 1 and 2 ---
synonyms_0, synonyms_1, synonyms_2 = (
    get_synset_synonyms(dog_synset, level) for level in (0, 1, 2)
)
print(len(synonyms_0), "Depth 0 synonyms:", synonyms_0)
print(len(synonyms_1), "Depth 1 synonyms:", synonyms_1)
print(len(synonyms_2), "Depth 2 synonyms:", synonyms_2)
# A depth-5 run, get_synset_synonyms(dog_synset, 5), is left disabled here.

# --- Synset objects at depths 0, 1 and 2 ---
synsets_0, synsets_1, synsets_2 = (
    get_synset_synsets(dog_synset, level) for level in (0, 1, 2)
)
print("Depth 0 synsets:", [syn.name() for syn in synsets_0])
print(len(synsets_1), "Depth 1 synsets:", [syn.name() for syn in synsets_1])
print(len(synsets_2), "Depth 2 synsets:", [syn.name() for syn in synsets_2])

# --- [synset_name, synonym, ...] rows at depths 0, 1 and 2 ---
# The synsets_N names are rebound here and now hold lists of lists.
synsets_0, synsets_1, synsets_2 = (
    get_synset_synonyms_list(dog_synset, level) for level in (0, 1, 2)
)
print("Depth 0 synsets and synonyms:", synsets_0)
print("Depth 1 synsets and synonyms:", synsets_1)
print("Depth 2 synsets and synonyms:", synsets_2)

1 Depth 0 synonyms: {'bank'}
14 Depth 1 synonyms: {'depository financial institution', 'money box', 'savings bank', 'banking company', 'banking concern', 'swear', 'cant', 'bank', 'deposit', 'bank building', 'rely', 'coin bank', 'camber', 'trust'}
72 Depth 2 synonyms: {'money box', 'aver', 'believe', 'down payment', 'trustfulness', 'banking concern', 'swear', 'entrust', 'confide', 'tilt', 'depose', 'trustingness', 'jargon', 'reliance', 'confidence', 'stick', 'bank', 'intrust', 'cartel', 'buzzword', 'slang', 'savings bank', 'patois', 'situate', 'affirm', 'lingo', 'sediment', 'deposit', 'sedimentation', 'blaspheme', 'cant over', 'pitch', 'cuss', 'rely', 'commit', 'camber', 'cashbox', 'imprecate', 'repository', 'trust', 'slant', 'pious platitude', 'posit', 'faith', 'argot', 'hope', 'fix', 'verify', 'wedge', 'till', 'depository financial institution', 'curse', 'avow', 'vernacular', 'swan', 'alluviation', 'depository', 'combine', 'banking company', 'chamfer', 'cant', 'lodge', 'bevel', 'corpo

In [None]:
# Look at one specific sense of "bank" and at every synset the word "cant"
# belongs to, to see how two words become linked through a shared synset.
banked_turn = wn.synset('bank.n.07')
print(banked_turn.definition())
print(banked_turn.lemma_names())
print(wn.synsets('cant'))

a slope in the turn of a road or track; the outside is higher than the inside in order to reduce the effects of centrifugal force
['bank', 'cant', 'camber']
[Synset('buzzword.n.01'), Synset('bank.n.07'), Synset('slang.n.02'), Synset('cant.n.04'), Synset('bevel.n.01'), Synset('cant.v.01')]


In [5]:
def get_synset_neighbors(input_synset, depth):
    """
    Returns a dictionary of synsets and their lemma-associated synsets up to a specified depth.

    Two synsets are neighbors when a lemma name of one is also a word of the
    other and both have the same part of speech. Exploration proceeds level
    by level from the starting synset; keys and neighbor lists are emitted in
    discovery order so the output is reproducible between runs.

    Args:
        input_synset (str): A synset name (e.g., 'trust.v.01')
        depth (int): The depth of neighbor exploration (0 processes only the
            starting synset; a negative depth yields an empty dictionary)

    Returns:
        dict: Dictionary with synset names as keys and lists of
            lemma-associated synset names (excluding the key's own synset)
            as values.
        str: The message "Invalid synset: <input_synset>" when the starting
            synset cannot be looked up.
    """
    # Initialize the result dictionary
    result = {}

    # Get the starting synset. Lookup failures are reported as a message, as
    # before, but KeyboardInterrupt/SystemExit are no longer swallowed.
    try:
        start_synset = wn.synset(input_synset)
    except Exception:
        return f"Invalid synset: {input_synset}"

    frontier = [start_synset]
    processed = set()

    # Process synsets up to the specified depth
    for level in range(depth + 1):
        next_frontier = []
        queued = set()  # keeps next_frontier free of duplicates

        for synset in frontier:
            # A synset may be queued again by a later neighbor; handle it once.
            if synset in processed:
                continue

            # Collect synsets sharing any of this synset's lemma names,
            # restricted to the same part of speech, in first-seen order.
            pos = synset.pos()
            neighbor_synsets = []
            seen_neighbors = set()
            for lemma in synset.lemmas():
                for lemma_synset in wn.synsets(lemma.name(), pos=pos):
                    if lemma_synset not in seen_neighbors:
                        seen_neighbors.add(lemma_synset)
                        neighbor_synsets.append(lemma_synset)

            # Add to result dictionary, excluding the synset itself
            result[synset.name()] = [n.name() for n in neighbor_synsets if n != synset]

            # Only queue neighbors if depth allows further exploration
            if level < depth:
                for neighbor in neighbor_synsets:
                    if neighbor not in processed and neighbor not in queued:
                        queued.add(neighbor)
                        next_frontier.append(neighbor)

            processed.add(synset)

        frontier = next_frontier

    return result

In [17]:
# Demo: explore the same-POS neighborhood of one synset, three levels deep.
start_name = 'depository_financial_institution.n.01'
test_synset = wn.synset(start_name)

result = get_synset_neighbors(start_name, 3)

# One "name: [neighbor names]" line per explored synset, then the raw dict.
for synset, neighbors in result.items():
    print(f"{synset}: {neighbors}")
print('\n', result)

depository_financial_institution.n.01: ['bank.n.03', 'savings_bank.n.02', 'bank.n.05', 'bank.n.04', 'bank.n.01', 'bank.n.07', 'bank.n.10', 'bank.n.09', 'bank.n.06']
bank.n.03: ['savings_bank.n.02', 'bank.n.05', 'bank.n.04', 'depository_financial_institution.n.01', 'bank.n.01', 'bank.n.07', 'bank.n.10', 'bank.n.09', 'bank.n.06']
savings_bank.n.02: ['bank.n.03', 'depository_financial_institution.n.01', 'savings_bank.n.01', 'bank.n.01', 'bank.n.04', 'bank.n.05', 'bank.n.07', 'bank.n.10', 'bank.n.09', 'bank.n.06', 'cashbox.n.01']
bank.n.05: ['bank.n.03', 'savings_bank.n.02', 'bank.n.04', 'depository_financial_institution.n.01', 'bank.n.01', 'bank.n.07', 'bank.n.10', 'bank.n.09', 'bank.n.06']
bank.n.04: ['bank.n.03', 'savings_bank.n.02', 'bank.n.05', 'depository_financial_institution.n.01', 'bank.n.01', 'bank.n.07', 'bank.n.10', 'bank.n.09', 'bank.n.06']
bank.n.01: ['bank.n.03', 'savings_bank.n.02', 'bank.n.05', 'bank.n.04', 'depository_financial_institution.n.01', 'bank.n.07', 'bank.n.10',