<a href="https://colab.research.google.com/github/IsaacFigNewton/SMIED/blob/main/BFS_Semantic_Decomposition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Config

## Install dependencies

In [5]:
!pip install gensim

Collecting gensim
  Downloading gensim-4.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Collecting numpy<2.0,>=1.18.5 (from gensim)
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting scipy<1.14.0,>=1.7.0 (from gensim)
  Downloading scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Downloading gensim-4.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (26.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.6/26.6 MB[0m [31m74.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.0 MB)
[2K   [90m━━━━━━━━━━━

## Import, config stuff

In [33]:
"""
Semantic decomposition of ("cat", "eats", "mouse") using WordNet + spaCy + depth-limited GBFS.
- Uses spaCy to parse verb synset glosses and detect subject/object dependencies.
- If both subject and object tokens are present, branches directly toward original triple synsets.
- Otherwise falls back to WordNet relations.
"""
from typing import Tuple, List, Dict, Optional
import nltk
import spacy
from nltk.corpus import wordnet as wn
from heapq import heappush, heappop
import numpy as np
from nltk.corpus import wordnet as wn
from sklearn.metrics.pairwise import cosine_similarity
import gensim.downloader as api
from collections import deque
import nltk
from nltk.corpus import wordnet as wn
import spacy

In [2]:
nltk.download('wordnet')

# Load spaCy English model for dependency parsing
nlp = spacy.load("en_core_web_sm")

# Download required NLTK data (run once)
nltk.download('wordnet')
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


True

In [3]:
word2vec_model = api.load('word2vec-google-news-300')



# Helpers

In [54]:
def get_all_neighbors(synset: wn.synset):
    """Get all neighbors of a synset based on its POS."""
    neighbors = []

    # Add hypernyms and hyponyms
    neighbors.extend(synset.hypernyms())
    neighbors.extend(synset.hyponyms())

    # Add POS-specific neighbors
    if synset.pos() == 'n':
        neighbors.extend(get_noun_neighbors(synset))
    else:
        neighbors.extend(get_verb_neighbors(synset))

    return neighbors


def get_noun_neighbors(syn: wn.synset):
    """Get neighbors for a noun synset."""
    nbrs = set()
    nbrs.update(syn.part_meronyms())
    nbrs.update(syn.substance_meronyms())
    nbrs.update(syn.member_meronyms())
    nbrs.update(syn.part_holonyms())
    nbrs.update(syn.substance_holonyms())
    nbrs.update(syn.member_holonyms())
    return list(nbrs)


def get_verb_neighbors(syn: wn.synset):
    """Get neighbors for a verb synset."""
    nbrs = set()
    nbrs.update(syn.entailments())
    nbrs.update(syn.causes())
    nbrs.update(syn.also_sees())
    nbrs.update(syn.verb_groups())
    return list(nbrs)

In [61]:
# derived forms of injury
wn.synset("injury.n.01").derived()

AttributeError: 'Synset' object has no attribute 'derived'

# Embedding similarities

In [39]:
def get_synset_embedding_centroid(synset:wn.synset, model=word2vec_model):
    """
    Get the centroid (mean) of Word2Vec embeddings for all lemmas in a synset.

    Args:
        synset: WordNet Synset object (e.g., 'dog.n.01')
        model: Loaded Word2Vec model

    Returns:
        numpy array representing the centroid, or None if no lemmas found
    """
    try:
        # Get all lemma names from the synset
        lemmas = [lemma.name().lower().replace('_', ' ') for lemma in synset.lemmas()]
        # Collect embeddings for lemmas that exist in the model
        embeddings = []
        found_lemmas = []

        for lemma in lemmas:
            # Try the lemma as-is first
            if lemma in model:
                embeddings.append(model[lemma])
                found_lemmas.append(lemma)
            # Try with underscores replaced by spaces (for multi-word terms)
            elif lemma.replace(' ', '_') in model:
                embeddings.append(model[lemma.replace(' ', '_')])
                found_lemmas.append(lemma)
            # Try individual words if it's a multi-word term
            elif ' ' in lemma:
                words = lemma.split()
                word_embeddings = []
                for word in words:
                    if word in model:
                        word_embeddings.append(model[word])
                if word_embeddings:
                    # Average the embeddings of individual words
                    embeddings.append(np.mean(word_embeddings, axis=0))
                    found_lemmas.append(lemma)

        if not embeddings:
            print(f"Warning: No lemmas from {synset.name()} found in Word2Vec model")
            print(f"  Attempted lemmas: {lemmas}")
            return None

        # print(f"Synset {synset.name()}: Found {len(found_lemmas)}/{len(lemmas)} lemmas in model")
        # print(f"  Found: {found_lemmas}")

        # Return the mean of all embeddings
        return np.mean(embeddings, axis=0)

    except Exception as e:
        print(f"Error processing synset {synset.name()}: {e}")
        return None


def emb_asymmetric_lex_rels(synset: wn.synset, model):
    """
    match the asymmetric lexical relations between two synsets.
    eg: meronyms-holonyms, hypernyms-hyponyms, etc.

    Args:
        synset: WordNet Synset object (e.g., 'dog.n.01')

    Returns:
        Dict of embeddings for asymmetric lexical relations of the synset.
    """
    def _rel_centroid(get_attr):
      return np.mean([
          get_synset_embedding_centroid(s)
          for s in get_attr(synset)
      ])

    return {
        "meronyms": {
            "part_meronyms": _rel_centroid(lambda x: x.part_meronyms()),
            "substance_meronyms": _rel_centroid(lambda x: x.substance_meronyms()),
            "member_meronyms": _rel_centroid(lambda x: x.member_meronyms()),
        },
        "holonyms": {
            "part_holonyms": _rel_centroid(lambda x: x.part_holonyms()),
            "substance_holonyms": _rel_centroid(lambda x: x.substance_holonyms()),
            "member_holonyms": _rel_centroid(lambda x: x.member_holonyms()),
        },
        "hypernyms": _rel_centroid(lambda x: x.hypernyms()),
        "hyponyms": _rel_centroid(lambda x: x.hyponyms()),
        "entailments": _rel_centroid(lambda x: x.entailments()),
        "causes": _rel_centroid(lambda x: x.causes()),
        "also_sees": _rel_centroid(lambda x: x.also_sees()),
        "verb_groups": _rel_centroid(lambda x: x.verb_groups()),
    }


pairing_maps = [
    {
        "meronyms": {
            "part_meronyms": "part_holonym",
            "substance_meronyms": "substance_holonym",
            "member_meronyms": "member_holonym",
        },
        "hyponyms": "hypernym",
        # "entailments": "entailments",
        # "causes": "causes",
        # "also_sees": "also_sees",
        # "verb_groups": "verb_groups",
    },
    {
        "holonyms": {
            "part_holonyms": "part_meronym",
            "substance_holonyms": "substance_meronym",
            "member_holonyms": "member_meronym",
        },
        "hypernyms": "hyponym",
        # "entailments": "entailments",
        # "causes": "causes",
        # "also_sees": "also_sees",
        # "verb_groups": "verb_groups",
    }
]


def get_asymmetric_emb_similarity(emb1, emb2, model=word2vec_model):

    def get_asym_sim(i, s1, s2, k, l=None):
        if l is None:
            return cosine_similarity(s1[k], s2[pairing_maps[i][k]])
        else:
            return cosine_similarity(s1[k][l], s2[pairing_maps[i][k][l]])

    # check similarity of asymmetric relations
    #   i.e. similarity of synset1's merynyms to synset2's holonyms
    asymm_overlap_centroids = list()
    # need to aggregate similarities seperately to avoid destructive interference
    #   if everything was aggregated together, similarity between hypernyms
    #   might be cancelled by dissimilarity between hypernyms and hyponyms
    # get centroid for if emb1 hyponyms/meronyms are ~ emb2 hypernyms/holonyms
    for k in pairing_maps[0].keys():
        if isinstance(pairing_maps[k], str):
          asymm_overlap_centroids.append(get_asym_sim(0, emb1, emb2, k))
        else:
          for l in emb1[k].keys():
            asymm_overlap_centroids.append(get_asym_sim(0, emb1, emb2, k, l))
    centroid_high_low = np.mean(asymm_overlap_centroids)
    # get centroid for if emb1 hypernyms/holonyms are ~ emb2 hyponyms/meronyms
    for k in pairing_maps[1].keys():
        if isinstance(pairing_maps[k], str):
          asymm_overlap_centroids.append(get_asym_sim(1, emb1, emb2, k))
        else:
          for l in emb1[k].keys():
            asymm_overlap_centroids.append(get_asym_sim(1, emb1, emb2, k, l))
    centroid_low_high = np.mean(asymm_overlap_centroids)

    # it doesn't matter which one is contains/includes/etc. the other,
    #   as long as they're closer than antonyms or unrelated terms
    #   i.e. a good hyponym-hypernym pair is just as important
    #         as a good hypernym-hyponym pair
    return max(centroid_high_low, centroid_low_high)


def get_synset_similarity(
      synset1:wn.synset,
      synset2:wn.synset,
      model=word2vec_model
    ):
    """
    Compute cosine distance between centroids of two synsets.

    Args:
        synset1: WordNet Synset object (e.g., 'dog.n.01')
        synset2: WordNet Synset object (e.g., 'cat.n.01')
        model: Word2Vec model (if None, will load default)

    Returns:
        Float cosine distance between centroids (0 = identical, 1 = orthogonal, 2 = opposite)
    """
    # Get centroids for both synsets
    synset1_centroid = get_synset_embedding_centroid(synset1, model)
    synset1_asym_rel_embs = emb_asymmetric_lex_rels(synset1, model)
    synset2_centroid = get_synset_embedding_centroid(synset2, model)
    synset2_asym_rel_embs = emb_asymmetric_lex_rels(synset2, model)

    # Check if both centroids were successfully computed
    if synset1_centroid is None or synset2_centroid is None:
        raise ValueError("Could not compute centroids for one or both synsets")

    branch_1_2 = get_asymmetric_emb_similarity(synset1_asym_rel_embs, synset2_asym_rel_embs, model)
    branch_1_2 = get_asymmetric_emb_similarity(synset1_asym_rel_embs, synset2_asym_rel_embs, model)

    # Compute cosine distance
    distance = cosine_similarity([emb1], [emb1])

    return distance


def get_k_closest_synset_pairs(
    start_synsets: List[wn.synset],
    end_synsets: List[wn.synset],
    model=word2vec_model
  ):
  # pair opposing pairs
  pairs = list()
  for s1 in start_synsets:
    for s2 in end_synsets:
      pairs.append((s1.name(), s2.name(), get_synset_similarity(s1, s2)))

  # get the top k most semantically similar pairings
  return sorted(pairs, key=lambda x: x[2], reverse=True)[:k]

# Pathing

In [52]:
# ============================================================================
# Core Path Finding Functions
# ============================================================================

def path_syn_to_syn(start_synset, end_synset, max_depth=6):
    """
    Find shortest path between synsets of the same POS using bidirectional BFS.
    Returns a list of synsets forming the path, or None if no path found.
    """

    if not (start_synset.pos() == end_synset.pos() and start_synset.pos() in {'n', 'v'}):
      raise ValueError(f"{start_synset.name()} POS tag != {end_synset.name()}. Synsets must be of the same POS (noun or verb).")

    # Handle the trivial case where start and end are the same
    if start_synset.name() == end_synset.name():
        return [start_synset]

    # Initialize two search frontiers
    forward_queue = deque([(start_synset, 0)])
    forward_visited = {start_synset.name(): [start_synset]}

    backward_queue = deque([(end_synset, 0)])
    backward_visited = {end_synset.name(): [end_synset]}

    def expand_frontier(queue, visited_from_this_side, visited_from_other_side, is_forward):
        """Expand one step of the search frontier."""
        if not queue:
            return None

        curr_synset, depth = queue.popleft()

        if depth >= (max_depth + 1) // 2:
            return None

        path_to_current = visited_from_this_side[curr_synset.name()]

        for neighbor in get_all_neighbors(curr_synset):
            neighbor_name = neighbor.name()

            if neighbor_name in visited_from_this_side:
                continue

            if is_forward:
                new_path = path_to_current + [neighbor]
            else:
                new_path = [neighbor] + path_to_current

            if neighbor_name in visited_from_other_side:
                other_path = visited_from_other_side[neighbor_name]

                if is_forward:
                    full_path = path_to_current + other_path
                else:
                    full_path = other_path + path_to_current

                return full_path

            visited_from_this_side[neighbor_name] = new_path
            queue.append((neighbor, depth + 1))

        return None

    # Alternate between forward and backward search
    while forward_queue or backward_queue:
        if forward_queue:
            result = expand_frontier(forward_queue, forward_visited, backward_visited, True)
            if result:
                return result

        if backward_queue:
            result = expand_frontier(backward_queue, backward_visited, forward_visited, False)
            if result:
                return result

    return None


# ============================================================================
# Gloss Analysis Helper Functions
# ============================================================================

def extract_subjects_from_gloss(gloss_doc):
    """Extract subject tokens from a parsed gloss."""
    subjects = []

    # Direct subjects
    subjects.extend([tok for tok in gloss_doc if tok.dep_ == "nsubj"])

    # Passive subjects (which are actually objects semantically)
    # Skip these for actor identification
    passive_subjects = [tok for tok in gloss_doc if tok.dep_ == "nsubjpass"]

    # Filter out passive subjects from the main list
    subjects = [s for s in subjects if s not in passive_subjects]

    return subjects, passive_subjects


def extract_objects_from_gloss(gloss_doc):
    """Extract various types of object tokens from a parsed gloss."""
    objs = []

    # Indirect objects
    iobjs = [tok for tok in gloss_doc if tok.dep_ == "iobj"]
    objs.extend(iobjs)

    # Direct objects
    # Only include if there were no indirect objects,
    #   crude, but good for MVP
    if not iobjs:
        objs.extend([tok for tok in gloss_doc if tok.dep_ == "dobj"])

    # Prepositional objects
    objs.extend([tok for tok in gloss_doc if tok.dep_ == "pobj"])

    # General objects
    objs.extend([tok for tok in gloss_doc if tok.dep_ == "obj"])

    # Check for noun chunks related to root verb
    root_verbs = [tok for tok in gloss_doc if tok.dep_ == "ROOT" and tok.pos_ == "VERB"]
    if root_verbs and not objs:
        for noun_chunk in gloss_doc.noun_chunks:
            if any(token.head == root_verbs[0] for token in noun_chunk):
                objs.append(noun_chunk.root)

    return objs


def extract_verbs_from_gloss(gloss_doc, include_passive=False):
    """Extract verb tokens from a parsed gloss."""
    verbs = [tok for tok in gloss_doc if tok.pos_ == "VERB"]

    if include_passive:
        # Past participles used as adjectives or in relative clauses
        passive_verbs = [tok for tok in gloss_doc if
                        tok.tag_ in ["VBN", "VBD"] and
                        tok.dep_ in ["acl", "relcl", "amod"]]
        verbs.extend(passive_verbs)

    return verbs


def find_instrumental_verbs(gloss_doc):
    """Find verbs associated with instrumental use (e.g., 'used for')."""
    instrumental_verbs = []

    if "used" in gloss_doc.text.lower():
        for i, token in enumerate(gloss_doc):
            if token.text.lower() == "used":
                # Check tokens after "used"
                for j in range(i+1, min(i+4, len(gloss_doc))):
                    if gloss_doc[j].pos_ == "VERB":
                        instrumental_verbs.append(gloss_doc[j])

    return instrumental_verbs


# ============================================================================
# Cross-POS Path Finding Functions
# ============================================================================
def get_most_similar_synsets(
      candidates: List[spacy.tokens.Token],
      target_synset: wn.synset,
      k=3
    ) -> List[Tuple[str, float]]:
    """
    Given a list of candidate tokens and a target synset,
    return the synset (name, similarity) most similar to the target.
    """
    for token in candidates:
        synsets = wn.synsets(token.text, pos=target_synset.pos())
        if synsets:
            # if the target is a verb,
            #   filter out any synsets with no lemma frames matching the target
            #   frame patterns: (Somebody [v] something), (Somebody [v]), ...
            if target_synset.pos() == 'v':
                synsets = [
                    s for s in synsets
                    if any(
                        frame in s.frame_ids()
                        for frame in target_synset.frame_ids()
                    )
                ]
            return sorted(
                [
                    (s, get_synset_similarity(s, target_synset))
                    for s in synsets
                ],
                key=lambda x: x[1],
                reverse=True
            )[:k]
    return None


def find_subject_to_predicate_path(
      subject_synset: wn.synset,
      predicate_synset: wn.synset,
      max_depth=6
    ):
    """Find path from subject (noun) to predicate (verb)."""
    paths = []
    print()
    print(f"Finding path from {subject_synset.name()} to {predicate_synset.name()}")

    # Strategy 1: Look for active subjects in verb's gloss
    pred_gloss_doc = nlp(predicate_synset.definition())
    # passive subjects are semantically equivalent to objects
    active_subjects, _ = extract_subjects_from_gloss(pred_gloss_doc)
    # filter to subjects based on whether they reside in the same sub-category
    filtered_subjects = [
        s for s in active_subjects
        if s.root_hypernyms() != s.lowest_common_hypernyms(subject_synset)
    ]
    # of the remaining subjects, get the most similar
    top_k = get_most_similar_synsets(active_subjects[:3], subject_synset)
    if top_k:
      print(f"Found best matches for {subject_synset.name()}: {top_k} using strategy 1")
      for matched_synset, _ in top_k:
        path = path_syn_to_syn(subject_synset, matched_synset, max_depth) + [predicate_synset]
        if path:
            paths.append(path)

    # Strategy 2: Look for verbs in the noun's gloss
    subj_gloss_doc = nlp(subject_synset.definition())
    verbs = extract_verbs_from_gloss(subj_gloss_doc, include_passive=False)
    # filter to subjects based on whether they reside in the same sub-category
    filtered_subjects = [
        s for s in verbs
        if s.root_hypernyms() != s.lowest_common_hypernyms(predicate_synset)
    ]
    # of the remaining subjects, get the most similar
    top_k = get_most_similar_synsets(verbs[:3], predicate_synset)
    if top_k:
      print(f"Found best matches for {predicate_synset.name()}: {top_k} using strategy 2")
      for matched_synset, _ in top_k:
        path = [subject_synset] + path_syn_to_syn(matched_synset, predicate_synset, max_depth)
        if path:
            paths.append(path)

    # Strategy 3: Explore all lexical neighbors
    subject_neighbors = get_all_neighbors(subject_synset)
    predicate_neighbors = get_all_neighbors(predicate_synset)


    # Return shortest path if any found
    return min(paths, key=len) if paths else None


def find_predicate_to_object_path(
      predicate_synset: wn.synset,
      object_synset: wn.synset,
      max_depth=6
    ):
    """Find path from predicate (verb) to object (noun)."""
    paths = []
    print()
    print(f"Finding path from {predicate_synset.name()} to {object_synset.name()}")

    # === Strategy 1: Objects in predicate gloss (incl. passive subjects) ===
    pred_gloss_doc = nlp(predicate_synset.definition())
    objects = extract_objects_from_gloss(pred_gloss_doc)
    _, passive_subjects = extract_subjects_from_gloss(pred_gloss_doc)
    objects.extend(passive_subjects)
    # filter to subjects based on whether they reside in the same sub-category
    objects = [
        s for s in objects
        if s.root_hypernyms() != s.lowest_common_hypernyms(object_synset)
    ]
    top_k = get_most_similar_synsets(objects[:3], object_synset)
    if top_k:
      print(f"Found best matches for {object_synset.name()}: {top_k} using strategy 1")
      for matched_synset, _ in top_k:
        path = [predicate_synset] + path_syn_to_syn(matched_synset, object_synset, max_depth)
        if path:
            paths.append(path)

    # === Strategy 2: Verbs in object's gloss ===
    obj_gloss_doc = nlp(object_synset.definition())
    verbs = extract_verbs_from_gloss(obj_gloss_doc, include_passive=True)
    # Use instrumental verbs in object's gloss as backup
    verbs.extend(find_instrumental_verbs(obj_gloss_doc))
    # filter to subjects based on whether they reside in the same sub-category
    verbs = [
        s for s in verbs
        if s.root_hypernyms() != s.lowest_common_hypernyms(predicate_synset)
    ]
    top_k = get_most_similar_synsets(verbs[:3], predicate_synset)
    if top_k:
      print(f"Found best matches for {predicate_synset.name()}: {top_k} using strategy 2")
      for matched_synset, _ in top_k:
        path = path_syn_to_syn(predicate_synset, matched_synset, max_depth) + [object_synset]
        if path:
            paths.append(path)

    # Strategy 3: Explore all lexical neighbors
    predicate_neighbors = get_all_neighbors(predicate_synset)
    object_neighbors = get_all_neighbors(object_synset)


    # Return shortest path if any found
    return min(paths, key=len) if paths else None


# ============================================================================
# Main Connected Path Finding Function
# ============================================================================

def find_connected_shortest_paths(subject_word, predicate_word, object_word, max_depth=10):
    """
    Find shortest connected paths from subject through predicate to object.
    Ensures that the same predicate synset connects both paths.
    """

    # Get synsets for each word
    subject_synsets = wn.synsets(subject_word, pos=wn.NOUN)
    predicate_synsets = wn.synsets(predicate_word, pos=wn.VERB)
    object_synsets = wn.synsets(object_word, pos=wn.NOUN)

    best_combined_path_length = float('inf')
    best_subject_path = None
    best_object_path = None
    best_predicate = None

    # Try each predicate synset as the connector
    for pred in predicate_synsets:
        # Find paths from all subjects to this specific predicate
        subject_paths = []
        for subj in subject_synsets:
            path = find_subject_to_predicate_path(subj, pred, max_depth)
            if path:
                subject_paths.append(path)

        # Find paths from this specific predicate to all objects
        object_paths = []
        for obj in object_synsets:
            path = find_predicate_to_object_path(pred, obj, max_depth)
            if path:
                object_paths.append(path)

        # If we have both paths through this predicate, check if it's the best
        if subject_paths and object_paths:
            shortest_subj_path = min(subject_paths, key=len)
            shortest_obj_path = min(object_paths, key=len)

            # Calculate combined length (subtract 1 to avoid counting predicate twice)
            combined_length = len(shortest_subj_path) + len(shortest_obj_path) - 1

            if combined_length < best_combined_path_length:
                best_combined_path_length = combined_length
                best_subject_path = shortest_subj_path
                best_object_path = shortest_obj_path
                best_predicate = pred

    return best_subject_path, best_object_path, best_predicate


# ============================================================================
# Display Functions
# ============================================================================

def show_path(label, path):
    """Pretty print a path of synsets."""
    if path:
        print(f"{label}:")
        print(" -> ".join(f"{s.name()} ({s.definition()})" for s in path))
        print(f"Path length: {len(path)}")
        print()
    else:
        print(f"{label}: No path found")
        print()


def show_connected_paths(subject_path, object_path, predicate):
    """Display the connected paths with their shared predicate."""
    if subject_path and object_path and predicate:
        print("=" * 70)
        print(f"CONNECTED PATH through predicate: {predicate.name()}")
        print("=" * 70)

        show_path("Subject -> Predicate path", subject_path)
        show_path("Predicate -> Object path", object_path)

        # Show the complete connected path
        complete_path = subject_path + object_path[1:]  # Avoid duplicating the predicate
        print("Complete connected path:")
        print(" -> ".join(f"{s.name()}" for s in complete_path))
        print(f"Total path length: {len(complete_path)}")
        print()
    else:
        print("No connected path found through any predicate synset.")


# Testing

In [53]:
nlp = spacy.load("en_core_web_sm")

# Find shortest connected paths
subject_path, object_path, connecting_predicate = find_connected_shortest_paths(
    "burglar", "shoot", "woman", max_depth=10
)

# Display results
show_connected_paths(subject_path, object_path, connecting_predicate)


Finding path from burglar.n.01 to shoot.v.01
Found best matches for shoot.v.01: [(Synset('record.v.01'), array([[0.34882617]], dtype=float32)), (Synset('enter.v.01'), array([[0.33030206]], dtype=float32)), (Synset('figure.v.02'), array([[0.2842958]], dtype=float32))] using strategy 2

Finding path from shoot.v.01 to woman.n.01
Found best matches for woman.n.01: [(Synset('projectile.n.01'), array([[0.1060816]], dtype=float32)), (Synset('missile.n.01'), array([[0.07454426]], dtype=float32))] using strategy 1
Found best matches for shoot.v.01: [(Synset('pit.v.01'), array([[0.41661736]], dtype=float32)), (Synset('fight.v.02'), array([[0.35470244]], dtype=float32)), (Synset('react.v.02'), array([[0.28721482]], dtype=float32))] using strategy 2

Finding path from shoot.v.01 to woman.n.02
Found best matches for woman.n.02: [(Synset('projectile.n.01'), array([[0.12297723]], dtype=float32)), (Synset('missile.n.01'), array([[0.11070415]], dtype=float32))] using strategy 1
Found best matches for

TypeError: can only concatenate list (not "NoneType") to list