<a href="https://colab.research.google.com/github/IsaacFigNewton/SMIED/blob/main/Experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
from typing import List, Any, Dict, Tuple
import nltk
nltk.download('framenet_v17')
nltk.download('wordnet')
from nltk.corpus import framenet as fn
from nltk.corpus import wordnet as wn

[nltk_data] Downloading package framenet_v17 to /root/nltk_data...
[nltk_data]   Unzipping corpora/framenet_v17.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Get overlapping hypernym paths

In [3]:
def overlapping_hypernym_paths(syn1, syn2) -> List[Any]:
    """Return the paths joining ``syn1`` to ``syn2`` via their lowest common hypernyms.

    Every hypernym path of ``syn1`` is paired with every hypernym path of
    ``syn2``.  For each lowest common hypernym (LCH) found on both paths, the
    result contains one list that climbs from ``syn1`` up to the LCH and then
    descends from the LCH to ``syn2``::

        [syn1, ..., lch, ..., syn2]

    Args:
        syn1: Synset-like object providing ``lowest_common_hypernyms()``,
            ``hypernym_paths()`` and ``name()``.
        syn2: Synset-like object providing ``hypernym_paths()``.

    Returns:
        A list of paths (each a list of synsets) in first-seen order with
        duplicates removed; empty when the synsets share no hypernym.

    Side effects:
        Prints the names of the lowest common hypernyms.
    """
    lchs = syn1.lowest_common_hypernyms(syn2)
    print("LCHs:", [lch.name() for lch in lchs])

    common_paths = []
    seen = set()
    # The second synset's paths do not depend on the outer loop.
    paths_to_syn2 = syn2.hypernym_paths()

    for p1 in syn1.hypernym_paths():
        for p2 in paths_to_syn2:
            for lch in lchs:
                if lch not in p1 or lch not in p2:
                    continue
                # Locate the LCH independently in each path: it may sit at
                # different depths, and the loop variables must stay intact
                # for the remaining pairings.
                upward = p1[p1.index(lch):][::-1]
                downward = p2[p2.index(lch) + 1:]
                path = upward + downward
                # Paths that differ only above the LCH collapse to the same
                # result, so keep just the first occurrence.
                key = tuple(path)
                if key not in seen:
                    seen.add(key)
                    common_paths.append(path)

    return common_paths

In [4]:
# Example: show the connecting hypernym paths in both directions.
cat = wn.synset('cat.n.01')
dog = wn.synset('dog.n.01')
for source, target in ((cat, dog), (dog, cat)):
    print()
    overlaps = overlapping_hypernym_paths(source, target)
    for path in overlaps:
        synset_names = [s.name() for s in path]
        print(" → ".join(synset_names))


LCHs: ['carnivore.n.01']
cat.n.01 → feline.n.01 → carnivore.n.01 → canine.n.02 → dog.n.01

LCHs: ['carnivore.n.01']
dog.n.01 → canine.n.02 → carnivore.n.01 → feline.n.01 → cat.n.01


# Get frame info from lemmas

In [19]:
import spacy
from spacy.tokens import Token, Doc
from spacy import displacy
# Load the spaCy pipeline once; `nlp` is the callable that
# `_get_f_id_arg_struct_doc_dict` uses to turn frame templates into Docs.
nlp = spacy.load("en_core_web_sm")

## Get a dict of all dependency schemas matching a synset's frames/usage

In [22]:
def _get_f_id_arg_struct_doc_dict(syn: wn.synset) -> Dict[int, Doc]:
    """Map each frame id of a synset's lemmas to a parsed frame-template Doc.

    For every lemma of ``syn`` the ids from ``lemma.frame_ids()`` are paired
    with the template strings from ``lemma.frame_strings()`` (for example
    ``"Somebody spin something"``).  A trailing token that contains uppercase
    characters (such as ``PP``) is dropped, and the remaining template is
    parsed with the module-level ``nlp`` pipeline.

    When several lemmas share a frame id, the template of the last lemma
    wins.

    NOTE(review): these ids come from the WordNet lemma, not from the
    FrameNet corpus -- confirm before using them as FrameNet frame ids.

    Args:
        syn: A WordNet synset as returned by ``wn.synset(...)``.

    Returns:
        Dict of frame id -> spaCy ``Doc`` for the template, in order of first
        appearance of each id.
    """
    templates: Dict[int, str] = {}
    for lemma in syn.lemmas():
        # Pair ids with their template strings directly instead of calling
        # frame_strings() again for every index.
        for f_id, f_str in zip(lemma.frame_ids(), lemma.frame_strings()):
            f_arg_structure = f_str.split(' ')

            # remove any extra arguments beyond subject, object, theme
            #   (placeholders such as "PP" are the tokens with uppercase)
            if f_arg_structure[-1] != f_arg_structure[-1].lower():
                f_arg_structure = f_arg_structure[:-1]

            # later lemmas overwrite earlier ones for the same frame id
            templates[f_id] = ' '.join(f_arg_structure)

    # Parse only the templates that survive the overwrite, rather than
    # running the pipeline on strings that would be discarded.
    return {f_id: nlp(template) for f_id, template in templates.items()}

In [25]:
# Draw the dependency parse of every frame template found for spin.v.01.
spin_synset = wn.synset('spin.v.01')
f_id_docs = _get_f_id_arg_struct_doc_dict(spin_synset)
for template_doc in f_id_docs.values():
    displacy.render(template_doc, style='dep')

## Get candidate frames that match the dependency schemas

In [None]:
# TODO: retain original arg structure for better SRL
def _flattened_fn_arg_schema(doc: Doc):
    """Placeholder for flattening a template ``Doc`` into an argument schema.

    Not implemented yet: the body only creates an empty dict and the
    function implicitly returns ``None``.
    """
    arg_schema = {}

In [None]:
def _get_candidate_frames(pred_tok: spacy.tokens.Token):
      """Collect candidate frames for a predicate token (UNFINISHED DRAFT).

      Looks up the verb synsets for the token's lowercased lemma and, for
      each synset, builds the frame-id -> template Doc mapping with
      ``_get_f_id_arg_struct_doc_dict``.

      NOTE(review): the grouping step at the end of the inner loop is
      incomplete and is not valid Python, so this cell cannot run as written.
      NOTE(review): nothing is returned yet; presumably ``pred_frames`` is the
      intended result -- confirm before finishing.
      """
      # get candidate WordNet synsets for the predicate (verbs)
      pred_lemma = pred_tok.lemma_.lower()
      # get a dict of synset names and synset objects for quick lookup
      pred_synsets = wn.synsets(pred_lemma, pos=wn.VERB)
      pred_synsets = {syn.name(): syn for syn in pred_synsets}
      # create a dict of synset names and lists of their possible frames,
      #   indexed by argument structure (num args of each type)
      pred_frames: Dict[str, Dict[
          Tuple[int, int, int],
          List[Any]
      ]] = dict()

      # walk every candidate synset and look at the frames of its lemmas
      for s_name, syn in pred_synsets.items():

          syn_frame_ids_docs = _get_f_id_arg_struct_doc_dict(syn)

          # add all frames to the dict, indexed by argument structure
          #   (num subjects, num objects, num themes)
          # NOTE: despite its name, `f_str` is bound to the spaCy Doc values
          #   of the mapping, not to strings
          for f_id, f_str in syn_frame_ids_docs.items():

              # NOTE(review): `f_id` originates from `lemma.frame_ids()` on a
              #   WordNet lemma, while `fn` is the FrameNet corpus reader --
              #   verify that the two id spaces actually correspond
              frame = fn.frame_by_id(f_id)
              # TODO: unfinished -- presumably meant to hold a flattened
              #   argument schema for this frame; currently always empty
              arg_schema_reqs = {

              }
              # FIXME: placeholder below is a syntax error -- the key
              #   expression before the comma is missing, the trailing `if`
              #   has no comprehension to filter, and `i` is not bound
              #   anywhere in this function
              pred_frames[s_name]= {
                  (, [])

                  if fn.frame_by_id(i) is not None
              }

In [17]:
# List every lemma of spin.v.01 with its frame ids and frame templates.
spin_lemmas = wn.synset('spin.v.01').lemmas()
for lemma in spin_lemmas:
    frame_ids = lemma.frame_ids()
    print(lemma, frame_ids)
    print(*lemma.frame_strings(), sep="\n")

Lemma('spin.v.01.spin') [4, 8, 22]
Something is spining PP
Somebody spin something
Somebody spin PP
Lemma('spin.v.01.spin_around') [4, 8, 22]
Something is spin_arounding PP
Somebody spin_around something
Somebody spin_around PP
Lemma('spin.v.01.whirl') [4, 8, 22]
Something is whirling PP
Somebody whirl something
Somebody whirl PP
Lemma('spin.v.01.reel') [4, 8, 22]
Something is reeling PP
Somebody reel something
Somebody reel PP
Lemma('spin.v.01.gyrate') [4, 8, 22]
Something is gyrateing PP
Somebody gyrate something
Somebody gyrate PP
