In [6]:
%time
from spacy.tokens import Token
from spacy.symbols import NOUN, PROPN, PRON, ADJ, ADV, ADP, VERB

def get_left_edge(word):
    """Return the start index of the span that begins at ``word``'s left edge.

    Args:
        word: the token to inspect; it must expose ``pos``, ``i`` and
            ``left_edge`` (as a spaCy ``Token`` does).

    Returns:
        int: ``word.left_edge.i`` when ``word`` is a NOUN, PROPN or PRON and
        the token at its left edge is not a pronoun; otherwise ``word.i``.
    """
    # Resolve the POS symbols locally instead of reading module globals: a
    # later cell in this notebook rebinds the global names NOUN and VERB to
    # matcher-attribute dicts, after which a global lookup would make every
    # NOUN token fail the membership test below.
    from spacy.symbols import NOUN, PROPN, PRON

    if word.pos not in (NOUN, PROPN, PRON):
        return word.i

    left_edge = word.left_edge
    # A pronoun at the left edge is not pulled into the span.
    if left_edge.pos == PRON:
        return word.i
    return left_edge.i

def get_right_edge(word):
    """Return the end index of the span headed by ``word``.

    Only the first item yielded by ``word.rights`` is considered.

    Args:
        word: the token to inspect; it must expose ``rights`` (an iterator)
            and ``i``.

    Returns:
        int: the index of the first right-hand item when it is an ADJ or ADV;
        otherwise ``word.i``.
    """
    first_right = next(word.rights, False)
    # Nothing to the right (or a falsy first item): the span ends at the word itself.
    if not first_right:
        return word.i
    return first_right.i if first_right.pos in (ADJ, ADV) else word.i

# Attach both edge helpers to Token as method extensions.
# force=True overwrites an existing registration, so the cell can be re-run.
Token.set_extension(
    "get_left_edge",
    method=get_left_edge,
    force=True,
)
Token.set_extension(
    "get_right_edge",
    method=get_right_edge,
    force=True,
)
# Confirm both registrations, one result per line.
print(
    *(Token.has_extension(name) for name in ("get_left_edge", "get_right_edge")),
    sep="\n",
)

CPU times: user 1 µs, sys: 1 µs, total: 2 µs
Wall time: 4.05 µs
True
True


In [23]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import spacy
from dependency_relations import CustomDependencyMatcher
from spacy.matcher import DependencyMatcher
from dependency_objects import _subject, _subject_passive, _object, _prepositional_object, coordination_deps, _adposition

# Small English pipeline used to parse the sample text.
nlp = spacy.load("en_core_web_sm")

# Token-attribute dicts for use as RIGHT_ATTRS in dependency patterns.
# NOTE(review): NOUN and VERB rebind names that the earlier cell imports from
# spacy.symbols; code that looks those globals up after this cell has run sees
# these dicts rather than the POS symbols.
NOUN = dict(POS={"IN": ["NOUN", "PROPN", "PRON"]})
VERB = dict(POS={"IN": ["VERB", "AUX"]})
# Matches tokens whose morphology includes Tense=Past.
CLAUSE = dict(MORPH={"INTERSECTS": ["Tense=Past"]})

# Patterns handed to CustomDependencyMatcher. Each entry has a name, a
# DependencyMatcher node list, and a category / inverse label pair.
#   REL_OP ">" : LEFT_ID is the immediate head of RIGHT_ID.
#   REL_OP "." : LEFT_ID immediately precedes RIGHT_ID.
patterns = [
    # CLAUSE predicate with a `_subject` child and an `_object` child.
    {
        "pattern_name": "NarrativeClause",
        "pattern": [
            {"RIGHT_ID": "PREDICATE", "RIGHT_ATTRS": CLAUSE},
            {
                "LEFT_ID": "PREDICATE",
                "REL_OP": ">",
                "RIGHT_ID": "SUBJECT",
                "RIGHT_ATTRS": _subject,
            },
            {
                "LEFT_ID": "PREDICATE",
                "REL_OP": ">",
                "RIGHT_ID": "OBJECT",
                "RIGHT_ATTRS": _object,
            },
        ],
        "category": "hasEvent",
        "inverse": "isEventOf",
    },
    # CLAUSE predicate with a `_subject` child, directly followed by an
    # adposition that governs a `_prepositional_object`.
    {
        "pattern_name": "NarrativeClauseAdposition",
        "pattern": [
            {"RIGHT_ID": "PREDICATE", "RIGHT_ATTRS": CLAUSE},
            {
                "LEFT_ID": "PREDICATE",
                "REL_OP": ">",
                "RIGHT_ID": "SUBJECT",
                "RIGHT_ATTRS": _subject,
            },
            {
                "LEFT_ID": "PREDICATE",
                "REL_OP": ".",
                "RIGHT_ID": "ADPOSITION",
                "RIGHT_ATTRS": _adposition,
            },
            {
                "LEFT_ID": "ADPOSITION",
                "REL_OP": ">",
                "RIGHT_ID": "OBJECT",
                "RIGHT_ATTRS": _prepositional_object,
            },
        ],
        "category": "hasEvent",
        "inverse": "isEventOf",
    },
    # Same shape as the adposition pattern, but the subject must satisfy
    # `_subject_passive` and the match is labelled as a link.
    {
        "pattern_name": "NarrativeClausePassive",
        "pattern": [
            {"RIGHT_ID": "PREDICATE", "RIGHT_ATTRS": CLAUSE},
            {
                "LEFT_ID": "PREDICATE",
                "REL_OP": ">",
                "RIGHT_ID": "SUBJECT",
                "RIGHT_ATTRS": _subject_passive,
            },
            {
                "LEFT_ID": "PREDICATE",
                "REL_OP": ".",
                "RIGHT_ID": "ADPOSITION",
                "RIGHT_ATTRS": _adposition,
            },
            {
                "LEFT_ID": "ADPOSITION",
                "REL_OP": ">",
                "RIGHT_ID": "OBJECT",
                "RIGHT_ATTRS": _prepositional_object,
            },
        ],
        "category": "hasLinkTo",
        "inverse": "isLinkedTo",
    },
]

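# TODO: decide which other verb forms are of interest. The check below (kept for reference) accepted only two exact morphologies: finite past, and perfect past participle.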
# if list(verb.morph) == ["Tense=Past", "VerbForm=Fin"] or list(verb.morph) == [
#     "Aspect=Perf",
#     "Tense=Past",
#     "VerbForm=Part",
# ]:
#     return True

# Build the matcher: a validating spaCy DependencyMatcher wrapped, together
# with the pattern list, by the project's CustomDependencyMatcher.
dependency_matcher = DependencyMatcher(nlp.vocab, validate=True)
deps = CustomDependencyMatcher(dependency_matcher, patterns)

# Parse one sample sentence and run the matcher over it.
doc = nlp("Enemies of freedom have attacked us")
matches = deps(doc)

# Per-token view of the parse: token, coarse POS, dependency label, morphology.
df = [
    (tok, tok.pos_, tok.dep_, tok.morph)
    for tok in doc
]
display(pd.DataFrame(df))

# Echo two of the imported attribute dicts for reference.
print(coordination_deps)
print(_prepositional_object)

# Show the matches both raw and as a table.
print(f'Matches: {matches}')
display(pd.DataFrame(matches))



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Added:  NarrativeClause
Added:  NarrativeClauseAdposition
Added:  NarrativeClausePassive


Unnamed: 0,0,1,2,3
0,Enemies,NOUN,nsubj,(Number=Plur)
1,of,ADP,prep,()
2,freedom,NOUN,pobj,(Number=Sing)
3,have,AUX,aux,"(Mood=Ind, Tense=Pres, VerbForm=Fin)"
4,attacked,VERB,ROOT,"(Aspect=Perf, Tense=Past, VerbForm=Part)"
5,us,PRON,dobj,"(Case=Acc, Number=Plur, Person=1, PronType=Prs)"


{'DEP': {'IN': ['conj', 'cc', 'preconj', 'prep']}}
{'POS': {'IN': ['NOUN', 'PROPN', 'PRON']}, 'DEP': {'IN': ['pobj', 'pcomp']}}
Matches: [{'RULE': 'NarrativeClause', 'CATEGORY': 'hasEvent', 'PREDICATE': 'attacked', 'SUBJECT': 'Enemies', 'OBJECT': 'us'}]


Unnamed: 0,RULE,CATEGORY,PREDICATE,SUBJECT,OBJECT
0,NarrativeClause,hasEvent,attacked,Enemies,us


In [2]:
# Import `bush_text` from the project's cndlib.texts module and show it:
# the bare name on the last line is echoed as the cell's output.
from cndlib.texts import bush_text
bush_text

