In [1]:
import spacy
from spacy import displacy
from spacy.matcher import DependencyMatcher

In [2]:
# Load the "en_core_web_sm" pipeline once; its vocab backs the DependencyMatcher
# created further down, and it parses every test sentence before matching.
nlp = spacy.load("en_core_web_sm")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Node names (RIGHT_ID values) shared by all dependency patterns:
# the possessor and the thing being possessed.
OWNER, OWNED = "owner", "owned"

In [4]:
# Pattern 1: direct possessive modifier.
# A noun (OWNED) that is the immediate head of a token carrying the "poss"
# dependency label (OWNER).
pattern_1 = [
    # Anchor node: the possessed noun.
    {"RIGHT_ID": OWNED, "RIGHT_ATTRS": {"POS": {"IN": ["NOUN"]}}},
    # Immediate dependent of the anchor labelled "poss": the possessor.
    {
        "LEFT_ID": OWNED,
        "REL_OP": ">",
        "RIGHT_ID": OWNER,
        "RIGHT_ATTRS": {"DEP": "poss"},
    },
]

# Pattern 2: possession expressed through a prepositional phrase.
# Chain: OWNED noun -> adposition ("prep") -> OWNER noun ("pobj").
pattern_2 = [
    # Anchor node: the possessed noun.
    {"RIGHT_ID": OWNED, "RIGHT_ATTRS": {"POS": {"IN": ["NOUN"]}}},
    # Adposition hanging directly off the possessed noun.
    {
        "LEFT_ID": OWNED,
        "REL_OP": ">",
        "RIGHT_ID": "adp",
        "RIGHT_ATTRS": {"DEP": "prep", "POS": {"IN": ["ADP"]}},
    },
    # Noun object of that adposition: the possessor.
    {
        "LEFT_ID": "adp",
        "REL_OP": ">",
        "RIGHT_ID": OWNER,
        "RIGHT_ATTRS": {"DEP": "pobj", "POS": {"IN": ["NOUN"]}},
    },
]

# Pattern 3: pronoun subject + verb + noun direct object.
# The verb is the anchor; OWNER is its pronoun subject and OWNED its noun object.
# NOTE(review): the verb is not restricted to possession verbs, so any
# "pronoun VERB noun" clause matches -- confirm this breadth is intended.
pattern_3 = [
    # Anchor node: any verb.
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {
            "POS": {"IN": ["VERB"]},
        },
    },
    # Pronoun that is the verb's nominal subject: the possessor.
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": OWNER,
        "RIGHT_ATTRS": {
            "DEP": "nsubj",
            "POS": {"IN": ["PRON"]},
        },
    },
    # Noun that is the verb's direct object: the possessed thing.
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": OWNED,
        "RIGHT_ATTRS": {
            "DEP": "dobj",
            "POS": {"IN": ["NOUN"]},
        },
    },
]

# Pattern 4: noun subject + verb + prepositional phrase.
# The verb is the anchor; OWNED is its noun subject, and OWNER is the noun
# object of an adposition attached to the same verb.
pattern_4 = [
    # Anchor node: any verb.
    {
        "RIGHT_ID": "verb",
        "RIGHT_ATTRS": {
            "POS": {"IN": ["VERB"]},
        },
    },
    # Noun that is the verb's nominal subject: the possessed thing.
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": OWNED,
        "RIGHT_ATTRS": {
            "DEP": "nsubj",
            "POS": {"IN": ["NOUN"]},
        },
    },
    # Adposition attached directly to the verb.
    {
        "LEFT_ID": "verb",
        "REL_OP": ">",
        "RIGHT_ID": "adp",
        "RIGHT_ATTRS": {
            "DEP": "prep",
            "POS": {"IN": ["ADP"]},
        },
    },
    # Noun object of that adposition: the possessor.
    {
        "LEFT_ID": "adp",
        "REL_OP": ">",
        "RIGHT_ID": OWNER,
        "RIGHT_ATTRS": {
            "DEP": "pobj",
            "POS": {"IN": ["NOUN"]},
        },
    },
]

In [5]:
# Registry of all dependency patterns, keyed "Pattern1" .. "Pattern4" in
# definition order; the keys double as the matcher's rule names.
patterns = {
    f"Pattern{number}": rule
    for number, rule in enumerate(
        (pattern_1, pattern_2, pattern_3, pattern_4), start=1
    )
}

In [6]:
# Build the matcher on the pipeline's vocabulary and register each pattern
# under its own key.
matcher = DependencyMatcher(nlp.vocab)
for pattern_id in patterns:
    pattern = patterns[pattern_id]
    # add() takes a list of alternative patterns per key; each key has exactly one here.
    matcher.add(pattern_id, [pattern])

In [7]:
# TEST SENTENCES
sentences = [
    "The dog, whose tail wagged excitedly, was greeted warmly by his owner.",
    "Because the children's toys were scattered everywhere, their mother asked them to tidy up.",
    "While I was at my friend's house, I saw his collection of vintage cars.",
    "The artist, whose paintings were on display, received a standing ovation.",
    "If you are looking for the book, please check my sister's desk.",
    "The man whose car was stolen called the police immediately.",
    "Although the company's profits had declined, its stock price remained stable.",
    "Maria's and Jose's new car, a sleek sedan, arrived this morning, much to their delight.",
    "The Smiths' house, with its sprawling garden, always seemed like a haven of peace.",
    "After a long and arduous journey, the Martinezes' family, including their children and their pets, finally reached their destination, a cozy cabin nestled in the mountains.",
    "Predation and parasitism each reduced the abundance of the intermediate consumer (Paramecium), and parasitism indirectly reduced the abundance of the basal resource (Serratia)."
]

In [8]:
# Run every test sentence through the pipeline and report each dependency match,
# labelling every matched token with the RIGHT_ID of the pattern node it satisfied.
# Report how many rules are registered instead of printing the matcher's object
# repr, whose memory address is uninformative and differs between runs.
print(f"Registered patterns: {len(matcher)}")

for sentence in sentences:
    print(f"Sentence: {sentence}")
    doc = nlp(sentence)
    matches = matcher(doc)

    # Nothing matched: move on without printing the blank separator line.
    if not matches:
        continue

    for match_id, token_ids in matches:
        # match_id is the hash of the key given to matcher.add(); map it back to the string.
        pattern_id = nlp.vocab.strings[match_id]
        # Strip the "Pattern" prefix instead of taking the last character, so a
        # key such as "Pattern10" is labelled "10" rather than "0".
        if pattern_id.startswith("Pattern"):
            pattern_number = pattern_id[len("Pattern"):]
        else:
            pattern_number = pattern_id
        print(f"\tMatch {match_id} - Pattern {pattern_number}")
        # token_ids[i] is the token matched by the i-th node of the pattern,
        # so the pattern nodes and token indices are walked in lockstep.
        for node, token_id in zip(patterns[pattern_id], token_ids):
            print(f"\t\t{node['RIGHT_ID']}: {doc[token_id].text}")
        print()
    print()

<spacy.matcher.dependencymatcher.DependencyMatcher object at 0x00000138672ACA50>
Sentence: The dog, whose tail wagged excitedly, was greeted warmly by his owner.
	Match 14499690083660312615 - Pattern 1
		owned: tail
		owner: whose

	Match 14499690083660312615 - Pattern 1
		owned: owner
		owner: his


Sentence: Because the children's toys were scattered everywhere, their mother asked them to tidy up.
	Match 14499690083660312615 - Pattern 1
		owned: toys
		owner: children

	Match 14499690083660312615 - Pattern 1
		owned: mother
		owner: their


Sentence: While I was at my friend's house, I saw his collection of vintage cars.
	Match 14499690083660312615 - Pattern 1
		owned: friend
		owner: my

	Match 14499690083660312615 - Pattern 1
		owned: house
		owner: friend

	Match 14499690083660312615 - Pattern 1
		owned: collection
		owner: his

	Match 14621589392117008497 - Pattern 2
		owned: collection
		adp: of
		owner: cars

	Match 12054268835912785357 - Pattern 3
		verb: saw
		owner: I
		owne