In [1]:
import spacy
from spacy.matcher import Matcher

# Load the `en_core_web_sm` pipeline once; both matchers are built on its vocabulary.
nlp = spacy.load("en_core_web_sm")
relation_matcher, event_matcher = Matcher(nlp.vocab), Matcher(nlp.vocab)

In [None]:
# Relation patterns. Each pattern is a list of per-token constraints that must
# match consecutive tokens of a Doc.
pattern_works_at = [
    {"POS": {"IN": ["NOUN", "PROPN", "PRON"]}},     # Subject: any noun, proper noun or pronoun
    {"LEMMA": "work"},                              # Any token whose lemma is 'work' (works, worked, working)
    {"LOWER": "at"},
    {"ENT_TYPE": "ORG", "OP": "+"}                  # One or more ORG tokens, so multi-word names are not cut at the first token
]
pattern_chased = [
    {"POS": {"IN": ["NOUN", "PROPN", "PRON"]}},     # Subject
    {"LEMMA": "chase"},                             # chase, chased, chasing, ...
    {"POS": "DET", "OP": "?"},                      # Optional determiner (zero or one occurrence of the, a, an, ...)
    {"POS": {"IN": ["NOUN", "PROPN"]}, "OP": "+"}   # One or more occurrences, allow multi-word objects
]
pattern_married = [
    {"POS": {"IN": ["NOUN", "PROPN", "PRON"]}},     # Subject
    {"LOWER": "is"},
    {"LOWER": "married"},
    {"LOWER": "to"},
    {"POS": {"IN": ["NOUN", "PROPN"]}}              # Spouse (single token)
]

# Build the matcher inside this cell so that re-running the cell starts from an
# empty rule set instead of adding to rules registered by a previous run.
relation_matcher = Matcher(nlp.vocab)

# greedy="LONGEST" keeps only the longest non-overlapping span per rule.
# Without it, a pattern ending in "+" reports every partial span as well
# (e.g. both "... chased the tennis" and "... chased the tennis ball").
relation_matcher.add("WORKS_AT", [pattern_works_at], greedy="LONGEST")
relation_matcher.add("CHASED", [pattern_chased], greedy="LONGEST")
relation_matcher.add("MARRIED_TO", [pattern_married], greedy="LONGEST")

# Example relation sentences
relation_sentences = [
    "John works at Google.",
    "The cat chased the mouse.",
    "Alice is married to Bob."
]

# Run the pipeline on each sentence and print every relation span found
for sent in relation_sentences:
    doc = nlp(sent)
    matches = relation_matcher(doc)
    print(f"\nSentence: {sent}")
    for match_id, start, end in matches:
        span = doc[start:end]
        # match_id is a hash; look it up in the vocab's string store to get the rule name
        print(f"Relation: {nlp.vocab.strings[match_id]} --> {span.text}")



Sentence: John works at Google.
Relation: WORKS_AT --> John works at Google

Sentence: The cat chased the mouse.
Relation: CHASED --> cat chased the mouse

Sentence: Alice is married to Bob.
Relation: MARRIED_TO --> Alice is married to Bob


In [3]:
# Event patterns. Each pattern is a list of per-token constraints that must
# match consecutive tokens of a Doc.
pattern_concert = [
    {"POS": {"IN": ["NOUN", "PROPN", "PRON"]}},     # Subject: any noun, proper noun or pronoun
    {"LEMMA": "take"},                              # take, took, taking, ...
    {"LOWER": "place"},
    {"LOWER": "on"},
    {"ENT_TYPE": "DATE", "OP": "+"}                 # One or more DATE tokens, so multi-word dates are not cut at the first token
]
pattern_team_win = [
    {"POS": {"IN": ["NOUN", "PROPN", "PRON"]}},     # Subject
    {"LEMMA": "win"},                               # win, won, winning, ...
    {"POS": "DET", "OP": "?"},                      # Optional determiner
    {"POS": {"IN": ["NOUN", "PROPN"]}, "OP": "+"}   # One or more nouns: allow multi-word objects
]
pattern_birthday = [
    {"POS": {"IN": ["NOUN", "PRON", "PROPN"]}},     # Subject
    {"LEMMA": "celebrate"},
    {"LOWER": {"IN": ["her", "his"]}, "OP": "?"},   # Optional possessive
    {"LOWER": "birthday"},
    {"LOWER": "on"},
    {"ENT_TYPE": "DATE", "OP": "+"}                 # One or more DATE tokens
]

# Build the matcher inside this cell (as the relation cell does) so that
# re-running the cell starts from an empty rule set instead of calling add()
# again on a matcher that already holds these keys.
event_matcher = Matcher(nlp.vocab)

# greedy="LONGEST" keeps only the longest non-overlapping span per rule.
# Without it, a pattern ending in "+" reports every partial span as well.
event_matcher.add("CONCERT_EVENT", [pattern_concert], greedy="LONGEST")
event_matcher.add("TEAM_WIN_EVENT", [pattern_team_win], greedy="LONGEST")
event_matcher.add("BIRTHDAY_EVENT", [pattern_birthday], greedy="LONGEST")

# Example event sentences
event_sentences = [
    "The concert took place on Friday.",
    "The team won the championship.",
    "She celebrated her birthday on Saturday."
]

# Run the pipeline on each sentence and print every event span found
for sent in event_sentences:
    doc = nlp(sent)
    matches = event_matcher(doc)
    print(f"\nSentence: {sent}")
    for match_id, start, end in matches:
        span = doc[start:end]
        # match_id is a hash; look it up in the vocab's string store to get the rule name
        print(f"Event: {nlp.vocab.strings[match_id]} --> {span.text}")


Sentence: The concert took place on Friday.
Event: CONCERT_EVENT --> concert took place on Friday

Sentence: The team won the championship.
Event: TEAM_WIN_EVENT --> team won the championship

Sentence: She celebrated her birthday on Saturday.
Event: BIRTHDAY_EVENT --> She celebrated her birthday on Saturday
