# Use of Matcher for token matching

In [3]:
import spacy
from spacy.matcher import Matcher

# Load the small English pipeline and build a token Matcher on its vocabulary.
nlp = spacy.load('en_core_web_sm')
matcher = Matcher(nlp.vocab)

In [4]:
# Three token-level spellings of the same term; LOWER compares the lowercased token text.
pattern1 = [{'LOWER': 'solarpower'}]  # a single token
pattern2 = [  # "solar", exactly one punctuation token, "power"
    {'LOWER': 'solar'},
    {'IS_PUNCT': True},
    {'LOWER': 'power'},
]
pattern3 = [  # two adjacent tokens
    {'LOWER': 'solar'},
    {'LOWER': 'power'},
]

In [20]:
# Sample text containing all three spellings (adjacent literals are concatenated).
doc = nlp(
    'Nowadays Solar power is getting popular beacuse of solarpower industry is rising. '
    'Today solar-power is potential source of energy.'
)

In [21]:
# Register every variant under one rule name so all hits share a match id.
matcher.add(
    'Solarpower',
    [pattern1, pattern2, pattern3],
)

In [22]:
# Run the matcher over the doc; each hit is a (match_id, start, end) triple of
# token indices, used below as the slice doc[start:end].
found_matches = matcher(doc)
found_matches

[(6544436658971563323, 1, 3),
 (6544436658971563323, 8, 9),
 (6544436658971563323, 14, 17)]

In [24]:
# Look up each match id in the vocab's string store and print the matched text.
for match_id, start_ind, end_ind in found_matches:
    rule_name = nlp.vocab.strings[match_id]
    matched_text = doc[start_ind:end_ind].text
    print(match_id, rule_name, start_ind, end_ind, matched_text)

6544436658971563323 Solarpower 1 3 Solar power
6544436658971563323 Solarpower 8 9 solarpower
6544436658971563323 Solarpower 14 17 solar-power


In [25]:
# Remove the 'Solarpower' rule from the matcher so it can be redefined below.
# The membership check keeps this cell re-runnable: removing a rule that is
# no longer registered raises an error.
if 'Solarpower' in matcher:
    matcher.remove('Solarpower')

In [27]:
# Revised patterns: 'OP': '*' lets the punctuation token occur zero or more
# times, so one pattern covers both "solar power" and "Solar---power".
pattern1 = [{'LOWER': 'solarpower'}]
pattern2 = [
    {'LOWER': 'solar'},
    {'IS_PUNCT': True, 'OP': '*'},
    {'LOWER': 'power'},
]

In [28]:
# Register the two revised patterns under the same rule name.
matcher.add(
    'Solarpower',
    [pattern1, pattern2],
)

In [32]:
# Try the revised patterns on three spellings in a single sentence.
doc = nlp('Solar---power is solarpower, solar power, yay!')

found_matches = matcher(doc)
found_matches

[(6544436658971563323, 0, 3),
 (6544436658971563323, 4, 5),
 (6544436658971563323, 6, 8)]

# Phrase matching with spaCy's PhraseMatcher

In [2]:
import spacy
from spacy.matcher import PhraseMatcher

# Load the small English pipeline and build a PhraseMatcher on its vocabulary.
nlp = spacy.load('en_core_web_sm')
ph_matcher = PhraseMatcher(nlp.vocab)

In [3]:
# Read the whole article and run it through the pipeline.
# A forward slash works on Windows as well as POSIX systems; the previous
# escaped-backslash path only resolved on Windows.
# NOTE(review): no encoding is passed, so the platform default applies — confirm the file's encoding.
with open('Files/reaganomics.txt') as f:
    doc = nlp(f.read())

In [11]:
# Phrases to search for in the document.
phrase_list = ['voodoo economics', 'supply-side economics', 'trickle-down economics', 'free-market economics']

# PhraseMatcher patterns are Doc objects. Matching on the default attribute
# only needs tokenization, so make_doc() avoids running the full pipeline.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

# Pass the patterns as a list, the same call form used for Matcher.add earlier
# in this notebook; spaCy v3 rejects the older add(key, None, *patterns) form.
ph_matcher.add('matchEco', phrase_patterns)

In [12]:
# Run the phrase matcher over the loaded document.
found_matches = ph_matcher(doc)

In [13]:
# Look up each match id in the vocab's string store and print the matched text.
for match_id, start_ind, end_ind in found_matches:
    rule_name = nlp.vocab.strings[match_id]
    matched_text = doc[start_ind:end_ind].text
    print(match_id, rule_name, start_ind, end_ind, matched_text)

7373594532109772372 matchEco 41 45 supply-side economics
7373594532109772372 matchEco 49 53 trickle-down economics
7373594532109772372 matchEco 54 56 voodoo economics
7373594532109772372 matchEco 61 65 free-market economics
7373594532109772372 matchEco 673 677 supply-side economics
7373594532109772372 matchEco 2987 2991 trickle-down economics
