# PhraseMatcher

In [8]:
import spacy
from spacy.matcher import PhraseMatcher
from spacy import attrs
# Load the `en_core_web_md` pipeline once; the resulting `nlp` object is reused
# by the cells below for its vocab (`nlp.vocab`), its tokenizer
# (`nlp.make_doc`) and full-pipeline processing (`nlp(...)`).
nlp = spacy.load("en_core_web_md")

In [3]:
# Phrase matcher bound to the pipeline's vocabulary; no `attr` is given, so the
# matcher's default token attribute is used for comparison.
matcher = PhraseMatcher(nlp.vocab)

In [4]:
# Names of the politicians we want to find in running text.
terms = ["Angela Merkel", "Donald Trump", "Alexis Tsipras"]

#? `nlp.make_doc` only runs the tokenizer, which is all PhraseMatcher
#? patterns need, so each term is converted with it.
#! `nlp(term)` would run the whole pipeline on every term, which is
#! unnecessary work here.
pattern = list(map(nlp.make_doc, terms))

In [5]:
# Register the tokenized politician names under the key "politicList".
matcher.add("politicList", pattern)

In [6]:
# Run the full pipeline on the sample text. The literal is kept exactly as
# written: the backslash continuations and the indentation of the following
# lines are part of the string value.
doc = nlp("""3 EU leaders met in Berlin. German chancellor Angela Merkel first welcomed \
        the US president Donald Trump. The  following day Alexis Tsipras joined \
        them in Brandenburg.
    """)

# as_spans=True makes the matcher return Span objects (used via `.text` in the
# next cell) instead of raw match tuples.
matches = matcher(doc, as_spans=True)

In [7]:
# Show the text of every phrase the matcher found, one per line.
for matched_span in matches:
    print(matched_span.text)

Angela Merkel
Donald Trump
Alexis Tsipras


# IP matching

In [9]:
# NOTE: this rebinds `matcher` and `pattern` from the politician example above;
# re-running earlier cells afterwards would act on this new matcher.
#
# With attr=attrs.SHAPE the matcher compares token shapes rather than the
# literal text, so the strings below act as shape exemplars only.
# "127.256.0.0" is not a valid IPv4 address (256 > 255), but only its digit
# layout matters here.
matcher = PhraseMatcher(nlp.vocab, attr=attrs.SHAPE)
ip_nums = ["127.0.0.0", "127.256.0.0"]
pattern = list(map(nlp.make_doc, ip_nums))

In [15]:
# Register the IP exemplar docs on the current `matcher` under the key "ips".
matcher.add("ips",pattern)

In [16]:
# Sample log line to search. The backslash continuation keeps the next line's
# leading spaces inside the string, so the literal is preserved verbatim.
doc = nlp(
    "This log contains the following IP addresses: \
    192.1.1.1 and 192.12.1.1 and 192.160.1.1 ."
)

In [17]:
# Apply the current `matcher` to the log line and print each matched span.
# Only tokens matching one of the registered exemplars are reported, so an
# address with a different digit layout (e.g. 192.12.1.1) is not printed.
matches = matcher(doc, as_spans=True)
for matched_span in matches:
    print(matched_span.text)

192.1.1.1
192.160.1.1
