# SpaCy's EntityRuler

## 1. EntityRuler

In [3]:
import spacy

nlp = spacy.load("en_core_web_md")

text = "The village of Treblinka is in Poland. Treblinka was also an extermination camp."

doc = nlp(text)

for ent in doc.ents:
    print(ent.text, ent.label_)

Treblinka GPE
Poland GPE


In [4]:
text = "The village of Treblinka is in Poland. Treblinka was also an extermination camp."

ruler = nlp.add_pipe("entity_ruler")

patterns = [
    {"label": "GPE", "pattern": "Treblinka"}
]

ruler.add_patterns(patterns)

doc = nlp(text)

for ent in doc.ents:
    print(ent.text, ent.label_)

Treblinka GPE
Poland GPE
Treblinka GPE


In [5]:
nlp.analyze_pipes()

{'summary': {'tok2vec': {'assigns': ['doc.tensor'],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'tagger': {'assigns': ['token.tag'],
   'requires': [],
   'scores': ['tag_acc'],
   'retokenizes': False},
  'parser': {'assigns': ['token.dep',
    'token.head',
    'token.is_sent_start',
    'doc.sents'],
   'requires': [],
   'scores': ['dep_uas',
    'dep_las',
    'dep_las_per_type',
    'sents_p',
    'sents_r',
    'sents_f'],
   'retokenizes': False},
  'attribute_ruler': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'lemmatizer': {'assigns': ['token.lemma'],
   'requires': [],
   'scores': ['lemma_acc'],
   'retokenizes': False},
  'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False},
  'entity_ruler': {'assigns': ['doc.ents', 'token.ent_type', 'token.ent_iob'],
   'requires': [],
   'scores': ['ents_f', 'ent

## 2. Complex Rules and Variance

In [9]:
text = "This is a sample number (555) 555-5555."

patterns = [
            {"label": "PHONE_NUMBER", "pattern": [{"ORTH": "("}, {"SHAPE": "ddd"}, {"ORTH": ")"}, {"SHAPE": "ddd"},
            {"ORTH": "-", "OP": "?"}, {"SHAPE": "dddd"}]}
]

ruler.add_patterns(patterns)

doc = nlp(text)

for ent in doc.ents:
    print (ent.text, ent.label_)

555 CARDINAL
555-5555 CARDINAL
