In [1]:
import spacy

In [2]:
# Sample sentence used throughout the notebook, plus the small English
# pipeline that will process it.
text = "West Chestertenfieldville was referenced in Mr.Deed"
nlp = spacy.load("en_core_web_sm")

In [3]:
# Run the loaded pipeline over the sample sentence; the entities of the
# resulting Doc are printed in the next cell.
doc = nlp(text)

In [7]:
# List every entity found in `doc`, one "<text> <label>" pair per line.
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

West Chestertenfieldville GPE
Deed PERSON


In [9]:
# Add an entity_ruler component without any placement argument; in the
# analyze_pipes() output of the next cell it is listed after 'ner'.
ruler = nlp.add_pipe('entity_ruler')

In [10]:
# Display the pipeline analysis: for each component, what it assigns,
# requires and scores, and whether it retokenizes.
nlp.analyze_pipes()

{'summary': {'tok2vec': {'assigns': ['doc.tensor'],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'tagger': {'assigns': ['token.tag'],
   'requires': [],
   'scores': ['tag_acc'],
   'retokenizes': False},
  'parser': {'assigns': ['token.dep',
    'token.head',
    'token.is_sent_start',
    'doc.sents'],
   'requires': [],
   'scores': ['dep_uas',
    'dep_las',
    'dep_las_per_type',
    'sents_p',
    'sents_r',
    'sents_f'],
   'retokenizes': False},
  'attribute_ruler': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'lemmatizer': {'assigns': ['token.lemma'],
   'requires': [],
   'scores': ['lemma_acc'],
   'retokenizes': False},
  'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False},
  'entity_ruler': {'assigns': ['doc.ents', 'token.ent_type', 'token.ent_iob'],
   'requires': [],
   'scores': ['ents_f', 'ent

In [11]:
# A single phrase pattern for the entity ruler: label the exact string
# 'West Chestertenfieldville' as PERSON.
patterns = [dict(label='PERSON', pattern='West Chestertenfieldville')]

In [13]:
# Register the pattern list with the entity ruler that was added to the pipeline.
ruler.add_patterns(patterns)

In [14]:
# Re-run the pipeline now that the entity ruler holds a pattern, and inspect
# the entities of the NEW Doc (doc2). The earlier `doc` was created before the
# ruler was added, so printing `doc.ents` could never reflect the ruler.
doc2 = nlp(text)
for ent in doc2.ents:
    print(ent.text, ent.label_)

# The entities are still unchanged: the entity_ruler runs after 'ner', and by
# default it does not overwrite spans that 'ner' has already labelled.

West Chestertenfieldville GPE
Deed PERSON


In [15]:
# Start over with a fresh pipeline, this time inserting the entity ruler
# ahead of 'ner' instead of after it.
import spacy

text = 'West Chestertenfieldville was referenced in Mr.Deed'
nlp = spacy.load('en_core_web_sm')

# Two phrase patterns: the place-like name is labelled PERSON and the
# 'Mr.Deed' string is labelled FILM.
patterns = [
    dict(label='PERSON', pattern='West Chestertenfieldville'),
    dict(label='FILM', pattern='Mr.Deed'),
]
ruler = nlp.add_pipe('entity_ruler', before='ner')
ruler.add_patterns(patterns)

# Print the pipeline analysis followed by a blank line so the entity
# listing below is visually separated from it.
print(nlp.analyze_pipes(), '\n')

# Process the sentence with the reordered pipeline and list its entities.
doc3 = nlp(text)
for ent in doc3.ents:
    print(f"{ent.text} {ent.label_}")

{'summary': {'tok2vec': {'assigns': ['doc.tensor'], 'requires': [], 'scores': [], 'retokenizes': False}, 'tagger': {'assigns': ['token.tag'], 'requires': [], 'scores': ['tag_acc'], 'retokenizes': False}, 'parser': {'assigns': ['token.dep', 'token.head', 'token.is_sent_start', 'doc.sents'], 'requires': [], 'scores': ['dep_uas', 'dep_las', 'dep_las_per_type', 'sents_p', 'sents_r', 'sents_f'], 'retokenizes': False}, 'attribute_ruler': {'assigns': [], 'requires': [], 'scores': [], 'retokenizes': False}, 'lemmatizer': {'assigns': ['token.lemma'], 'requires': [], 'scores': ['lemma_acc'], 'retokenizes': False}, 'entity_ruler': {'assigns': ['doc.ents', 'token.ent_type', 'token.ent_iob'], 'requires': [], 'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'], 'retokenizes': False}, 'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'], 'requires': [], 'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'], 'retokenizes': False}}, 'problems': {'tok2vec': [], 'tagger': [], 'par