In [1]:
import spacy
from spacy.tokens import Span
from spacy.matcher import PhraseMatcher

## Pipeline components

To add a custom component to the pipeline, use `nlp.add_pipe(component)`.

In the function call you can specify where the component is added by using one of the following parameters (only one of them may be set per call):

* `last = True` - component will be added as the last pipeline step
* `first = True` - component will be added as the first pipeline step
* `before = 'ner'` - component will be added before 'ner' component
* `after = 'tagger'` - component will be added after 'tagger'

In [2]:
# Load the 'en_core_web_sm' English model; its pipeline has tagger, parser and ner steps
nlp = spacy.load('en_core_web_sm')

In [3]:
# Let's start with something simple and just print len of the doc on the way
def my_component(doc):
    """Report the length of the doc passing through the pipeline.

    Args:
        doc: The object being processed; only ``len(doc)`` is read.

    Returns:
        The same ``doc`` object, unmodified, so later components can use it.
    """
    n_tokens = len(doc)
    print(f'Your doc len: {n_tokens}')
    return doc

In [4]:
# Insert my_component into the pipeline directly after the 'parser' step
nlp.add_pipe(my_component, after = 'parser')

In [5]:
# Inspect the pipeline; my_component should now sit between 'parser' and 'ner'
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x1fefe51ada0>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x1fefe6760a8>),
 ('my_component', <function __main__.my_component(doc)>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x1fefe676108>)]

In [6]:
# Processing a text runs every pipeline step, so my_component prints the doc length
doc = nlp('I always wanted to speak Lithuanian.')

Your doc len: 7


## Add phrase matcher to the pipeline

In [2]:
# Load a separate pipeline so this section starts without my_component attached
nlp2 = spacy.load('en_core_web_sm')

In [3]:
# Tel Aviv district names that the phrase matcher should recognize
distrs = ['Old Yafo', 'Shapira', 'Ezra', 'Florentin']

In [4]:
# Build the match patterns by tokenizing each district name.
# make_doc only runs the tokenizer, which is all the PhraseMatcher needs for
# its default verbatim-text matching; nlp2.pipe would also run the tagger,
# parser and NER on every pattern for no benefit.
patterns = [nlp2.make_doc(name) for name in distrs]

In [5]:
# Initialize the PhraseMatcher on nlp2's vocabulary
matcher = PhraseMatcher(nlp2.vocab)

In [6]:
# Register all district patterns under the 'DISTRICT' key.
# The second argument is the on_match callback slot of the spaCy v2
# add(key, on_match, *docs) signature; None means no callback.
matcher.add('DISTRICT', None, *patterns)

In [7]:
# Create a component

def tlv_component(doc_):
    """Pipeline component that tags Tel Aviv district names as entities.

    Runs the module-level ``matcher`` over the document and replaces
    ``doc_.ents`` with one ``TLV_DISTRICT`` span per match. Entities that were
    already set on the document are discarded by the assignment.

    Args:
        doc_: The document being processed by the pipeline.

    Returns:
        The same document, with ``doc_.ents`` overwritten.
    """
    # Each match is a (match_id, start, end) triple; the id is not needed here.
    # Build a concrete list (rather than a one-shot generator) so the spans can
    # be inspected while debugging before they are assigned.
    spans = [Span(doc_, start, end, label = 'TLV_DISTRICT')
             for _, start, end in matcher(doc_)]

    # Overwrite (not extend) the doc's entities with the matched spans
    doc_.ents = spans

    return doc_

In [8]:
# Add the component to the pipeline after the 'ner' component, so that it runs
# last and has the final say over doc.ents
nlp2.add_pipe(tlv_component, after = 'ner')

In [9]:
# Create a document.
# The text is split with implicit string concatenation and the first piece ends
# with an explicit space; a backslash continuation inside the literal would
# fuse "me" and "that" into "methat" at the line break.
doc2 = nlp2(
    'I stayed in Old Yafo for a couple of days and then moved to Shapira to visit my friends. '
    'They told me that in their opinion Ezra is nicer than Florentin. I disagreed.'
)

In [10]:
# Inspect the pipeline; tlv_component should be the last step, right after 'ner'
nlp2.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x1c84c119dd8>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x1c84c278048>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x1c84c2780a8>),
 ('tlv_component', <function __main__.tlv_component(doc_)>)]

In [11]:
# Print each entity with its label; only the matched districts are expected,
# because tlv_component overwrote doc.ents
for ent in doc2.ents:
    print(ent.text, ent.label_)

Old Yafo TLV_DISTRICT
Shapira TLV_DISTRICT
Ezra TLV_DISTRICT
Florentin TLV_DISTRICT
