# Custom Components with spaCy

- Goal: customize spaCy's off-the-shelf pipeline components to suit our needs.

In [8]:
import spacy

In [10]:
# Load the 'en_core_web_sm' pipeline and run it over a two-sentence sample text.
# `nlp` and `doc` are reused by the cells that follow.
nlp = spacy.load('en_core_web_sm')
doc = nlp('Britain is a place. Mary is a doctor.')

In [11]:
# Show every entity found in `doc`: its text followed by its label.
for entity in doc.ents:
    print(entity.text, entity.label_)

Britain GPE
Mary PERSON


### Remove GPE from the instance

In [12]:
from spacy.language import Language

In [14]:
@Language.component('remove_gpe')
def remove_gpe(doc):
    """Pipeline component that drops every entity labelled 'GPE' from ``doc.ents``.

    Registered with spaCy under the name 'remove_gpe'. The entities present
    before filtering are printed each time the component runs, then
    ``doc.ents`` is reassigned to the remaining entities and the same ``doc``
    is returned.
    """
    found_ents = list(doc.ents)
    # Print the unfiltered entities so the effect of the component is visible.
    print(found_ents)
    # Keep everything except geopolitical entities.
    doc.ents = [span for span in found_ents if span.label_ != 'GPE']
    return doc
    

In [15]:
# Add the component registered as 'remove_gpe' to the pipeline; the
# analyze_pipes() output in this notebook lists it last, after 'ner'.
nlp.add_pipe('remove_gpe')

<function __main__.remove_gpe(doc)>

In [16]:
# Inspect the pipeline: for each component, what it assigns, requires and scores.
nlp.analyze_pipes()

{'summary': {'tok2vec': {'assigns': ['doc.tensor'],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'tagger': {'assigns': ['token.tag'],
   'requires': [],
   'scores': ['tag_acc'],
   'retokenizes': False},
  'parser': {'assigns': ['token.dep',
    'token.head',
    'token.is_sent_start',
    'doc.sents'],
   'requires': [],
   'scores': ['dep_uas',
    'dep_las',
    'dep_las_per_type',
    'sents_p',
    'sents_r',
    'sents_f'],
   'retokenizes': False},
  'attribute_ruler': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'lemmatizer': {'assigns': ['token.lemma'],
   'requires': [],
   'scores': ['lemma_acc'],
   'retokenizes': False},
  'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False},
  'remove_gpe': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False}},
 'problems': {'tok2vec': [],
  

In [17]:
# Re-run the same sample text through the pipeline that now includes
# 'remove_gpe', then list the entities that remain.
doc = nlp('Britain is a place. Mary is a doctor.')
for entity in doc.ents:
    print(entity.text, entity.label_)

[Britain, Mary]
Mary PERSON


In [18]:
# Save the modified pipeline to 'data/new_en_core_web_sm'.
nlp.to_disk('data/new_en_core_web_sm')

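When this pipeline is deployed, the `remove_gpe` function needs to be saved in a library (an importable module) that is imported before the saved NLP model is loaded. The saved pipeline records only the component's registered name, not its code, so the function must be registered again in the process that loads the model.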