In [9]:
import spacy

In [10]:
# Load the "en_core_web_sm" English pipeline by its package name.
nlp = spacy.load("en_core_web_sm")
# Run the pipeline over a two-sentence sample; the resulting Doc carries the entities.
doc = nlp("Britain is a place. Mary is a doctor")

In [11]:
# Show each named entity found in the sample together with its label.
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Britain GPE
Mary PERSON


In [None]:
from spacy.language import Language 

In [None]:
@Language.component("remove_gpe")  # Registers this function as the spaCy pipeline component named "remove_gpe".
def remove_gpe(doc):
    """Drop every entity labelled "GPE" from ``doc.ents``.

    Intended to run after the entity recognizer, so that ``doc.ents`` is
    already populated when this component sees the document.

    Args:
        doc: The spaCy ``Doc`` being processed by the pipeline.

    Returns:
        The same ``doc`` object, with ``doc.ents`` reassigned to the entities
        whose label is not "GPE" (original order preserved).
    """
    # Build the filtered list in a single pass rather than copying the
    # entities and calling list.remove() for each GPE (a rescan per removal).
    doc.ents = [ent for ent in doc.ents if ent.label_ != "GPE"]
    return doc

In [None]:
nlp.add_pipe("remove_gpe") # Add the registered "remove_gpe" component to the pipeline.
                           # By default it is added at the end (i.e. after the ner).

<function __main__.remove_gpe(doc)>

In [None]:
# Summarize what each pipeline component assigns, requires and scores;
# the custom "remove_gpe" component should be listed last, after "ner".
nlp.analyze_pipes()

{'summary': {'tok2vec': {'assigns': ['doc.tensor'],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'tagger': {'assigns': ['token.tag'],
   'requires': [],
   'scores': ['tag_acc',
    'pos_acc',
    'tag_micro_p',
    'tag_micro_r',
    'tag_micro_f'],
   'retokenizes': False},
  'parser': {'assigns': ['token.dep',
    'token.head',
    'token.is_sent_start',
    'doc.sents'],
   'requires': [],
   'scores': ['dep_uas',
    'dep_las',
    'dep_las_per_type',
    'sents_p',
    'sents_r',
    'sents_f'],
   'retokenizes': False},
  'attribute_ruler': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'lemmatizer': {'assigns': ['token.lemma'],
   'requires': [],
   'scores': ['lemma_acc'],
   'retokenizes': False},
  'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False},
  'remove_gpe': {'assigns': [],
   'requires': [],
   

In [18]:
# Re-process the same sample now that "remove_gpe" is in the pipeline:
# only the non-GPE entities should be printed.
doc = nlp("Britain is a place. Mary is a doctor")
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Mary PERSON


In [None]:
#nlp.to_disk("data_sets/new_en_core_web_sm") # save the modified pipeline to a new directory