In [12]:
import spacy

from spacy.tokens import Span


nlp = spacy.load("en_core_web_sm")

doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

doc_LER = nlp("""Calvert Cliffs 2
Auxiliary Feedwater Pump Inoperable due to Improper Reset of Trip Throttle Valve
Abstract: On August 10, 2021 it was determined that the 22 Auxiliary Feedwater (AFW) Pump trip throttle valve was not reset properly in March 2021. As a result, the 22 AFW Pump was determined to be inoperable for a period longer than allowed by the technical specification condition completion time. The 22 AFW Pump was subsequently reset properly. The cause of the improper reset of 22 AFW Pump trip throttle valve was due to inadequate procedural guidance. Actions were taken to update the procedure providing additional detail to ensure the trip throttle valve is reset properly.""")

ents_LER_bf = [(e.text, e.start_char, e.end_char, e.label_) for e in doc_LER.ents]
print('Before: \n', ents_LER_bf)

# Create a span for the new entity
LER_ent = Span(doc_LER, 0, 4, label="PLACE")
orig_ents = list(doc_LER.ents)

# Option 1: Modify the provided entity spans, leaving the rest unmodified
doc_LER.set_ents([LER_ent], default="unmodified")

ents_LER_af = [(e.text, e.start, e.end, e.label_) for e in doc_LER.ents]
print('After: \n', ents_LER_af)

Before: 
 [('Calvert Cliffs', 0, 14, 'PERSON'), ('2', 15, 16, 'CARDINAL'), ('Feedwater Pump Inoperable', 27, 52, 'ORG'), ('Improper Reset of', 60, 77, 'FAC'), ('August 10, 2021', 111, 126, 'DATE'), ('22', 154, 156, 'CARDINAL'), ('Feedwater', 167, 176, 'PERSON'), ('March 2021', 234, 244, 'DATE'), ('22', 263, 265, 'CARDINAL'), ('22', 402, 404, 'CARDINAL'), ('22', 482, 484, 'CARDINAL')]
After: 
 [('Calvert Cliffs 2\n', 0, 4, 'PLACE'), ('Feedwater Pump Inoperable', 5, 8, 'ORG'), ('Improper Reset of', 10, 13, 'FAC'), ('August 10, 2021', 20, 24, 'DATE'), ('22', 29, 30, 'CARDINAL'), ('Feedwater', 31, 32, 'PERSON'), ('March 2021', 44, 46, 'DATE'), ('22', 52, 53, 'CARDINAL'), ('22', 75, 76, 'CARDINAL'), ('22', 90, 91, 'CARDINAL')]


In [14]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
spacy.displacy.render(doc, style="ent")

Apple 0 5 ORG
U.K. 27 31 GPE
$1 billion 44 54 MONEY


In [13]:
for ent in doc_LER.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
spacy.displacy.render(doc_LER, style="ent")

Calvert Cliffs 2
 0 17 PLACE
Feedwater Pump Inoperable 27 52 ORG
Improper Reset of 60 77 FAC
August 10, 2021 111 126 DATE
22 154 156 CARDINAL
Feedwater 167 176 PERSON
March 2021 234 244 DATE
22 263 265 CARDINAL
22 402 404 CARDINAL
22 482 484 CARDINAL


In [16]:
display(nlp.vocab)

<spacy.vocab.Vocab at 0x170227fc3a0>