In [None]:
import spacy
# Load the 'en_core_web_sm' pipeline once; the cells below call it as `nlp(...)`.
nlp = spacy.load('en_core_web_sm')

In [None]:
def show_ents(doc):
    """Print one line per named entity found in ``doc``.

    Each line has the form
    ``text - start_char - end_char - label - explanation``, where the
    explanation is whatever ``spacy.explain`` returns for the entity label
    (converted with ``str``, so an unknown label prints as ``None``).
    When ``doc.ents`` is empty, a single notice is printed instead.

    Args:
        doc: an object exposing ``ents``, an iterable of entities with
            ``text``, ``start_char``, ``end_char`` and ``label_`` attributes.

    Returns:
        None. The function only prints.
    """
    # Guard clause: nothing to list.
    if not doc.ents:
        print('No named entities found.')
        return

    for ent in doc.ents:
        fields = (
            ent.text,
            str(ent.start_char),
            str(ent.end_char),
            ent.label_,
            str(spacy.explain(ent.label_)),
        )
        print(' - '.join(fields))

In [None]:
# Run the pipeline on a sample sentence and list the entities it detects.
doc1 = nlp("Apple is looking at buying U.K. startup for $1 billion")
show_ents(doc1)

Apple - 0 - 5 - ORG - Companies, agencies, institutions, etc.
U.K. - 27 - 31 - GPE - Countries, cities, states
$1 billion - 44 - 54 - MONEY - Monetary values, including unit


In [None]:
# "May" occurs twice in this sentence; in the recorded output only "next May"
# is tagged (as DATE), while the leading modal verb "May" is not an entity.
doc2 = nlp(u'May I go to Washington, DC next May to see the Washington Monument?')
show_ents(doc2)

Washington, DC - 12 - 26 - GPE - Countries, cities, states
next May - 27 - 35 - DATE - Absolute or relative dates or periods
the Washington Monument - 43 - 66 - ORG - Companies, agencies, institutions, etc.


In [None]:
# Compact view: print only the entity text and its label for each entity.
doc3 = nlp(u'Can I please borrow 500 dollars from you to buy some Microsoft stock?')
for ent in doc3.ents:
    print(ent.text, ent.label_)

500 dollars MONEY
Microsoft ORG


In [None]:
doc = nlp("San Francisco considers banning sidewalk delivery robots")

# Entity-level view: one line per entity, then the same data as a list of tuples.
for e in doc.ents:
    print(e.text, e.start_char, e.end_char, e.label_)
ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc.ents
]
print(ents)

# Token-level view: text, IOB tag and entity type of the first two tokens.
ent_san, ent_francisco = (
    [token.text, token.ent_iob_, token.ent_type_]
    for token in (doc[0], doc[1])
)
print(ent_san, ent_francisco, sep='\n')

San Francisco 0 13 GPE
[('San Francisco', 0, 13, 'GPE')]
['San', 'B', 'GPE']
['Francisco', 'I', 'GPE']


In [None]:
# In the recorded output the model tags "U.K." and "$6 million" but not "Tesla".
doc = nlp(u'Tesla to build a U.K. factory for $6 million')
show_ents(doc)

U.K. - 17 - 21 - GPE - Countries, cities, states
$6 million - 34 - 44 - MONEY - Monetary values, including unit


In [None]:
from spacy.tokens import Span

In [None]:
# Look up "ORG" in the vocabulary's string store; the result is used as the span label.
ORG = doc.vocab.strings[u'ORG']
# Span over token positions 0..1 of `doc` (the single token "Tesla"), labelled ORG.
new_ent = Span(doc, 0, 1, label=ORG)
# Replace doc.ents with the existing entities plus the new span.
# NOTE(review): re-running this cell without rebuilding `doc` would try to add the
# same span a second time -- presumably spaCy rejects the overlap; confirm.
doc.ents = list(doc.ents) + [new_ent]

In [None]:
# List the entities again; the manually added "Tesla" span now appears as ORG.
show_ents(doc)

Tesla - 0 - 5 - ORG - Companies, agencies, institutions, etc.
U.K. - 17 - 21 - GPE - Countries, cities, states
$6 million - 34 - 44 - MONEY - Monetary values, including unit


In [None]:
# Snapshot the entities the model finds on its own (none, per the recorded output).
doc = nlp("fb is hiring a new vice president of global policy")
ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc.ents
]
print('Before', ents)

# Manually mark the first token ("fb") as an ORG entity and append it to doc.ents.
fb_ent = Span(doc, 0, 1, label="ORG")
doc.ents = [*doc.ents, fb_ent]
ents = [
    (span.text, span.start_char, span.end_char, span.label_)
    for span in doc.ents
]
print('After', ents)

Before []
After [('fb', 0, 2, 'ORG')]


In [None]:
from spacy import displacy

In [None]:
# Adjacent string literals are joined into one string, giving the same text as a
# backslash continuation inside a single literal.
text = (
    "When S. Thrun started working on self driving cars at Google in 2007 "
    "few people outside of the company took him serious"
)
doc = nlp(text)
# Render the entity visualisation inline in the notebook.
displacy.render(doc, style="ent", jupyter=True)

In [None]:
# Multi-paragraph sample article; each line of the literal is one paragraph.
text = """Clearview AI, a New York-headquartered facial recognition company, has been fined £7.5 million ($9.4 million) by a U.K. privacy regulator.
Over the last few years, the firm has collected images from the web and social media of people in Britain and elsewhere to create a global online database that can be used by law enforcement for facial recognition.
The Information Commission’s Office said Monday that the company has breached U.K. data protection laws.
The ICO has ordered Clearview to delete data it has on U.K. residents and banned it from collecting any more.
Clearview writes on its website that it has collected more than 20 billion facial images of people around the world. It collects publicly posted images from social media platforms like Facebook and Instagram, as well as news media, mugshot websites and other open sources. It does so without informing the individuals or asking for their consent.
Clearview’s platform allows law enforcement agencies to upload a photo of an individual and try to match it to photos that are stored in Clearview’s database.
John Edwards, the U.K.’s information commissioner, said in a statement: “The company not only enables identification of those people, but effectively monitors their behavior and offers it as a commercial service. That is unacceptable.”
He added that people expect their personal information to be respected, regardless of where in the world their data is being used."""

# Process the whole article once, then render its entities inline in the notebook.
# `doc` is rebound here and reused by the displaCy cells that follow.
doc = nlp(text)
displacy.render(doc, style='ent', jupyter=True)

In [None]:
# Render the article one sentence at a time.
# `sent.as_doc()` copies the sentence span, with the annotations already computed
# for the full document, into a standalone Doc. That avoids re-running the whole
# pipeline on every sentence (the previous `nlp(sent.text)`) and keeps the entities
# shown here consistent with the full-document rendering.
for sent in doc.sents:
    displacy.render(sent.as_doc(), style='ent', jupyter=True)



In [None]:
# Pass an 'ents' option listing ORG and PRODUCT to the entity visualiser
# (displaCy's option for choosing which entity types get highlighted).
options = {'ents': ['ORG', 'PRODUCT']}
displacy.render(doc, style='ent', jupyter=True, options=options)

In [None]:
# CSS background values per entity label, handed to displaCy through the 'colors' option.
colors = {
    'ORG': 'linear-gradient(90deg, #f2c707, #dc9ce7)',
    'PRODUCT': 'radial-gradient(white, green)',
}
# Highlight exactly the labels that have a custom colour (ORG, PRODUCT).
options = {'ents': list(colors), 'colors': colors}
displacy.render(doc, style='ent', jupyter=True, options=options)

In [None]:
# Alternative colour scheme for the same two labels.
colors = {
    'ORG': 'linear-gradient(90deg,#aa9cde,#dc9ce7)',
    'PRODUCT': 'radial-gradient(white,red)',
}
# The option key must be 'ents' (plural), as in the earlier cells. The previous
# 'ent' key is not a displaCy option, so the ORG/PRODUCT filter was never applied.
options = {'ents': ['ORG', 'PRODUCT'], 'colors': colors}
displacy.render(doc, style='ent', jupyter=True, options=options)