# Named Entity Recognition

In [1]:
import spacy

In [2]:
nlp = spacy.load("en_core_web_sm")

In [11]:
def show_ents(doc_obj):
    """Print every named entity in ``doc_obj`` with a readable description.

    Each entity is written on its own line as ``<text> - <description>``,
    where the description comes from ``spacy.explain``. If ``spacy.explain``
    yields nothing for a label (a falsy result such as ``None``), the raw
    label string is printed instead of the word "None".

    Args:
        doc_obj: An object exposing ``ents``, an iterable of entities that
            each provide ``text`` and ``label_`` attributes.

    Returns:
        None. Output goes to stdout; "No entity found!" is printed when
        ``doc_obj.ents`` is empty.
    """
    if not doc_obj.ents:
        print("No entity found!")
        return

    for ent in doc_obj.ents:
        # Fall back to the label itself so unexplained labels stay readable.
        description = spacy.explain(ent.label_) or ent.label_
        print("{} - {}".format(ent.text, description))

In [5]:
doc = nlp(u"Hi! How are you?")

In [6]:
show_ents(doc)

No entity found!


In [13]:
doc_2 = nlp(u"May I go to washington, DC next may to see the Washingon Monument?")

In [14]:
show_ents(doc_2)

washington - Countries, cities, states
DC - Countries, cities, states
the Washingon Monument - Companies, agencies, institutions, etc.


## Adding a New Entity to a Doc

In [15]:
doc = nlp(u"Tesla to build an U.K factory for $6 million.")

In [16]:
show_ents(doc)

U.K - Companies, agencies, institutions, etc.
$6 million - Monetary values, including unit


In [20]:
from spacy.tokens import Span

In [21]:
org = doc.vocab.strings[u"ORG"]

In [22]:
# Build a span over the token range [0, 1) of `doc`, i.e. its first token,
# labelled with the ORG value looked up in the vocab's string store.
new_ent = Span(doc, 0, 1, label=org)

In [24]:
# Keep the entities already on the doc and append the manually built span.
# NOTE(review): not idempotent — running this cell a second time appends
# new_ent on top of the copy already stored in doc.ents; presumably spaCy
# rejects the duplicate/overlapping span, so re-create `doc` first — TODO confirm.
doc.ents = list(doc.ents) + [new_ent]

In [25]:
show_ents(doc)

Tesla - Companies, agencies, institutions, etc.
U.K - Companies, agencies, institutions, etc.
$6 million - Monetary values, including unit


## Adding Multiple Phrases as Entities

In [42]:
doc = nlp(u"Our company created a brand new vacum cleaner. This new vacum cleaner is awesome.")

In [44]:
show_ents(doc)

No entity found!


In [29]:
from spacy.matcher import PhraseMatcher

In [30]:
matcher = PhraseMatcher(nlp.vocab)

In [31]:
phrase_list = ["vacum cleaner", "vacum-cleaner"]

In [32]:
# Turn each phrase into a Doc for the PhraseMatcher. Only tokenization is
# needed because the matcher was created with its default matching attribute,
# so use nlp.make_doc instead of running the whole pipeline on every pattern.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [36]:
phrase_patterns

[vacum cleaner, vacum-cleaner]

In [35]:
type(phrase_patterns[0])

spacy.tokens.doc.Doc

In [37]:
# Register every pattern Doc under the "newproduct" key. The None in the
# second position fills the slot that precedes the patterns (no callback).
# NOTE(review): this is the older call form; newer spaCy releases expect
# matcher.add("newproduct", phrase_patterns) — confirm the installed version.
matcher.add("newproduct", None, *phrase_patterns)

In [45]:
found_matches = matcher(doc)

In [46]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 13)]

In [47]:
from spacy.tokens import Span

In [48]:
prod = doc.vocab.strings[u"PRODUCT"]

In [49]:
# Wrap the token range of each match in a Span carrying the PRODUCT label;
# the leading match id is not needed for building the span.
new_ents = [
    Span(doc, start, end, label=prod)
    for _match_id, start, end in found_matches
]

In [51]:
# Keep the entities already on the doc and append the spans built from the matches.
# NOTE(review): not idempotent — running this cell a second time appends the
# same spans on top of the copies already stored in doc.ents; presumably spaCy
# rejects the duplicate/overlapping spans, so re-create `doc` first — TODO confirm.
doc.ents = list(doc.ents) + new_ents

In [52]:
show_ents(doc)

vacum cleaner - Objects, vehicles, foods, etc. (not services)
vacum cleaner - Objects, vehicles, foods, etc. (not services)


## Frequency of an Entity Type (Money)

In [58]:
doc = nlp("Originally I paid $29.95 for this car toy, but now it is marked down by 10 dollars.")

In [61]:
# Collect only the entities whose label is MONEY; the list length is the count.
freq_money = [
    money_ent
    for money_ent in doc.ents
    if money_ent.label_ == "MONEY"
]

In [63]:
freq_money

[29.95, 10 dollars]

In [62]:
len(freq_money)

2