In [1]:
import spacy

In [2]:
# Load the small English pipeline once; later cells call `nlp(...)` to build docs.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Sample sentence for the part-of-speech cells that follow.
doc = nlp("The quick brown fox jumped over the lazy dog's back")

In [4]:
print(doc.text)

The quick brown fox jumped over the lazy dog's back


In [5]:
# Part-of-speech label of the token at index 4 ("jumped" in the sentence above).
print(doc[4].pos_)

VERB


In [6]:
# Tag label of the same token (index 4, "jumped").
print(doc[4].tag_)

VBD


In [7]:
# One row per token: text, POS label, tag label, and the tag's description.
for token in doc:
    # spacy.explain() returns None for a label it has no description for, and
    # None cannot be formatted with a width spec, so convert to str first.
    description = str(spacy.explain(token.tag_))
    print(f"{token.text:{10}}{token.pos_:{10}}{token.tag_:{10}}{description:{10}}")

The       DET       DT        determiner
quick     ADJ       JJ        adjective 
brown     ADJ       JJ        adjective 
fox       NOUN      NN        noun, singular or mass
jumped    VERB      VBD       verb, past tense
over      ADP       IN        conjunction, subordinating or preposition
the       DET       DT        determiner
lazy      ADJ       JJ        adjective 
dog       NOUN      NN        noun, singular or mass
's        PART      POS       possessive ending
back      NOUN      NN        noun, singular or mass


In [8]:
# Sentence containing "read" with a plural object, for the tag comparison below.
doc1 = nlp("I read books on NLP.")

In [9]:
word = doc1[1]

In [10]:
word.text

'read'

In [11]:
token = word
# str() guards against spacy.explain() returning None, which would raise a
# TypeError when formatted with a width spec.
description = str(spacy.explain(token.tag_))
print(f"{token.text:{10}}{token.pos_:{10}}{token.tag_:{10}}{description:{10}}")

read      VERB      VBP       verb, non-3rd person singular present


In [12]:
# Same verb, different object ("a book"): compare the tag with the previous cell.
doc2 = nlp("I read a book on NLP.")
token = doc2[1]
# str() guards against spacy.explain() returning None, which would raise a
# TypeError when formatted with a width spec.
description = str(spacy.explain(token.tag_))
print(f"{token.text:{10}}{token.pos_:{10}}{token.tag_:{10}}{description:{10}}")

read      VERB      VBD       verb, past tense


In [13]:
# Rebuild `doc`, this time with a closing period, for the counting cells below.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [14]:
# Tally the tokens of `doc` by their POS attribute.
POS_counts = doc.count_by(spacy.attrs.POS)

In [15]:
POS_counts

{96: 1, 83: 3, 99: 1, 84: 1, 89: 2, 91: 3, 93: 1}

In [16]:
# Keys of POS_counts are integer IDs; doc.vocab[k].text gives the label for each.
for k, v in sorted(POS_counts.items()):
    # The explicit space after the label column keeps labels that fill all five
    # characters (e.g. PUNCT) from running straight into the count.
    print(f"{k}. {doc.vocab[k].text:{5}} {v}")

83. ADJ  3
84. ADP  1
89. DET  2
91. NOUN 3
93. PART 1
96. PUNCT1
99. VERB 1


In [17]:
# Tally the tokens of `doc` by their TAG attribute.
TAG_counts = doc.count_by(spacy.attrs.TAG)

# Keys are integer IDs; doc.vocab[k].text gives the label for each.
for k, v in sorted(TAG_counts.items()):
    # The explicit space after the label column keeps labels of five or more
    # characters from running straight into the count.
    print(f"{k}. {doc.vocab[k].text:{5}} {v}")



74. POS  1
1292078113972184607. IN   1
10554686591937588953. JJ   3
12646065887601541794. .    1
15267657372422890137. DT   2
15308085513773655218. NN   3
17109001835818727656. VBD  1


In [18]:
 def show_ents(doc):
     """Print one ``text-label-description`` line per entity in ``doc.ents``.

     Prints "No entities found" instead when ``doc.ents`` is empty.
     """
     entities = doc.ents
     if not entities:
         print("No entities found")
         return
     for ent in entities:
         # A missing description (None) is rendered as the text "None".
         print(f"{ent.text}-{ent.label_}-{spacy.explain(ent.label_)}")

In [19]:
# A sentence used to exercise show_ents.
doc = nlp("Hi, how are you?")

In [20]:
show_ents(doc)

No entities found


In [21]:
# "May" and "Washington" each appear twice here, in different roles.
doc = nlp("May I go to Washington, DC next May to see the Washington Monument?")

In [22]:
show_ents(doc)

Washington, DC-GPE-Countries, cities, states
next May-DATE-Absolute or relative dates or periods
the Washington Monument-ORG-Companies, agencies, institutions, etc.


In [23]:
 # Adjacent string literals are joined with nothing in between, so the first
 # one ends with a space to keep the two sentences from fusing into
 # "cleaner.This".
 doc = nlp("Our company created a brand new vacuum cleaner. "
           "This new vacuum-cleaner is the best in show.")

In [24]:
show_ents(doc)

No entities found


In [25]:
from spacy.matcher import PhraseMatcher

In [26]:
matcher = PhraseMatcher(nlp.vocab)

In [27]:
# Spelling variants of the product name to match.
phrase_list = [
    'vacuum cleaner',
    'vacuum-cleaner',
]

In [28]:
# The matcher above is built with default settings, so the patterns only need
# to be tokenized; nlp.make_doc() does that without running the rest of the
# pipeline on every phrase.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]

In [29]:
# Register every phrase Doc under the key 'newproduct'; None is passed in the second slot.
# NOTE(review): this (key, None, *docs) call shape looks like the spaCy 2.x signature —
# confirm against the installed spaCy version before upgrading.
matcher.add('newproduct', None, *phrase_patterns)

In [30]:
found_matches = matcher(doc)

In [31]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [32]:
from spacy.tokens import Span

In [33]:
# Look up "PRODUCT" in the vocab's string store; used as the span label below.
PROD = doc.vocab.strings["PRODUCT"]

In [34]:
# Each match is a 3-tuple; items 1 and 2 are passed to Span as its start and
# end positions in `doc`, and every resulting span is labelled with PROD.
new_ents = [
    Span(doc, start, end, label=PROD)
    for _match_id, start, end in found_matches
]

In [35]:
# Keep the entities already on `doc` and append the new spans.
# NOTE(review): not idempotent — running this cell twice appends the same spans
# again; confirm how spaCy handles the duplicates.
doc.ents = [*doc.ents, *new_ents]

In [36]:
show_ents(doc)

vacuum cleaner-PRODUCT-Objects, vehicles, foods, etc. (not services)
vacuum-cleaner-PRODUCT-Objects, vehicles, foods, etc. (not services)


In [42]:
# Quoted text followed by an attribution, for the sentence-splitting cell below.
doc = nlp('"Management is doing the right things; Leadership is doing the right things." -Peter Drucker')

In [38]:
doc.text

'"Management is doing the right things; Leadership is doing the right things." -Peter Drucker'

In [43]:
# Print each sentence of `doc`, followed by blank lines as a visual separator.
for sentence in doc.sents:
    print(sentence, '\n', sep='\n')

"Management is doing the right things; Leadership is doing the right things."


-Peter Drucker


