<a href="https://colab.research.google.com/github/RishiRishwanth/Natural_Language_Processing/blob/main/POS_NER.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import spacy
nlp = spacy.load('en_core_web_sm')
doc = nlp(u"the quick brown fox jumped over the lazy dog's back.")
print(doc.text)

the quick brown fox jumped over the lazy dog's back.


In [3]:
#coarse tag
print(doc[4].pos_)
#fine_griained
print(doc[4].tag_)


VERB
VBD


In [4]:
for t in doc:
  print(f"{t.text:{10}} {t.pos_:{10}} {t.tag_:{10}} {spacy.explain(t.tag_)}")

the        DET        DT         determiner
quick      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
brown      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
fox        NOUN       NN         noun, singular or mass
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective (English), other noun-modifier (Chinese)
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [5]:
doc = nlp(u"I read books on NLP")
word = doc[1]
word.text
t=word

In [6]:
print(f"{t.text:{10}} {t.pos_:{10}} {t.tag_:{10}} {spacy.explain(t.tag_)}")

read       VERB       VBP        verb, non-3rd person singular present


In [7]:
doc = nlp(u"I read a books on NLP")
word = doc[1]
word.text
t=word
print(f"{t.text:{10}} {t.pos_:{10}} {t.tag_:{10}} {spacy.explain(t.tag_)}")

read       VERB       VBD        verb, past tense


In [8]:
doc = nlp(u"the quick brown fox jumped over the lazy dog's back.")
POS_counts = doc.count_by(spacy.attrs.POS)
POS_counts
#number:how many times it repeated

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [9]:
doc.vocab[84].text

'ADJ'

In [10]:
for k,v in sorted(POS_counts.items()):
  print(f"{k}. {doc.vocab[k].text:{5}}{v}")

84. ADJ  3
85. ADP  1
90. DET  2
92. NOUN 3
94. PART 1
97. PUNCT1
100. VERB 1


In [11]:

TAG_counts = doc.count_by(spacy.attrs.TAG)
for k,v in sorted(TAG_counts.items()):
  print(f"{k}. {doc.vocab[k].text:{5}}{v}")

74. POS  1
1292078113972184607. IN   1
10554686591937588953. JJ   3
12646065887601541794. .    1
15267657372422890137. DT   2
15308085513773655218. NN   3
17109001835818727656. VBD  1


In [12]:
DEP_counts = doc.count_by(spacy.attrs.DEP)
for k,v in sorted(DEP_counts.items()):
  print(f"{k}. {doc.vocab[k].text:{5}}{v}")

400. advmod1
402. amod 3
415. det  2
429. nsubj1
439. pobj 1
443. prep 1
445. punct1
8110129090154140942. case 1
8206900633647566924. ROOT 1


In [13]:
#visualizing POS

import spacy
nlp = spacy.load('en_core_web_sm')
doc = nlp(u"the quick brown fox jumped over the lazy dog's back.")
print(doc.text)

from spacy import displacy
displacy.render(doc,style="dep",jupyter = True)

the quick brown fox jumped over the lazy dog's back.


In [14]:
options = {'distance':110,'compact':'True','color':'yellow','bg':'#9a3d5','font':'Times'}


In [15]:
displacy.render(doc,style="dep",jupyter = True,options = options)

In [16]:
doc2 = nlp(u"This is a sentence. This is a second sentence. This is another sentence")
spans = list(doc2.sents)
displacy.serve(spans, style='dep',options = options)


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [17]:
#NAME_ENITITY_RECOGNITION



In [18]:
import spacy
nlp = spacy.load('en_core_web_sm')

def show_ents(doc):
  if doc.ents:
    for ent in doc.ents:
      print(ent.text+ ' - '+str(spacy.explain(ent.label_)))
  else:
    print("No entities")

In [19]:
doc = nlp(u"Hi How are you?")
show_ents(doc)

No entities


In [20]:
doc = nlp(u"May I go to Washington, DC next May to see the Washington Monument?")
show_ents(doc)

Washington, DC - Countries, cities, states
next May - Absolute or relative dates or periods
the Washington Monument - Companies, agencies, institutions, etc.


In [21]:
doc  =nlp(u"Tesla to build a U.K. factory for $6 million")
show_ents(doc)

U.K. - Countries, cities, states
$6 million - Monetary values, including unit


In [22]:
from spacy.tokens import Span
ORG = doc.vocab.strings[u"ORG"]
ORG

383

In [23]:
new_ent = Span (doc,0,1,label = ORG)
doc.ents = list(doc.ents)+[new_ent]

In [24]:
show_ents(doc)

Tesla - Companies, agencies, institutions, etc.
U.K. - Countries, cities, states
$6 million - Monetary values, including unit


In [25]:
#
doc = nlp(u"Our company created a brand new vacuum cleaner."
          u"This new vacuum-cleaner is the best in show")
show_ents(doc)

No entities


In [26]:
from spacy.matcher import PhraseMatcher
matcher = PhraseMatcher(nlp.vocab)
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']
phrase_patterns = [nlp(text) for text in phrase_list]
matcher.add('newproduct',None,*phrase_patterns)
found_matches = matcher(doc)
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [27]:
from spacy.tokens import Span

PROD = doc.vocab.strings[u"PRODUCT"]
new_ents = [Span(doc,match[1],match[2],label = PROD) for match in found_matches]

In [28]:
doc.ents = list(doc.ents)+new_ents
show_ents(doc)

vacuum cleaner - Objects, vehicles, foods, etc. (not services)
vacuum-cleaner - Objects, vehicles, foods, etc. (not services)


In [32]:
found_matches

[(2689272359382549672, 6, 8), (2689272359382549672, 11, 14)]

In [36]:
#visualizing NER

import spacy
nlp = spacy.load('en_core_web_sm')
from spacy import displacy
doc = nlp(u"Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million."
          u"By contrast, Sony only sold 8 thousand walkman music players")
displacy.render(doc,style='ent',jupyter=True)

In [37]:
for sent in doc.sents:
  displacy.render(nlp(sent.text),style = 'ent', jupyter=True)

In [42]:
colors ={"ORG":'red'}

colors_radiant = {'ORG':'linear-gradient(45deg, orange, red)'}

options = {'ents':['PRODUCT', 'ORG'],'colors':colors_radiant}

In [43]:
for sent in doc.sents:
  displacy.render(nlp(sent.text),style = 'ent', jupyter=True,options=options,)