In [1]:
import spacy
# Load spaCy's small English pipeline. The model package must already be
# installed (e.g. `python -m spacy download en_core_web_sm`).
nlp= spacy.load("en_core_web_sm")

In [2]:
# Sample biography text analysed in the cells below. The line breaks inside
# the triple-quoted string are part of the text, so they reach the tokenizer
# (the recorded POS counts report four "space" tokens).
s="""Robert Nimmo (1893–1966) was a senior Australian Army officer and 
the chief military observer (CMO) of the United Nations Military Observer
Group in India and Pakistan (UNMOGIP) from 1950 until his death.
Nimmo graduated early from the Royal Military College, Duntroon, 
to participate in World War I, serving with the Australian Light Horse."""

In [5]:
# Run the loaded pipeline over the sample text; the resulting Doc is what the
# POS-tagging cells below iterate over.
doc=nlp(s)

In [10]:
# Keep every token whose coarse POS tag is not VERB, ADJ or ADV.
# Only those three tags are dropped: auxiliaries (e.g. "was"), punctuation
# and whitespace tokens remain in the result.
final = [token for token in doc if token.pos_ not in ("VERB", "ADJ", "ADV")]
    
    

In [11]:
# Display the tokens that survived the POS filter (bare last expression).
final

[Robert,
 Nimmo,
 (,
 1893–1966,
 ),
 was,
 a,
 Army,
 officer,
 and,
 ,
 the,
 observer,
 (,
 CMO,
 ),
 of,
 the,
 United,
 Nations,
 Military,
 Observer,
 ,
 Group,
 in,
 India,
 and,
 Pakistan,
 (,
 UNMOGIP,
 ),
 from,
 1950,
 until,
 his,
 death,
 .,
 ,
 Nimmo,
 from,
 the,
 Royal,
 Military,
 College,
 ,,
 Duntroon,
 ,,
 ,
 to,
 in,
 World,
 War,
 I,
 ,,
 with,
 the,
 Light,
 Horse,
 .]

In [13]:
# Number of times each POS tag occurs in `doc`.
# The result is a dict keyed by the integer ID of the POS attribute value
# (not the tag name), mapping to the token count.
count= doc.count_by(spacy.attrs.POS)

In [14]:
# Display the raw {POS id: token count} mapping.
count

{96: 23,
 97: 11,
 87: 1,
 90: 5,
 84: 5,
 92: 3,
 89: 2,
 103: 4,
 85: 7,
 93: 1,
 95: 1,
 100: 3,
 86: 1,
 94: 1}

In [18]:
# Translate an integer POS ID from `count` back into its tag name by looking
# the ID up in the vocabulary (here ID 97).
doc.vocab[97].text

'PUNCT'

In [24]:
# Print one line per POS tag: spaCy's plain-English description of the tag
# followed by the number of tokens carrying it.
for pos_id, n_tokens in count.items():
    tag_name = doc.vocab[pos_id].text  # integer ID -> tag name
    print(f"{spacy.explain(tag_name)} : {n_tokens}")

proper noun : 23
punctuation : 11
auxiliary : 1
determiner : 5
adjective : 5
noun : 3
coordinating conjunction : 2
space : 4
adposition : 7
numeral : 1
pronoun : 1
verb : 3
adverb : 1
particle : 1


# Named Entity Recognition

In [25]:
# Parse a short example sentence for the entity demo.
# NOTE: this rebinds `doc`; from here on `doc` is the Tesla sentence, no
# longer the biography text parsed earlier.
doc=nlp("Tesla Inc is going  to acquire Twitter for $455  billion ")

In [29]:
# First named-entity span detected in the sentence.
doc.ents[0] 

Tesla Inc

In [30]:
# Second named-entity span detected in the sentence.
doc.ents[1] 

Twitter

In [33]:
# Third named-entity span; the span covers the whole monetary expression.
doc.ents[2] 

$455  billion

In [34]:
# Parse the biography text `s` again under a separate name, because `doc`
# now refers to the Tesla sentence.
doc1=nlp(s)

In [36]:
# Entity at position 10 of the biography's entity tuple.
# NOTE(review): the index is hard-coded against the entities this model run
# produced; it may point elsewhere if the predictions change.
doc1.ents[10]

Duntroon

In [38]:
# Entity at position 12 (the last one in the recorded run).
# NOTE(review): hard-coded index; raises IndexError if fewer entities are found.
doc1.ents[12]

the Australian Light Horse

In [45]:
# Report every named entity in `doc` (the Tesla sentence when the cells are
# run in order). Per entity, print:
#   text   - the entity span as a plain string
#   label_ - the entity type as a string (e.g. ORG)
#   label  - the same type as its integer ID
#   spacy.explain(label_) - a short description of the type; explain() takes
#                           the string form of the label.
for ent in doc.ents:
    fields = (ent.text, ent.label_, ent.label, spacy.explain(ent.label_))
    print(*fields, sep=" - ")

Tesla Inc - ORG - 383 - Companies, agencies, institutions, etc.
Twitter - PRODUCT - 386 - Objects, vehicles, foods, etc. (not services)
$455  billion - MONEY - 394 - Monetary values, including unit


In [46]:
# Report every named entity in the biography text: span text, label name,
# integer label ID and spaCy's description of the label.
# NOTE: labels are model predictions and can be wrong; in the recorded output
# "UNMOGIP" is tagged DATE and the second "Nimmo" is tagged ORG.
for entity in doc1.ents:
    description = spacy.explain(entity.label_)
    print(f"{entity.text} - {entity.label_} - {entity.label} - {description}")

Robert Nimmo - PERSON - 380 - People, including fictional
Australian Army - ORG - 383 - Companies, agencies, institutions, etc.
CMO - ORG - 383 - Companies, agencies, institutions, etc.
the United Nations Military Observer
Group - ORG - 383 - Companies, agencies, institutions, etc.
India - GPE - 384 - Countries, cities, states
Pakistan - GPE - 384 - Countries, cities, states
UNMOGIP - DATE - 391 - Absolute or relative dates or periods
1950 - DATE - 391 - Absolute or relative dates or periods
Nimmo - ORG - 383 - Companies, agencies, institutions, etc.
the Royal Military College - ORG - 383 - Companies, agencies, institutions, etc.
Duntroon - GPE - 384 - Countries, cities, states
World War I - EVENT - 387 - Named hurricanes, battles, wars, sports events, etc.
the Australian Light Horse - ORG - 383 - Companies, agencies, institutions, etc.


In [49]:
from spacy import displacy

In [53]:
# Highlight the named entities (not nouns) of the biography text inline,
# each annotated with its entity label.
displacy.render(doc1, style="ent")

In [55]:
# Draw the dependency parse of `doc`.
# NOTE: `doc` was rebound to the Tesla sentence in the NER section, so that
# sentence (not the biography text) is rendered when cells run top to bottom.
displacy.render(doc, style="dep")