In [12]:
import spacy
from spacy import displacy

In [13]:
# A blank English pipeline has no components (nlp.pipe_names is empty), so
# calling it on text only splits the text into tokens.
nlp = spacy.blank("en")

doc = nlp("Captain america ate 100$ of samosa. Then he said I can do this all day.")

# One token per line; "100$" comes out as two tokens, "100" and "$".
print(*doc, sep="\n")

Captain
america
ate
100
$
of
samosa
.
Then
he
said
I
can
do
this
all
day
.


In [14]:
# Component names of the current pipeline; empty for the blank pipeline.
nlp.pipe_names

[]

In [15]:
# Rebind `nlp` to the trained `en_core_web_sm` pipeline, replacing the blank
# pipeline created earlier. The model package must be installed for this to load.
nlp = spacy.load("en_core_web_sm")

In [16]:
# Component names of the loaded pipeline (no longer empty).
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [17]:
# The same components as (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x11478c32fa0>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x11478887640>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x114784a5820>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x114784c1c80>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x11478356900>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x114784a5cf0>)]

In [18]:
# Same sentence as in the blank-pipeline cell, now processed by the loaded
# pipeline so that tokens carry part-of-speech tags and lemmas.
doc = nlp("Captain america ate 100$ of samosa. Then he said I can do this all day.")

In [20]:
# Print "token  |  part of speech  |  lemma" for every token; spacy.explain
# turns the coarse tag (e.g. PROPN) into its readable description.
for token in doc:
    print(token, spacy.explain(token.pos_), token.lemma_, sep="  |  ")

Captain  |  proper noun  |  Captain
america  |  proper noun  |  america
ate  |  verb  |  eat
100  |  numeral  |  100
$  |  noun  |  $
of  |  adposition  |  of
samosa  |  proper noun  |  samosa
.  |  punctuation  |  .
Then  |  adverb  |  then
he  |  pronoun  |  he
said  |  verb  |  say
I  |  pronoun  |  I
can  |  auxiliary  |  can
do  |  verb  |  do
this  |  pronoun  |  this
all  |  determiner  |  all
day  |  noun  |  day
.  |  punctuation  |  .


In [21]:
# Rebind `doc` to a sentence mentioning a company and a sum of money, used
# below to inspect named entities.
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

In [24]:
# List each named entity found in `doc` as "<entity text> <entity label>".
for entity in doc.ents:
    print(f"{entity.text} {entity.label_}")

Tesla Inc ORG
$45 billion MONEY


In [25]:
# Visualise the named entities of `doc` using displaCy's entity style.
# `displacy` is imported in the notebook's import cell at the top, so this
# cell no longer carries its own import.
displacy.render(doc, style="ent")

# Exercise

In [26]:
# Exercise input. The line breaks inside the triple-quoted string are part of
# the text handed to the pipeline; in the recorded token listing below, the
# first one shows up as its own whitespace token.
text = '''Ravi and Raju are the best friends from school days.They wanted to go for a world tour and 
visit famous cities like Paris, London, Dubai, Rome etc and also they called their another friend Mohan to take part of this world tour.
They started their journey from Hyderabad and spent next 3 months travelling all the wonderful cities in the world and cherish a happy moments!
'''

In [27]:
# Run the loaded pipeline over the exercise paragraph (rebinds `doc`).
doc = nlp(text)

In [29]:
# Token-by-token view of the paragraph: token, readable part of speech, lemma.
for token in doc:
    pos_description = spacy.explain(token.pos_)
    print(token, pos_description, token.lemma_, sep="  |  ")

Ravi  |  noun  |  ravi
and  |  coordinating conjunction  |  and
Raju  |  proper noun  |  Raju
are  |  auxiliary  |  be
the  |  determiner  |  the
best  |  adjective  |  good
friends  |  noun  |  friend
from  |  adposition  |  from
school  |  noun  |  school
days  |  noun  |  day
.  |  punctuation  |  .
They  |  pronoun  |  they
wanted  |  verb  |  want
to  |  particle  |  to
go  |  verb  |  go
for  |  adposition  |  for
a  |  determiner  |  a
world  |  noun  |  world
tour  |  noun  |  tour
and  |  coordinating conjunction  |  and

  |  space  |  

visit  |  verb  |  visit
famous  |  adjective  |  famous
cities  |  noun  |  city
like  |  adposition  |  like
Paris  |  proper noun  |  Paris
,  |  punctuation  |  ,
London  |  proper noun  |  London
,  |  punctuation  |  ,
Dubai  |  proper noun  |  Dubai
,  |  punctuation  |  ,
Rome  |  proper noun  |  Rome
etc  |  other  |  etc
and  |  coordinating conjunction  |  and
also  |  adverb  |  also
they  |  pronoun  |  they
called  |  verb  |  c

In [35]:
# Collect the tokens tagged as proper nouns, print them, then print how many.
#
# The coarse tag is compared directly (`pos_ == "PROPN"`) rather than going
# through spacy.explain and matching its human-readable description: the tag
# is the stable identifier, the description is display text.
#
# The result reflects the model's tagging, not ground truth: in the recorded
# run above "Ravi" was tagged as a plain noun, so it is not counted here.
proper_nouns = [token for token in doc if token.pos_ == "PROPN"]
for token in proper_nouns:
    print(token)

# `count` is kept as a name so any later cell that reads it still works.
count = len(proper_nouns)
print(count)

Raju
Paris
London
Dubai
Rome
Mohan
Hyderabad
7


In [36]:

# Second exercise input: a list of company names (rebinds `text`). The line
# break inside the literal is part of the text handed to the pipeline.
text = '''The Top 5 companies in USA are Tesla, Walmart, Amazon, Microsoft, Google and the top 5 companies in 
India are Infosys, Reliance, HDFC Bank, Hindustan Unilever and Bharti Airtel'''


In [37]:
# Run the loaded pipeline over the company list (rebinds `doc`).
doc = nlp(text)

In [44]:
# Print only the entities labelled as organisations. Entity boundaries come
# from the model: in the recorded output "Infosys, Reliance" is returned as a
# single entity rather than two.
for ent in doc.ents:
    if ent.label_ != "ORG":
        continue
    print(ent)

Tesla
Walmart
Amazon
Microsoft
Google
Infosys, Reliance
HDFC Bank
Hindustan Unilever
Bharti Airtel
