In [4]:
# One-time environment setup -- uncomment and run once if spaCy or the model is missing:
# !pip install -U spacy
# !python -m spacy download en_core_web_sm
# !pip install -U spacy-lookups-data

In [5]:
import spacy

In [11]:
# Load the 'en_core_web_sm' pipeline once; `nlp` is the callable every later
# cell uses to turn raw text into a processed document.
nlp = spacy.load('en_core_web_sm')

In [None]:
# Tokenization of text

In [12]:
# Run the pipeline on a sample sentence; the result is iterated token by token below.
doc = nlp('Apple is looking at buying U.K. based startup for $1 billion')

In [14]:
# Iterating the document yields its tokens; print each token's text on its own line.
# Note in the output that "U.K." stays one token while "$" and "1" are split apart.
for token in doc:
    print(token.text)

Apple
is
looking
at
buying
U.K.
based
startup
for
$
1
billion


In [15]:
# Same sentence with a contraction ("isn't") to see how the tokenizer splits it.
# This rebinds `doc`, so every later cell that uses `doc` sees this version.
doc = nlp("Apple isn't looking at buying U.K. based startup for $1 billion")

In [16]:
# Print the tokens again; the contraction comes out as two tokens, "is" and "n't".
for token in doc:
    print(token.text)

Apple
is
n't
looking
at
buying
U.K.
based
startup
for
$
1
billion


In [None]:
# part of speech tagging (POS)
# applying lemmatization

In [17]:
# Print each token next to its lemma, e.g. "looking" -> "look" and "n't" -> "not".
for token in doc:
    print(token.text, token.lemma_)

Apple Apple
is be
n't not
looking look
at at
buying buy
U.K. U.K.
based base
startup startup
for for
$ $
1 1
billion billion


In [None]:
# Applying Lemmatization, Part of Speech, Is stop

In [20]:
# Tabulate text, lemma, part-of-speech tag and stop-word flag in fixed-width columns
# (widths 15 / 10 / 10 / 10).
# The stop-word flag is formatted with a width spec, so it is rendered numerically:
# a right-aligned 0 or 1 rather than the words False / True.
for token in doc:
    row = f'{token.text:15} {token.lemma_:10} {token.pos_:10} {token.is_stop:10}'
    print(row)

Apple           Apple      PROPN               0
is              be         AUX                 1
n't             not        PART                1
looking         look       VERB                0
at              at         ADP                 1
buying          buy        VERB                0
U.K.            U.K.       PROPN               0
based           base       VERB                0
startup         startup    NOUN                0
for             for        ADP                 1
$               $          SYM                 0
1               1          NUM                 0
billion         billion    NUM                 0


In [21]:
# checking the dependency parsing (via noun chunks and the dependency label of each chunk's root)

In [24]:
# One row per noun chunk: the chunk's text, the text of its root token,
# and the dependency label attached to that root.
for chunk in doc.noun_chunks:
    root = chunk.root
    print(f'{chunk.text:30} {root.text:15} {root.dep_:10}')

Apple                          Apple           nsubj     
U.K.                           U.K.            dobj      
based startup                  startup         dobj      


In [25]:
# named entity recognition
# Apple -> organization (ORG)
# U.K. -> geopolitical entity (GPE)
# $1 billion -> money (MONEY)

In [26]:
# Print every entity span found in the document together with its label.
for ent in doc.ents:
    print(ent.text, ent.label_)

Apple ORG
U.K. GPE
$1 billion MONEY


In [None]:
# sentence segmentation

In [27]:
# Print each sentence found in `doc`; this sample comes back as a single sentence.
for sent in doc.sents:
    print(sent)

Apple isn't looking at buying U.K. based startup for $1 billion


In [28]:
# Second sample: two sentences separated by a period followed by a space.
doc1 = nlp('hi this side Rageshwar Sharma. how are you doing')

In [29]:
# Print each sentence found in `doc1`; the text is split after the period.
for sent in doc1.sents:
    print(sent)

hi this side Rageshwar Sharma.
how are you doing


In [34]:
# Harder case: runs of dots with no surrounding spaces instead of ". ".
# This rebinds `doc1`, so the later cells that use `doc1` see this version.
doc1 = nlp('hi..this side Rageshwar Sharma...how are you doing')

In [35]:
# Print the sentences again; here the text splits after "hi.." but not at the "...".
for sent in doc1.sents:
    print(sent)

hi..
this side Rageshwar Sharma...how are you doing


In [36]:
# visualization

In [37]:
from spacy import displacy

In [38]:
# Echo `doc` as a reminder of which text the next render uses.
doc

Apple isn't looking at buying U.K. based startup for $1 billion

In [40]:
# Render `doc` with displaCy's 'dep' style (dependency view); the drawing is not captured in this export.
displacy.render(doc,style='dep')

In [41]:
# Echo `doc1` as a reminder of which text the next renders use.
doc1

hi..this side Rageshwar Sharma...how are you doing

In [42]:
# Render `doc1` with displaCy's 'dep' style, using default options.
displacy.render(doc1,style='dep')

In [43]:
# Same 'dep' render of `doc1`, now passing explicit displaCy options:
# 'compact' switched on and 'distance' set to 100.
# NOTE(review): presumably this gives a tighter layout than the default -- the rendered
# output is not captured in this export, so confirm against the displaCy docs.
displacy.render(doc1,style='dep',options={'compact':True,'distance':100})

In [44]:
# Render `doc1` with displaCy's 'ent' style (named-entity view).
displacy.render(doc1,style='ent')

In [45]:
 displacy.render(doc,style='ent')