In [1]:
import spacy
# Load the en_core_web_sm pipeline once; every Doc in this notebook is created by calling `nlp`.
nlp = spacy.load("en_core_web_sm")

In [7]:
# Sample sentence wrapped in literal double quotes and containing an apostrophe.
mystring = "\"We're moving to Hyderabad!\""

In [8]:
# Bare expression: the notebook displays the repr, so the escaped apostrophe is visible.
mystring

'"We\'re moving to Hyderabad!"'

In [9]:
# print() shows the string itself rather than its repr, so no backslash appears.
print(mystring)

"We're moving to Hyderabad!"


In [10]:
# Process the text with the loaded pipeline; the result can be iterated token by token.
doc = nlp(mystring)

In [11]:
# Tokenisation demo: one token per line. The output below shows the quotes, the
# contraction "'re" and the "!" coming out as separate tokens.
token_texts = [tok.text for tok in doc]
for text in token_texts:
    print(text)

"
We
're
moving
to
Hyderabad
!
"


In [13]:
# Sample text mixing repeated "!", a hyphenated word, an e-mail address and a URL.
# (No `u` prefix needed: every str literal is already Unicode in Python 3.)
doc1 = nlp(
    "Hey!! We're from LetItBeBros!! Kindly send us rapid-email to know your "
    "enthusiasm on NLP. You can send email to letitbebros@gmail.com or you can "
    "visit to https://letitbebros.blogspot.com/"
)

In [14]:
# spaCy is smart enough to know which punctuation to split off: the '.' characters
# inside the e-mail address (and the URL) are not treated as the end of a word or
# as standalone punctuation, so each of them stays intact as a single token.
# The same experiment can be done with money like $10.50 or a length like 5.29 km.
for tok in doc1:
    print(tok.text)

Hey
!
!
We
're
from
LetItBeBros
!
!
Kindly
send
us
rapid
-
email
to
know
your
enthusiasm
on
NLP
.
You
can
send
email
to
letitbebros@gmail.com
or
you
can
visit
to
https://letitbebros.blogspot.com/


In [15]:
# Length of the Doc, i.e. how many tokens were printed in the cell above.
len(doc1)

34

In [16]:
# The Vocab object attached to this Doc.
doc1.vocab

<spacy.vocab.Vocab at 0x1e45b4832c8>

In [17]:
# Number of entries currently held by that Vocab (not the number of tokens in doc1).
len(doc1.vocab)

57852

In [18]:
# Same count as len(doc1.vocab): presumably both Docs share the Vocab of the `nlp`
# pipeline that created them, so this is the size of the loaded en_core_web_sm
# vocabulary rather than anything specific to this sentence.
len(doc.vocab)

57852

In [20]:
# spaCy does not allow a Doc's tokens to be replaced: item assignment is unsupported.
# The failure is the point of this cell, so catch it and show the message instead of
# leaving an unhandled traceback that would stop a "Restart & Run All".
try:
    doc1[0] = 'Lol!!'
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [22]:
# Sentence reused below for the token, named-entity and displaCy examples.
doc2 = nlp("Tesla is going to build a factory in India of worth $1 Billion")

In [23]:
# Print all tokens on one line, each followed by " |" so the token boundaries are visible.
for tok in doc2:
    print(tok.text, end=" |")
    

Tesla |is |going |to |build |a |factory |in |India |of |worth |$ |1 |Billion |

In [27]:
# For each named entity found in the sentence, print its text, its label and
# spaCy's explanation of that label, followed by blank lines as a separator.
for ent in doc2.ents:
    print(ent, ent.label_, spacy.explain(ent.label_), "\n", sep="\n")

Tesla
PRODUCT
Objects, vehicles, foods, etc. (not services)


India
GPE
Countries, cities, states


$1 Billion
MONEY
Monetary values, including unit




In [28]:
doc3 = nlp("Autonomous cars shift insurance liability toward manufacturers.")

# Iterate over the noun phrases of the sentence and print each one's text.
for noun_chunk in doc3.noun_chunks:
    print(noun_chunk.text)

Autonomous cars
insurance liability
manufacturers


In [29]:
from spacy import displacy

In [30]:
# style="dep" renders the syntactic dependencies inline in the notebook;
# the "distance" option controls the spacing between words in the drawing.
displacy.render(doc2, style="dep", jupyter=True, options={"distance": 100})

In [31]:
# style="ent" highlights the named entities inline in the text.
# "distance" is an option of the dependency visualizer only and has no effect on
# the entity visualizer, so it is not passed here.
displacy.render(doc2, style="ent", jupyter=True)

In [32]:
# Start displaCy's built-in web server for the dependency view, then open
# http://127.0.0.1:5000/ in a browser while this cell is running.
displacy.serve(doc2, style="dep")


[93m    Serving on port 5000...[0m
    Using the 'dep' visualizer



127.0.0.1 - - [09/Jul/2022 23:22:49] "GET / HTTP/1.1" 200 10626
127.0.0.1 - - [09/Jul/2022 23:22:49] "GET /favicon.ico HTTP/1.1" 200 10626



    Shutting down server on port 5000.

