In [1]:
import spacy
# Load the 'en_core_web_sm' pipeline once; the resulting `nlp` callable is what
# every later cell uses to turn a string into a processed document.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Sample sentence that carries its own double quotes plus an apostrophe,
# written with escaped double quotes so the apostrophe needs no backslash.
mystring = "\"We're moving to L.A.!\""

In [3]:
# Bare expression: the notebook echoes the repr, so the outer quotes and the escaped apostrophe are shown
mystring

'"We\'re moving to L.A.!"'

In [4]:
# print() writes the string itself: only the literal double quotes remain, the backslash escape is gone
print(mystring)

"We're moving to L.A.!"


In [7]:
# Tokenize the quoted sentence and list one token per line.
# The surrounding quotes, the "'re" of the contraction and the "!" come out as
# separate tokens, while "L.A." stays in one piece.
doc = nlp(mystring)
print(*(tok.text for tok in doc), sep='\n')

"
We
're
moving
to
L.A.
!
"


In [9]:
# Sentence mixing a contraction, a hyphenated word, an e-mail address and a URL
doc2 = nlp(
    "We're here to help! Send snail-mail, email support@oursite.com "
    "or visit us at https://www.oursite.com!"
)

In [12]:
# One token per line: "snail-mail" is split around its hyphen, whereas the
# e-mail address and the URL are each kept as a single token.
print(*doc2, sep='\n')

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
https://www.oursite.com
!


In [13]:
# Units and currency: "5km" is split into "5" and "km", "$" is split from the
# amount, and the decimal "10.30" stays one token.
doc3 = nlp("5km NYC cab ride costs $10.30")
print(*doc3, sep='\n')

5
km
NYC
cab
ride
costs
$
10.30


In [14]:
# A period that belongs to a known abbreviation ("St.", "U.S.") stays attached to its token
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")

In [15]:
# One token per line: "St." and "U.S." keep their periods, the sentence-final "." is split off
print(*doc4, sep='\n')

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [16]:
# len() of a Doc is its token count: 11 here, because "'s" and the final "." are tokens of their own
len(doc4)

11

In [18]:
# len() of the Doc's vocab counts vocabulary entries, not the tokens of this sentence
# (57852 in the recorded run versus 11 tokens).
# NOTE(review): the exact figure presumably depends on the installed model — confirm.
len(doc4.vocab)

57852

In [19]:
# Short sentence used by the following cells to show indexing and slicing on a Doc
doc5 = nlp("It is better to give than receive")

In [20]:
# Integer indexing on a Doc returns a single token; index 0 is the first one ("It")
doc5[0]

It

In [22]:
# Slicing a Doc returns a span: tokens 2, 3 and 4 here, the stop index 5 being excluded
doc5[2:5]  # span

better to give

In [23]:
# Tokens cannot be reassigned: a Doc does not support item assignment, so once
# a text has been processed its tokens are fixed.
# The TypeError is the point of this cell, so it is caught and its message is
# displayed; an uncaught exception would stop a "Restart & Run All" here.
try:
    doc5[0] = "Test"
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [27]:
#spaCy can recognise named entities such as companies, locations, etc.
#the named entities of a document are accessed through its .ents attribute

In [25]:
# Show the tokens on a single line, each followed by ' | '
doc8 = nlp("Apple to build a Hong Kong factory for $6 million")
print(*(tok.text for tok in doc8), sep=' | ', end=' | ')

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 

In [29]:
# Walk the named entities found in doc8; for each one show its text, its label
# and spaCy's plain-English explanation of that label, then leave a gap.
for ent in doc8.ents:
    label = ent.label_
    print(ent, label, spacy.explain(label), '\n', sep='\n')

Apple
ORG
Companies, agencies, institutions, etc.


Hong Kong
GPE
Countries, cities, states


$6 million
MONEY
Monetary values, including unit




In [32]:
# Besides .ents, a Doc also exposes noun chunks: base noun phrases.

doc9 = nlp("Autonomous cars shift insurance liability towards manufacturers.")

In [33]:
# One noun chunk per line
print(*doc9.noun_chunks, sep='\n')

Autonomous cars
insurance liability
manufacturers


In [34]:
from spacy import displacy

In [35]:
# Point `doc` at a new sentence; the dependency visualisation in the next cell draws this one
doc = nlp("Apple is going to build a U.K. factory for $6 million.")

In [37]:
# style='dep' asks displaCy for the syntactic dependency diagram;
# the 'distance' option sets the spacing between words in that diagram.
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={'distance': 100},
)

In [44]:
# Next, a sentence for the entity visualiser, which marks up named entities in the text
doc = nlp(
    "Over the last quarter Apple has sold nearly 20 thousands iPods "
    "for a profit of $6 million."
)

In [45]:
# style='ent' renders the text with its named entities marked up
displacy.render(doc, jupyter=True, style='ent')

In [46]:
# Outside Jupyter (e.g. in a plain .py script), displacy.serve() starts a local web server for the visualisation.
# NOTE(review): in the recorded run this cell kept serving until the server was shut down,
# so a "Run All" will presumably stall here until it is interrupted — confirm before sharing.
doc = nlp(u"This is a sentence.")
displacy.serve(doc, style='dep')
# Then open http://127.0.0.1:5000/ in a browser to view the dependency diagram.


[93m    Serving on port 5000...[0m
    Using the 'dep' visualizer



127.0.0.1 - - [22/Aug/2023 19:18:46] "GET / HTTP/1.1" 200 3057
127.0.0.1 - - [22/Aug/2023 19:18:46] "GET /favicon.ico HTTP/1.1" 200 3057



    Shutting down server on port 5000.



In [47]:
# More displaCy examples, this time on a text made of several sentences

doc10 = nlp(
    "Today is a sunny day. I would love to be at the beach! "
    "It seems pretty far-fetched now but I really want to visit Cox's Bazar right now. "
    "I hope i get to visit St. Martins this December!"
)


In [48]:
# Entity view of the multi-sentence text
displacy.render(doc10, jupyter=True, style='ent')

In [49]:
# Dependency view of the same text, using a word distance of 70 instead of the 100 used earlier
displacy.render(
    doc10,
    style='dep',
    jupyter=True,
    options={'distance': 70},
)