In [1]:
# Load spaCy and the 'en_core_web_sm' English pipeline; `nlp` is reused by the cells below.
import spacy
nlp = spacy.load('en_core_web_sm')

In [3]:
# Sample sentence wrapped in literal double quotes, with a detached "'re" contraction.
mystring = "\"We 're moving to L.A.!\""

In [4]:
# Show the string as stored: the backslash escape is resolved, the inner double quotes remain.
print(mystring)

"We 're moving to L.A.!"


In [5]:
# Run the pipeline on the string; the resulting Doc is iterated token by token in the next cell.
doc = nlp(mystring)

In [6]:
# spaCy splits punctuation (the quotes, "!") into separate tokens but keeps "L.A." whole.
print(*(token.text for token in doc), sep='\n')

"
We
're
moving
to
L.A.
!
"


In [7]:
# Sentence mixing a contraction, a hyphenated word, an email address and a URL.
doc = nlp(u"We're here to help! Send snail-mail, email support at suppport@oursite.com or visit us at https://oursite.com")

In [8]:
# spaCy keeps the email address and the URL as single tokens, while "snail-mail" is split on the hyphen.
print('\n'.join([token.text for token in doc]))

We
're
here
to
help
!
Send
snail
-
mail
,
email
support
at
suppport@oursite.com
or
visit
us
at
https://oursite.com


In [9]:
# Sentence with a distance unit ("5km") and a dollar amount ("$10.50").
doc = nlp(u"A 5km NYC cab ride cost $10.50 ")

In [10]:
# "5km" is split into number and unit, and "$" is separated from "10.50".
for text in (t.text for t in doc):
    print(text)

A
5
km
NYC
cab
ride
cost
$
10.50


In [11]:
# Sentence with a contraction ("Let's") and dotted abbreviations ("St.", "U.S.").
doc = nlp(u"Let's visit St. Louis in the U.S. next year")

In [12]:
# "St." and "U.S." stay intact as single tokens; "Let's" becomes "Let" + "'s".
print('\n'.join(t.text for t in doc))

Let
's
visit
St.
Louis
in
the
U.S.
next
year


In [13]:
# Number of tokens in the Doc (the ten tokens printed by the previous cell).
len(doc)

10

In [14]:
# The Vocab object attached to this Doc.
doc.vocab

<spacy.vocab.Vocab at 0x1d5ae1a5f48>

In [16]:
# Size of the vocabulary attached to the loaded en_core_web_sm pipeline:
# 57852 entries at this point (the count is of vocabulary entries, not of tokens in `doc`).
len(doc.vocab)

57852

In [17]:
# Short sentence used below to demonstrate indexing and slicing a Doc.
doc5 = nlp(u"It's better to give than recieve")

In [18]:
# Indexing a Doc returns a single token (here the first one).
doc5[0]

It

In [19]:
# Slicing a Doc returns a run of consecutive tokens (positions 2, 3 and 4).
doc5[2:5]

better to give

In [20]:
# Tokens can't be reassigned: a Doc does not support item assignment.
# The TypeError is the point of this cell, so catch it and print it instead of
# letting the cell fail, which would stop a Restart & Run All at this point.
try:
    doc5[0] = 'test'
except TypeError as err:
    print('TypeError:', err)

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [28]:
# Named entities: a sentence containing a company, a place and a sum of money.

doc8 = nlp(u"Apple to build a Hong Kong factory for $6 million")

In [30]:
# Print every token on one line, each followed by '|' (no trailing newline).
print('|'.join(t.text for t in doc8), end = '|')

Apple|to|build|a|Hong|Kong|factory|for|$|6|million|

In [35]:
# For each named entity spaCy found, print its text, its label, and
# spaCy's explanation of that label, then a blank-line separator.
for entity in doc8.ents:
    print(entity, entity.label_, spacy.explain(entity.label_), '\n', sep='\n')

Apple
ORG
Companies, agencies, institutions, etc.


Hong Kong
GPE
Countries, cities, states


$6 million
MONEY
Monetary values, including unit




In [36]:
# Noun chunks = a noun plus the words describing it.

doc9 = nlp('Autonomous cars shift insurance liabilities towards manufacturers')

In [37]:
# Print each noun chunk followed by two blank lines.
for chunk in doc9.noun_chunks:
    print(chunk, end='\n\n\n')

Autonomous cars


insurance liabilities


manufacturers




In [42]:
# Creating visualisations with displaCy

# NOTE(review): `spacy` is already imported in the first cell, so this re-import is redundant;
# ideally both imports would live together in that first cell.
import spacy
from spacy import displacy

In [44]:
# Sentence for the dependency visualisation. The amount after "$" was missing
# ("$  billion"), leaving a dangling currency symbol; a figure is restored so the
# money phrase is well-formed.
doc = nlp(u'Apple is going to build a U.K. factory for $6 billion')

In [50]:
# Draw the dependency parse inline in the notebook; `distance` is the dep visualizer's word-spacing option.
displacy.render(doc, style = 'dep', jupyter = True, options ={'distance':50})

In [53]:
# Sentence with a company, a quantity and a sum of money, used for the entity visualisation.
doc89 = nlp(u'Over the last quarter Apple sold nearly 20 thousand ipods for $6 million ')

In [54]:
# Highlight the named entities inline in the notebook. `distance` is a dependency-
# visualizer option, not one of the entity visualizer's documented options, so it is
# no longer passed here.
displacy.render(doc89, style='ent', jupyter=True)

In [None]:
# Host the visualisation on a local web server instead of rendering it inline.
# Once it reports "Serving on port 5000...", open http://127.0.0.1:5000/ in a browser.
# NOTE(review): this cell has no execution count, so it presumably keeps running while
# the server is up — interrupt the kernel to stop it; confirm before relying on Run All.
docs = nlp(u"This is a sentence.")
displacy.serve(docs, style='dep')


[93m    Serving on port 5000...[0m
    Using the 'dep' visualizer



127.0.0.1 - - [03/Jun/2020 22:13:48] "GET / HTTP/1.1" 200 3057
127.0.0.1 - - [03/Jun/2020 22:13:48] "GET /favicon.ico HTTP/1.1" 200 3057
