In [None]:
# Load the spaCy 'en_core_web_sm' pipeline; the cells below call it as `nlp`.
import spacy
nlp = spacy.load('en_core_web_sm')

In [None]:
# Split on runs of non-word characters.
# NOTE: r'\W+' is broader than whitespace: the apostrophe and the dots are
# separators too, so "I'm" becomes 'I', 'm' and "N.I.E.T." becomes four letters.
# Because the text ends in '!', the split also yields a trailing empty string.
import re

text = 'I\'m with you for the entire life in N.I.E.T.!'
words = re.compile(r'\W+').split(text)
print(words[:100])

['I', 'm', 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'N', 'I', 'E', 'T', '']


In [None]:
# Select words: split on runs of non-word characters (\W+) so that only
# runs of word characters remain as list items.
# The text ends in '!', so the split leaves one empty string at the end of the list.
# Relies on `re` and `text` from the cell above.
words = re.split(r'\W+', text)
print(words[:100])

['I', 'm', 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'N', 'I', 'E', 'T', '']


In [None]:
import re
import string

# Tokenize on whitespace first, so punctuation stays attached to its word.
words = text.split()

# Character class matching any single character listed in string.punctuation;
# re.escape keeps characters such as ']' and '\' from breaking the class.
re_punc = re.compile(
    '[%s]' % re.escape(string.punctuation)
)

# Delete every punctuation character inside each token.
stripped = list(map(lambda token: re_punc.sub('', token), words))
print(stripped[:100])



['Im', 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'NIET']


In [None]:
# Remove every character that is NOT in string.printable (the leading ^ negates
# the character class). Punctuation is part of string.printable, so it is kept;
# the sample tokens contain only printable characters and come out unchanged.
re_print = re.compile('[^%s]' % re.escape(string.printable))
result = list(map(lambda token: re_print.sub('', token), words))
print(result)


["I'm", 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'N.I.E.T.!']


In [None]:
# Normalizing Case

# Tokenize on whitespace and lower-case each token in a single step, so that
# `words` is built directly from `text` and the cell can be re-run safely.
words = list(map(str.lower, text.split()))
print(words[:100])

["i'm", 'with', 'you', 'for', 'the', 'entire', 'life', 'in', 'n.i.e.t.!']


In [None]:
# Same import and pipeline load as the notebook's first cell, repeated here;
# `nlp` is rebound to the newly loaded 'en_core_web_sm' pipeline.
import spacy
nlp = spacy.load('en_core_web_sm')

In [None]:
# NOTE(review): this rebinds the name `string`, which an earlier cell imported as
# the standard-library module. Once this cell has run, `string.punctuation` and
# `string.printable` raise AttributeError until `import string` is executed again,
# so the earlier regex cells cannot be re-run out of order.
# The sentence itself is wrapped in literal double quotes.
string = '"I\'m with you for the entire life in P.K.!"'
print(string)

"I'm with you for the entire life in P.K.!"


In [None]:
# Run the pipeline on the quoted sentence and print every token's text on a
# single line, each followed by ' | ' (end=' | ' replaces the newline).
doc = nlp(string)
for token in doc:
    print(token.text, end=' | ')

" | I | 'm | with | you | for | the | entire | life | in | P.K. | ! | " | 

In [None]:
# Tokenization check on a contraction, a hyphenated word and an e-mail address.
# In the recorded output "snail-mail" is split around the hyphen, while the
# e-mail address stays a single token.
doc2 = nlp(u"We're here to help! Send snail-mail, email 0211cse104@niet.co.in")
for t in doc2:
    print(t)

We
're
here
to
help
!
Send
snail
-
mail
,
email
0211cse104@niet.co.in


In [None]:
# Tokenization check on a distance and a price: in the recorded output "5km" and
# "$10.30" each come out as two tokens, and the decimal number stays intact.
doc3 = nlp(u'A 5km NYC cab ride costs $10.30')
for t in doc3:
    print(t)

A
5
km
NYC
cab
ride
costs
$
10.30


In [None]:
# Tokenization check on "'s" and an abbreviation: in the recorded output "'s" is
# split off as its own token, while "U.S." is kept as a single token.
doc4 = nlp(u"Let's visit Preeti's Home in the U.S. next year.")
for t in doc4:
    print(t)

Let
's
visit
Preeti
's
Home
in
the
U.S.
next
year
.


In [None]:
# Number of tokens in `doc` (the quoted sentence tokenized in an earlier cell).
len(doc)

13

In [None]:
# Size of the vocabulary object attached to `doc`.
# NOTE(review): presumably the number of entries the pipeline has stored so far,
# so the value may differ between runs; confirm against the spaCy Vocab docs.
len(doc.vocab)

798

In [None]:
doc5 = nlp(u'It is better to give than to receive.')
# Retrieve the third token (indexing is zero-based, so it sits at position 2):
doc5[2]

better

In [None]:
# Retrieve three tokens from the middle (positions 2, 3 and 4; the end index 5 is excluded):
doc5[2:5]

better to give

In [None]:
# Retrieve the last four tokens (three words plus the final '.', which is a token of its own):
doc5[-4:]

than to receive.

In [None]:
# Two sentences with the same structure; the next cell swaps the fourth token
# of doc6 for the fourth token of doc7.
doc6 = nlp(u'My dinner was horrible.')
doc7 = nlp(u'Your dinner was delicious.')

In [None]:
# Create a list of token texts from doc6
doc6_tokens = [token.text for token in doc6]

# Replace the fourth token ("horrible") with the fourth token from doc7 ("delicious")
doc6_tokens[3] = doc7[3].text

# Rebuild the sentence with each original token's trailing whitespace
# (token.whitespace_) rather than joining on " ": a plain space-join also puts
# a space in front of the final ".", which the source sentence does not have.
new_sentence = "".join(
    text + token.whitespace_ for text, token in zip(doc6_tokens, doc6)
)

print(new_sentence)  # Output: My dinner was delicious.

My dinner was delicious .


In [None]:
doc8 = nlp(u'Apple to build a Hong Kong factory for $6 million')

# Token texts on one line, each followed by ' | '.
for token in doc8:
    print(token.text, end=' | ')

# Separator between the token line and the entity listing.
print('\n----')

# One line per named entity: its text, its label, and spaCy's explanation of
# that label, separated by ' - '.
for ent in doc8.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep=' - ')

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 
----
Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [None]:
# Number of named entities found in doc8.
len(doc8.ents)

3

In [None]:
# Print the text of each noun chunk in the sentence, one per line.
doc9 = nlp(u"Autonomous cars shift insurance liability toward manufacturers.")

for chunk in doc9.noun_chunks:
    print(chunk.text)

Autonomous cars
insurance liability
manufacturers


In [None]:
# Noun chunks again; in the recorded output "higher insurance rates" is
# returned as one chunk.
doc10 = nlp(u"Red cars do not carry higher insurance rates.")

for chunk in doc10.noun_chunks:
    print(chunk.text)

Red cars
higher insurance rates


In [None]:
# Noun chunks with a long run of modifiers; in the recorded output the whole
# phrase from "a" to "people-eater" is one chunk.
doc11 = nlp(u"He was a one-eyed, one-horned, flying, purple people-eater.")

for chunk in doc11.noun_chunks:
    print(chunk.text)

He
a one-eyed, one-horned, flying, purple people-eater


In [None]:
from spacy import displacy

# NOTE: `doc` is rebound here, replacing the document created in the earlier
# tokenization cell; re-running `len(doc)` after this cell gives a different count.
doc = nlp(u'Apple is going to build a U.K. factory for $6 million.')
# Draw the 'dep' visualization inline in the notebook (jupyter=True).
# NOTE(review): 'distance' is presumably the spacing between words in the
# drawing; confirm against the displaCy options documentation.
displacy.render(doc, style='dep', jupyter=True, options={'distance': 110})

In [None]:
# Rebind `doc` to a new sentence and draw the 'ent' visualization inline in the notebook.
doc = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(doc, style='ent', jupyter=True)

In [None]:
doc = nlp(u'This is a sentence.')
# NOTE(review): displacy.serve starts a web server; the recorded output shows it
# listening on 0.0.0.0:5000 and later shutting down. It presumably blocks this
# cell until the kernel is interrupted, so a "Run All" stalls here. The cells
# that need inline display use displacy.render(..., jupyter=True) instead.
displacy.serve(doc, style='dep')




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [None]:
# Noun chunks of the factory sentence; the recorded output lists only
# "Apple" and "a U.K. factory".
doc12=nlp('Apple is going to build a U.K. factory for $6 million.')
for chunk in doc12.noun_chunks:
    print(chunk.text)

Apple
a U.K. factory


In [None]:
# Draw the 'dep' visualization for doc12 inline in the notebook.
displacy.render(doc12, style='dep', jupyter=True, options={'distance': 110})

In [None]:
# NOTE(review): starts a web server for the same document (recorded output:
# serving on 0.0.0.0:5000, then shut down). It presumably blocks until the
# kernel is interrupted, so this cell stalls a "Run All".
displacy.serve(doc12, style='dep')




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [None]:
# The literal is double-quoted so the apostrophe in "I'm" needs no backslash
# escape; the pipeline receives the contraction as written in normal English.
doc13 = nlp("I'm from Greater Noida")
# Print the text of each noun chunk, one per line.
for chunk in doc13.noun_chunks:
    print(chunk.text)

Greater Noida
