In [6]:
import spacy

In [7]:
# Load the `en_core_web_sm` pipeline; later cells call `nlp(...)` on this
# object to turn raw text into Doc objects.
nlp = spacy.load('en_core_web_sm')

In [8]:
# Run the pipeline on a sample sentence; the resulting Doc is walked
# token by token in the next cell.
doc = nlp('Tesla is looking at buying U.S. startup for $6 million')

In [10]:
# One line per token: surface text, coarse part-of-speech tag and
# dependency label, separated by single spaces.
for tok in doc:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [11]:
# Show the pipeline as a list of (name, component) pairs.
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x15455cdc880>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x15455ea8dc0>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x15455ea88e0>)]

In [12]:
# Show only the component names of the pipeline.
nlp.pipe_names

['tagger', 'parser', 'ner']

In [15]:
# Second sample sentence. The double space after "isn't" is kept exactly
# as written; it appears as its own SPACE token in the token listing.
doc2 = nlp("Tesla isn't  looking into startups anymore.")

In [16]:
# Same per-token listing as for the first sentence: text, POS tag and
# dependency label on one space-separated line.
for tok in doc2:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla PROPN nsubj
is AUX aux
n't PART neg
  SPACE 
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [17]:
# Displaying the Doc itself shows the original sentence text.
doc2

Tesla isn't  looking into startups anymore.

In [18]:
# Integer indexing into a Doc picks out a single token (here the first one).
doc2[0]

Tesla

In [19]:
# Iterating a Doc yields its tokens in order; print each on its own line.
for token in doc2:
    print(token)

Tesla
is
n't
 
looking
into
startups
anymore
.


In [20]:
# Coarse part-of-speech tag of the first token, as a string.
doc2[0].pos_

'PROPN'

In [21]:
# Longer passage used to demonstrate slicing a Span out of a Doc.
# The text is built from adjacent string literals (joined at compile time)
# instead of backslash continuations inside one literal, so indentation of
# this cell can never leak into the text. The misspelling "commmonly" in the
# input sentence is corrected; token positions are unaffected.
doc3 = nlp(
    'Although commonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [22]:
# Slice by token position (not character offset): tokens 16 through 29
# cover the quoted phrase, including its opening and closing quote marks.
life_quote = doc3[16:30]

In [23]:
# Print the sliced tokens as text.
print(life_quote)

"Life is what happens to us while we are making other plans"


In [25]:
# A slice of a Doc is a Span, not a new Doc.
type(life_quote)

spacy.tokens.span.Span

In [26]:
# For comparison: the object the slice was taken from is a Doc.
type(doc3)

spacy.tokens.doc.Doc

In [30]:
# Three short sentences in one Doc, used to demonstrate sentence segmentation.
doc4 = nlp("This is the first sentence. This is another sentence. This is the last sentence.")

In [31]:
# Display the full text of the three-sentence Doc.
doc4

This is the first sentence. This is another sentence. This is the last sentence.

In [32]:
# `doc4.sents` yields one span per detected sentence; print each in turn.
for sent_span in doc4.sents:
    print(sent_span)

This is the first sentence.
This is another sentence.
This is the last sentence.


In [33]:
# Token index 6 is the "This" that opens the second sentence, so the
# sentence-start flag is expected to be True here.
doc4[6].is_sent_start

True

In [35]:
# Sample sentence for the noun-chunk demonstration in the next cell.
doc9 = nlp('Autonomous cars shift insurance liability toward manufacturers.')

In [36]:
# Print each noun chunk found in the sentence, one per line.
for noun_chunk in doc9.noun_chunks:
    print(noun_chunk)

Autonomous cars
insurance liability
manufacturers


In [37]:
from spacy import displacy

In [38]:
# Rebinds `doc` (previously the Tesla sentence) to a new sentence that is
# visualized with displaCy in the next cell.
doc = nlp("Apple is going to build a UK factory for $6 million.")

In [39]:
# Render the dependency parse of `doc` with jupyter output enabled; the
# options dict carries the `distance` setting for the drawing.
dep_options = {'distance': 110}
displacy.render(doc, style='dep', jupyter=True, options=dep_options)

In [40]:
# NOTE(review): this `doc1` is not read by any visible cell before the
# lemmatization section rebinds `doc1` to a different sentence — it looks
# like a leftover from a dropped entity-rendering step; confirm and remove
# or use it.
# NOTE(review): "APple" appears to be a typo for "Apple" — confirm before
# relying on this sentence for entity or POS examples.
doc1 = nlp(u"Over the last quarter APple sold nearly 20 thousand ipods for a profit of $9 billion")

In [61]:
# Sample vocabulary for the stemmer comparison: inflections of "run"
# followed by forms built on "easy" and "fair".
words = [
    'run', 'runner', 'ran', 'runs',
    'easily', 'fairly', 'fairness',
]

In [54]:
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer

In [64]:
# Porter stemmer instance; used by the first stemming loop over `words`.
p_stemmer = PorterStemmer()


In [65]:
# Snowball stemmer configured for English; used by the later stemming loops
# so its output can be compared against the Porter results.
s_stemmer = SnowballStemmer(language='english')

In [66]:
# Show each sample word next to its Porter stem.
for term in words:
    stemmed = p_stemmer.stem(term)
    print(term + '  ------>' + stemmed)

run  ------>run
runner  ------>runner
ran  ------>ran
runs  ------>run
easily  ------>easili
fairly  ------>fairli
fairness  ------>fair


In [67]:
# Same listing as the Porter loop, but using the Snowball stemmer so the
# two outputs can be compared line by line.
for term in words:
    stemmed = s_stemmer.stem(term)
    print(term + '  ------>' + stemmed)

run  ------>run
runner  ------>runner
ran  ------>ran
runs  ------>run
easily  ------>easili
fairly  ------>fair
fairness  ------>fair


In [70]:
# Second vocabulary: look-alike words ("generous"/"generation"/"generic",
# "sagacious"/"sage") fed to the Snowball stemmer in the next cell.
words2 = [
    'generous', 'generation', 'generate', 'generously',
    'sagacious', 'generic', 'sage',
]

In [71]:
# Show each word of the second vocabulary next to its Snowball stem.
for term in words2:
    stemmed = s_stemmer.stem(term)
    print(term + '  ------>' + stemmed)

generous  ------>generous
generation  ------>generat
generate  ------>generat
generously  ------>generous
sagacious  ------>sagaci
generic  ------>generic
sage  ------>sage


In [72]:
import spacy

In [73]:
# NOTE(review): the same 'en_core_web_sm' model is already loaded into `nlp`
# near the top of the notebook; when the notebook is run top to bottom this
# reload is redundant — confirm whether this section is meant to run
# standalone before removing it.
nlp = spacy.load('en_core_web_sm')

In [74]:
# Sentence containing several inflections of "run" (runner, running, run,
# ran) for the lemma listing in the next cell. Rebinds `doc1`.
doc1 = nlp("I am a runner running in a race because I love to run since I ran today.")

In [76]:
# Per token: text, POS tag, the integer `lemma` value and the string
# `lemma_`, with a tab placed between consecutive fields.
for tok in doc1:
    fields = (tok.text, '\t', tok.pos_, '\t', tok.lemma, '\t', tok.lemma_)
    print(*fields)

I 	 PRON 	 561228191312463089 	 -PRON-
am 	 AUX 	 10382539506755952630 	 be
a 	 DET 	 11901859001352538922 	 a
runner 	 NOUN 	 12640964157389618806 	 runner
running 	 VERB 	 12767647472892411841 	 run
in 	 ADP 	 3002984154512732771 	 in
a 	 DET 	 11901859001352538922 	 a
race 	 NOUN 	 8048469955494714898 	 race
because 	 SCONJ 	 16950148841647037698 	 because
I 	 PRON 	 561228191312463089 	 -PRON-
love 	 VERB 	 3702023516439754181 	 love
to 	 PART 	 3791531372978436496 	 to
run 	 VERB 	 12767647472892411841 	 run
since 	 SCONJ 	 10066841407251338481 	 since
I 	 PRON 	 561228191312463089 	 -PRON-
ran 	 VERB 	 12767647472892411841 	 run
today 	 NOUN 	 11042482332948150395 	 today
. 	 PUNCT 	 12646065887601541794 	 .
