### Basic Functionality

In [2]:
import spacy

In [3]:
# Load the small English pipeline once; the later cells all call it as `sp`.
sp = spacy.load('en_core_web_sm')
# Calling the pipeline on text gives back the processed sentence used below.
sentence = sp('Manchester United is looking to sign a forward for $90 million')

### First Example (See the parts of speech)

In [4]:
# Iterating over the processed sentence yields one token at a time.
for token in sentence:
    print(token.text)

Manchester
United
is
looking
to
sign
a
forward
for
$
90
million


In [5]:
# Print every token next to its part-of-speech tag (`pos_`).
for token in sentence:
    print(token.text, token.pos_)

Manchester PROPN
United PROPN
is AUX
looking VERB
to PART
sign VERB
a DET
forward NOUN
for ADP
$ SYM
90 NUM
million NUM


### Second Example (See dependency parsing)

In [8]:
# Besides the POS tag, each token carries a dependency label (`dep_`).
sentence2 = sp("Manchester United isn't looking to sign any forward.")
for token in sentence2:
    print(token.text, token.pos_, token.dep_)

Manchester PROPN compound
United PROPN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
to PART aux
sign VERB xcomp
any DET det
forward NOUN advmod
. PUNCT punct


In [9]:
# `document.sents` yields the text one sentence at a time.
document = sp('Hello from Stackabuse. The site with the best Python Tutorials. What are you looking for?')
# The loop variable is deliberately not named `sentence`: that name already
# holds the parsed sentence created near the top of the notebook, and
# rebinding it here would silently change what the earlier cells print when
# they are re-run.
for sent in document.sents:
    print(sent)

Hello from Stackabuse.
The site with the best Python Tutorials.
What are you looking for?


In [28]:
# Tokens can be fetched by position; index 4 is "The" and index 3 is the
# full stop that closes the first sentence (see the output below).
print(document[4], document[3], sep='\n')

# Sentence-boundary flags: does token 4 open a sentence, does token 2 close one?
print(document[4].is_sent_start, document[2].is_sent_end, sep='\n')

The
.
True
False


### Third Example (Spacy Tokenization)

In [20]:
# A sentence containing quotes, a contraction and dotted abbreviations,
# used to inspect the tokenizer in the next cell.
sentence3 = sp('"They\'re leaving U.K. for U.S.A."')
print(sentence3)

"They're leaving U.K. for U.S.A."


In [21]:
# One token per line, to see where the tokenizer placed the boundaries.
for token in sentence3:
    print(token.text)

"
They
're
leaving
U.K.
for
U.S.A.
"


In [30]:
# In the output below, "non-vegetarian" is split into three tokens while the
# e-mail address stays in one piece.
sentence4 = sp("Hello, I am non-vegetarian, email me the menu at abc-xyz@gmai.com")
for token in sentence4:
    print(token.text)
# Bare last expression: the notebook displays the number of tokens.
len(sentence4)

Hello
,
I
am
non
-
vegetarian
,
email
me
the
menu
at
abc-xyz@gmai.com


14

### Detecting Entities (named entity recognition)

In [34]:
# Plain token listing first; the entities found in this sentence are printed
# in the following cell.
sentence5 = sp('Manchester United Football Club is looking to sign Harry Kane for $90 million')
for token in sentence5:
    print(token.text)

Manchester
United
Football
Club
is
looking
to
sign
Harry
Kane
for
$
90
million


In [35]:
# For every named entity print: its text, its label, and spaCy's
# human-readable explanation of that label.
for ent in sentence5.ents:
    print(' - '.join((ent.text, ent.label_, str(spacy.explain(ent.label_)))))

Manchester United Football Club - ORG - Companies, agencies, institutions, etc.
Harry Kane - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit


In [37]:
# `noun_chunks` yields noun phrases (multi-word spans such as
# "Manchester United" in the output below), not just single nouns.
sentence6 = sp('Latest Rumours: Manchester United is looking to sign Harry Kane for $90 million')
for chunk in sentence6.noun_chunks:
    print(chunk.text)

Latest Rumours
Manchester United
Harry Kane


### Stemming (Porter and Snowball)
#### spaCy does not include a stemmer, so I'll use NLTK instead

In [42]:
import nltk
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer

In [43]:
# Four related word forms to run through the stemmers.
tokens = ['compute', 'computer', 'computed', 'computing']
# Start with NLTK's Porter stemmer.
stemmer = PorterStemmer()

In [44]:
# Show each word next to the stem produced by the current `stemmer`.
for word in tokens:
    print(' --> '.join((word, stemmer.stem(word))))

compute --> comput
computer --> comput
computed --> comput
computing --> comput


In [45]:
# Rebind `stemmer` to the English Snowball stemmer; the next cell repeats the
# same loop with it.
stemmer = SnowballStemmer('english')

In [46]:
# Same comparison as before, now using whatever `stemmer` currently refers to.
for word in tokens:
    stem = stemmer.stem(word)
    print(word + ' --> ' + stem)

compute --> comput
computer --> comput
computed --> comput
computing --> comput


### Lemmatization

In [48]:
# Lemmatization maps an inflected word (e.g. a past-tense verb) back to its
# base form, the lemma. In the output below "computed" becomes "compute",
# while "computer" and "computing" are returned unchanged.
sentence7 = sp('compute computer computed computing')
for token in sentence7:
    print(token.text, token.lemma_)

compute compute
computer computer
computed compute
computing computing


In [50]:
# A full sentence makes the effect easier to see: each token is printed with
# an arrow pointing at its lemma.
sentence8 = sp('A letter has been written, asking him to be released')
for token in sentence8:
    print(token.text + '  ===>', token.lemma_)

A  ===> a
letter  ===> letter
has  ===> have
been  ===> be
written  ===> write
,  ===> ,
asking  ===> ask
him  ===> he
to  ===> to
be  ===> be
released  ===> release
