In [2]:
import spacy

In [6]:
import tensorflow as tf

# Upper bound, in MB, on the memory TensorFlow may allocate on the first GPU.
GPU_MEMORY_LIMIT_MB = 1024

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to GPU_MEMORY_LIMIT_MB (1GB) of memory on the first
    # GPU by replacing it with a single memory-limited virtual device.
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(
                memory_limit=GPU_MEMORY_LIMIT_MB)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized, so
        # this cell has to run before any other TensorFlow work in the kernel;
        # otherwise the error is only reported here and the cap is not applied.
        print(e)

Virtual devices cannot be modified after being initialized


## First, we need to load the spaCy language model.

In [3]:
# Load the en_core_web_sm pipeline once; the cells below call `sp(...)` to parse text.
sp = spacy.load('en_core_web_sm')

In [25]:
# Parse a short question and print the text of each token on its own line.
sentence = sp('Can you Google it?')
for token in sentence:
    print(token.text)

Can
you
Google
it
?


In [26]:
# Show every token of `sentence` next to its part-of-speech tag.
for token in sentence:
    print(token.text, token.pos_)

Can VERB
you PRON
Google VERB
it PRON
? PUNCT


In [14]:
# Interactive tagging loop: read a line of text and print one row per token
# with its text, part-of-speech (pos_), dependency label (dep_) and tag (tag_).
# Typing "stop" ends the loop.
while True:
    txt = input("You: ")

    # Check the sentinel before parsing so "stop" is never run through the
    # pipeline, and tolerate stray surrounding whitespace (e.g. "stop ").
    if txt.strip() == 'stop':
        break

    sen = sp(txt)
    for word in sen:
        print(f"{word.text:10} - {word.pos_:10} - {word.dep_:10} - {word.tag_}")

You: There were five books on the shelf. 
There      - PRON       - expl       - EX
were       - AUX        - ROOT       - VBD
five       - NUM        - nummod     - CD
books      - NOUN       - attr       - NNS
on         - ADP        - prep       - IN
the        - DET        - det        - DT
shelf      - NOUN       - pobj       - NN
.          - PUNCT      - punct      - .
You: stop


## For dependency parsing, the attribute dep_ is used as shown below:

In [22]:
# Parse a sentence containing a contraction and list, for each token, its
# text, part-of-speech tag and dependency label.
sentence2 = sp("Manchester United isn't looking to sign any forward.")
for token in sentence2:
    print(token.text, token.pos_, token.dep_)

Manchester PROPN compound
United PROPN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
to PART aux
sign VERB xcomp
any DET advmod
forward ADV advmod
. PUNCT punct


In [7]:
# Parse a short multi-sentence text and print each sentence span spaCy detects.
document = sp('Hello from Stackabuse. The site with the best Python Tutorials. What are you looking for?')
for sentence_span in document.sents:
    print(sentence_span)

Hello from Stackabuse.
The site with the best Python Tutorials.
What are you looking for?


## To check whether a particular token begins a sentence, use its `is_sent_start` attribute. Here we test the token at index 4 (the word "The"):

In [8]:
# Index 4 should be the token "The" (after "Hello", "from", "Stackabuse", ".") - TODO confirm
# against the tokenizer output. is_sent_start reports whether that token opens a sentence.
document[4].is_sent_start

True

In [9]:
# Tokenize a quoted sentence that contains a contraction and dotted abbreviations.
sentence3 = sp('"They\'re leaving U.K. for U.S.A."')
for token in sentence3:
    print(token.text)

"
They
're
leaving
U.K.
for
U.S.A.
"


In [10]:
# Tokenize text containing a hyphenated word and an e-mail address.
sentence4 = sp("Hello, I am non-vegetarian, email me the menu at abc-xyz@gmai.com")
for token in sentence4:
    print(token.text)

Hello
,
I
am
non
-
vegetarian
,
email
me
the
menu
at
abc-xyz@gmai.com


## Detecting Entities

In [11]:
# Print each named entity as "text - label - description of the label".
sentence5 = sp('Manchester United is looking to sign Fedjio Raymond for $90 million')
for ent in sentence5.ents:
    print(' - '.join([ent.text, ent.label_, str(spacy.explain(ent.label_))]))

Manchester United - PERSON - People, including fictional
Fedjio Raymond - PERSON - People, including fictional
$90 million - MONEY - Monetary values, including unit


## Detecting Noun Chunks (noun phrases)

In [12]:
# Rebinds `sentence5` to a new sentence and prints the text of each noun chunk.
sentence5 = sp('Latest Rumours: Manchester United is looking to sign Harry Kane for $90 million')
for chunk in sentence5.noun_chunks:
    print(chunk.text)

Manchester United
Harry Kane


## spaCy doesn't support stemming, so we use NLTK's stemmers instead
## Porter Stemmer

In [13]:
import nltk
# Import only the name this notebook uses; the previous wildcard import pulled
# every public name of nltk.stem.porter into the notebook namespace.
from nltk.stem.porter import PorterStemmer

# Porter stemmer instance used by the following cell.
stemmer = PorterStemmer()

In [14]:
# Stem four related words with the current `stemmer` and print "word --> stem".
tokens = ['compute', 'computer', 'computed', 'computing']
for term in tokens:
    print(f"{term} --> {stemmer.stem(term)}")

compute --> comput
computer --> comput
computed --> comput
computing --> comput


## Snowball Stemmer

In [15]:
from nltk.stem.snowball import SnowballStemmer

# Rebind `stemmer` to an English Snowball stemmer, then stem the same four
# words and print each as "word --> stem".
stemmer = SnowballStemmer(language='english')

tokens = ['compute', 'computer', 'computed', 'computing']
for term in tokens:
    print(term, '-->', stemmer.stem(term))

compute --> comput
computer --> comput
computed --> comput
computing --> comput


## Lemmatization
Lemmatization reduces a word to its lemma, the base form under which it appears in a dictionary (for example, "written" becomes "write").

In [16]:
# Print each token of a four-word family next to the lemma spaCy assigns it.
sentence6 = sp('compute computer computed computing')
for token in sentence6:
    print(f"{token.text} {token.lemma_}")

compute compute
computer computer
computed compute
computing computing


In [17]:
# Lemmatize a full sentence and print each token as "text  ===> lemma".
sentence7 = sp('A letter has been written, asking him to be released')

for token in sentence7:
    print(f"{token.text}  ===> {token.lemma_}")

A  ===> a
letter  ===> letter
has  ===> have
been  ===> be
written  ===> write
,  ===> ,
asking  ===> ask
him  ===> -PRON-
to  ===> to
be  ===> be
released  ===> release
