In [None]:
''' 
Working with linguistic features

'''

In [11]:
# Part-of-speech tagging
# en_core_web_sm must be installed first: "python3 -m spacy download en_core_web_sm"

import spacy
import en_core_web_sm
# Build the pipeline from the installed en_core_web_sm package.
nlp = en_core_web_sm.load()

'''
u can also load as --> nlp = spacy.load("en_core_web_sm")

'''

# Sample text; adjacent literals are concatenated into one string.
paragraph = (
    "Deep learning is an artificial intelligence function that imitates "
    "the workings of the human brain in processing data and creating "
    "patterns for use in decision making. .. Also known as deep neural "
    "learning or deep neural network"
)

# Run the pipeline over the text; later cells reuse `doc`.
doc = nlp(paragraph)

# Show every token of the document on one line, each followed by a space.
print(''.join(str(word) + ' ' for word in doc), end='')


Deep learning is an artificial intelligence function that imitates the workings of the human brain in processing data and creating patterns for use in decision making . .. Also known as deep neural learning or deep neural network

In [15]:
# One row per token: text, lemma, tag, part of speech, shape,
# then the is_alpha and is_stop flags.
for token in doc:
    row = (
        token.text,
        token.lemma_,
        token.tag_,
        token.pos_,
        token.shape_,
        token.is_alpha,
        token.is_stop,
    )
    print(*row)


Deep deep JJ ADJ Xxxx True False
learning learning NN NOUN xxxx True False
is be VBZ AUX xx True True
an an DT DET xx True True
artificial artificial JJ ADJ xxxx True False
intelligence intelligence NN NOUN xxxx True False
function function NN NOUN xxxx True False
that that WDT DET xxxx True True
imitates imitate VBZ VERB xxxx True False
the the DT DET xxx True True
workings working NNS NOUN xxxx True False
of of IN ADP xx True True
the the DT DET xxx True True
human human JJ ADJ xxxx True False
brain brain NN NOUN xxxx True False
in in IN ADP xx True True
processing process VBG VERB xxxx True False
data datum NNS NOUN xxxx True False
and and CC CCONJ xxx True True
creating create VBG VERB xxxx True False
patterns pattern NNS NOUN xxxx True False
for for IN ADP xxx True True
use use NN NOUN xxx True False
in in IN ADP xx True True
decision decision NN NOUN xxxx True False
making making NN NOUN xxxx True False
. . . PUNCT . False False
.. .. NFP PUNCT .. False False
Also also RB ADV Xxx

In [17]:
'''
Dependency parsing

'''

# Noun chunks: for each one print the chunk text, its root token,
# the root's dependency label, and the text of the root's head.
for noun_phrase in doc.noun_chunks:
    root = noun_phrase.root
    print(noun_phrase.text, root.text, root.dep_, root.head.text)

Deep learning learning nsubj is
an artificial intelligence function function attr is
the workings workings dobj imitates
the human brain brain pobj of
data data dobj processing
patterns patterns dobj creating
use use pobj for
decision making making pobj in
deep neural learning learning pobj as
deep neural network network conj learning


In [23]:
# Verbs that have a subject: take the head of every token labelled
# 'nsubj', keeping it only when that head's part of speech is 'VERB'.
verbs = {
    word.head
    for word in doc
    if word.dep_ == 'nsubj' and word.head.pos_ == 'VERB'
}
print(verbs)

{imitates}


In [32]:
# Inspect what hangs off the token at index 2, split by side.
anchor = doc[2]

left_texts = [child.text for child in anchor.lefts]
right_texts = [child.text for child in anchor.rights]
print(left_texts)
print(right_texts)

print(anchor.n_lefts)   # how many items sit to the left
print(anchor.n_rights)  # how many items sit to the right


['learning']
['function', '.', '..']
1
3
