# Tokenization

In [1]:
import spacy

In [2]:
# Load the pretrained 'en_core_web_sm' pipeline once; later cells reuse `nlp`.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Sample sentence parsed by the tokenization, POS, dependency and NER cells.
text = (
    'Apple is looking for buying a U.K. startup '
    'for $1 billion'
)

In [4]:
# Run the loaded pipeline on the sample text; the resulting `doc` is what the
# following cells iterate over and visualise.
doc = nlp(text)

In [5]:
# help(nlp)  # optional: uncomment to read the built-in help for the loaded pipeline object

In [6]:
# Iterating over the parsed doc yields one token at a time; print each token's text.
for token in doc:
    print(token.text)

Apple
is
looking
for
buying
a
U.K.
startup
for
$
1
billion


# Part-of-Speech (POS) Tagging

In [7]:
# Bare expression: the notebook echoes the parsed document, which displays as its text.
doc

Apple is looking for buying a U.K. startup for $1 billion

In [8]:
# Print every token next to its part-of-speech label (the string attribute `pos_`).
for token in doc:
    print(token.text, token.pos_)

Apple PROPN
is AUX
looking VERB
for ADP
buying VERB
a DET
U.K. PROPN
startup NOUN
for ADP
$ SYM
1 NUM
billion NUM


In [9]:
# Same token/POS listing, with the token text padded to a 15-character column
# so the tags line up. A plain `:15` width replaces the nested `{15}` field, and
# the empty format spec after `pos_` is dropped; the printed output is unchanged.
for token in doc:
    print(f'{token.text:15} {token.pos_}')

Apple           PROPN
is              AUX
looking         VERB
for             ADP
buying          VERB
a               DET
U.K.            PROPN
startup         NOUN
for             ADP
$               SYM
1               NUM
billion         NUM


# Dependency Parsing

In [10]:
# Columns: token text | dependency label (`dep_`) | text of the token's head.
# The token labelled ROOT is listed as its own head.
# Plain `:15` widths replace the nested `{15}` fields; the output is unchanged.
for token in doc:
    print(f'{token.text:15} {token.dep_:15} {token.head.text}')

Apple           nsubj           looking
is              aux             looking
looking         ROOT            looking
for             prep            looking
buying          pcomp           for
a               det             startup
U.K.            compound        startup
startup         dobj            buying
for             prep            buying
$               quantmod        billion
1               compound        billion
billion         pobj            for


# Dependency tree

The tree generated by dependency parsing is known as a dependency tree.

Each arrow represents a dependency between two words: the word at the arrowhead is the child (dependent), and the word at the tail of the arrow is its head.

In [11]:
from spacy import displacy

In [12]:
# Draw the dependency arcs for the parsed sentence. As the dependency listing
# shows, "looking" is the ROOT, and "Apple" and "is" depend on it (their head
# is "looking"), not the other way round.
displacy.render(doc, style='dep')

In [13]:
# Render the same dependency view in a more compact layout.
displacy.render(
    doc,
    style='dep',
    options={'distance': 100, 'compact': True},
)

# Named Entity Recognition

In [14]:
# `doc.ents` holds the named entities found in the text; print each entity's
# text together with its label string.
for ent in doc.ents:
    print(ent.text, ent.label_)

Apple ORG
U.K. GPE
$1 billion MONEY


In [15]:
# Visualise the named entities using displaCy's 'ent' style.
displacy.render(doc, style='ent')

# Sentence Segmentation

In [16]:
# Multi-sentence sample for sentence segmentation. There is no space after
# "permission." in the original text; it is kept exactly as written.
text = (
    'Apple is looking for buying a U.K. startup. '
    'Government has given permission.'
    'Its good news'
)

In [17]:
# Re-run the pipeline on the new text. This rebinds `doc`, so from here on it
# refers to the multi-sentence document rather than the earlier one.
doc = nlp(text)

In [18]:
# Echo the re-parsed document to confirm which text is now being analysed.
doc

Apple is looking for buying a U.K. startup. Government has given permission.Its good news

In [19]:
# `doc.sents` yields the detected sentences one at a time; print each one.
# In the output, "U.K." does not end a sentence, while the text is split after
# "permission." even though no space follows it.
for sent in doc.sents:
    print(sent)

Apple is looking for buying a U.K. startup.
Government has given permission.
Its good news
