# spaCy is an open-source software library for advanced natural language processing, written in the programming languages Python and Cython. 
## It uses an object-oriented approach (documents, tokens and spans as objects) rather than plain lists of strings like NLTK


In [1]:
# Example text: a run of short, informal sentences that is fed to the pipeline below.
# The misspellings and lower-case names are part of the sample and are kept as-is.
sent = (
    "Hey This is me . "
    "i am looking for you since past year . "
    "where are you now a days? "
    "how's your brother ? "
    "i am helping you ."
    "he steped out from camping . "
    "i seen many bridges . "
    "nothing can be done . "
    "rahul is honest boy . "
    "america has newyork state . "
    "elon is richest among all and he owns tesla and spacex . "
    "he studied from stanford university"
)


In [2]:
import spacy

# Name of the pipeline package to load.
# Install it once with: python -m spacy download en_core_web_sm
MODEL_NAME = 'en_core_web_sm'
nlp = spacy.load(MODEL_NAME)

# Run the loaded pipeline over the example text; every later cell reads from `doc`.
doc = nlp(sent)

## Sentence Tokenizer

In [9]:
# Print each sentence span detected by the pipeline, one per line.
# The loop variable is deliberately NOT called `sent`: that name holds the raw
# input string passed to nlp(), and rebinding it here would break a later re-run
# of the cell that builds `doc`.
for sentence in doc.sents:
    print(sentence)
    

Hey This is me .
i am looking for you since past year .
where are you now a days?
how's your brother ?
i am helping you .he
steped out from camping .
i seen many bridges .
nothing can be done .
rahul is honest boy .
america has newyork state .
elon is richest among all and he owns tesla and spacex .
he studied from stanford university


## Word Tokenizer

In [5]:
# Show the first six tokens (document indices 0-5): token text followed by its index.
for token in doc[:6]:
    print(token.text , token.i)

Hey 0
This 1
is 2
me 3
. 4
i 5


## POS Tagging (Part of Speech)

In [14]:
# Show the first seven tokens (document indices 0-6):
# token index, token text and part-of-speech tag, separated by " - ".
for token in doc[:7]:
    print( token.i , " - ", token.text , " - ", token.pos_)

0  -  Hey  -  INTJ
1  -  This  -  PRON
2  -  is  -  AUX
3  -  me  -  PRON
4  -  .  -  PUNCT
5  -  i  -  PRON
6  -  am  -  AUX


ADJ: adjective, e.g. big, old, green, incomprehensible, first

ADP: adposition, e.g. in, to, during

ADV: adverb, e.g. very, tomorrow, down, where, there

AUX: auxiliary, e.g. is, has (done), will (do), should (do)

CONJ: conjunction, e.g. and, or, but

CCONJ: coordinating conjunction, e.g. and, or, but

DET: determiner, e.g. a, an, the

INTJ: interjection, e.g. psst, ouch, bravo, hello

NOUN: noun, e.g. girl, cat, tree, air, beauty

NUM: numeral, e.g. 1, 2017, one, seventy-seven, IV, MMXIV

PART: particle, e.g. ’s, not

PRON: pronoun, e.g. I, you, he, she, myself, themselves, somebody

PROPN: proper noun, e.g. Mary, John, London, NATO, HBO

PUNCT: punctuation, e.g. ., (, ), ?

SCONJ: subordinating conjunction, e.g. if, while, that

SYM: symbol, e.g. $, %, §, ©, +, −, ×, ÷, =, :), 😝

VERB: verb, e.g. run, runs, running, eat, ate, eating

X: other, e.g. sfpksdpsxmsa

SPACE: space, e.g. whitespace-only tokens such as extra spaces or newlines

## Named Entity Recognition

In [24]:
# Each item of doc.ents is a recognised entity, not a single token:
# print the entity text and its label, separated by " - ".
for ent in doc.ents:
    print( ent.text ," - ", ent.label_)
   

past year  -  DATE
america  -  GPE
stanford university  -  ORG
