# spaCy Demo
spaCy is a library for advanced Natural Language Processing in Python and Cython.  
It is built on the very latest research and was designed from day one to be used in real products.

In [22]:
# Setup: import spaCy and load the small English pipeline ("en_core_web_sm").
# The resulting `nlp` object is the entry point for processing text.
import spacy

nlp = spacy.load(name="en_core_web_sm")

## Tokenization of a document using spaCy

In [6]:
# Split a plain string into individual tokens.
from spacy.lang.en import English
from spacy.tokenizer import Tokenizer

# NOTE: this rebinds `nlp` to a blank English pipeline (no trained components).
nlp = English()

# A Tokenizer built from the vocabulary alone carries no punctuation rules,
# which is why "demo-string" comes out as a single token in the output.
tokenizer = Tokenizer(nlp.vocab)

tokens = tokenizer("This is a demo-string")
for token in tokens:
    print(token.text)

This
is
a
demo-string


#### Adding special cases while Tokenizing

In [16]:
from spacy.symbols import ORTH

# Describe how "gimme" should be broken up: one {ORTH: text} entry per
# resulting sub-token, in order.
special_case = [{ORTH: piece} for piece in ("gim", "me")]

# Register the rule on the pipeline's tokenizer; this mutates `nlp.tokenizer`,
# so every later call to `nlp(...)` on this object applies it.
nlp.tokenizer.add_special_case("gimme", special_case)

print([word.text for word in nlp("gimme that")])

['gim', 'me', 'that']


## Getting Parts of Speech using spaCy

In [23]:
# Reload the trained pipeline: `nlp` was rebound to a blank English() object
# in the tokenization section, and a blank pipeline has no tagger or parser.
nlp = spacy.load("en_core_web_sm")

text = "Apple is looking at buying U.K. startup for $1 billion"
doc = nlp(text)

# Print one space-separated row of linguistic attributes per token.
for token in doc:
    print(
        token.text,      # the token as written
        token.lemma_,    # base form, e.g. "is" -> "be"
        token.pos_,      # coarse part-of-speech tag, e.g. PROPN
        token.tag_,      # fine-grained tag, e.g. NNP
        token.dep_,      # dependency label, e.g. nsubj
        token.shape_,    # word shape, e.g. "Xxxxx"
        token.is_alpha,  # True when the token is purely alphabetic
        token.is_stop,   # True when the token is a stop word
    )

Apple Apple PROPN NNP nsubj Xxxxx True False
is be AUX VBZ aux xx True True
looking look VERB VBG ROOT xxxx True False
at at ADP IN prep xx True True
buying buy VERB VBG pcomp xxxx True False
U.K. U.K. PROPN NNP compound X.X. False False
startup startup NOUN NN dobj xxxx True False
for for ADP IN prep xxx True True
$ $ SYM $ quantmod $ False False
1 1 NUM CD compound d False False
billion billion NUM CD pobj xxxx True False


In [None]:
# List the text of every noun chunk found in `doc`.
# Relies on `doc` from the previous cell, which must come from a parsed pipeline.
print("Noun ", [span.text for span in doc.noun_chunks])

## Visualizers

In [2]:
from spacy import displacy

# Parse a short sentence with the trained English pipeline.
# `spacy` itself is imported in the initialization cell at the top.
nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sentence")

# style="dep" draws the dependency parse of the sentence.
displacy.render(docs=doc, style="dep")