<a href="https://colab.research.google.com/github/LahiruTjay/advanced-spacy/blob/master/10_things_to_know_about_spacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 10 Things to Know about spaCy

In [0]:
import spacy

In [0]:
# Load the "en_core_web_sm" model package; the resulting `nlp` object is what
# gets called on raw text to produce a Doc.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Run the loaded pipeline over a three-sentence quote, then print every
# sentence yielded by doc.sents on its own line.
doc = nlp(
    "Success is not final. "
    "Failure is not fatal. "
    "It is the courage to continue that counts."
)
for sentence in doc.sents:
    print(sentence)

Success is not final.
Failure is not fatal.
It is the courage to continue that counts.


In [0]:
# Build a Doc from raw text with make_doc, then feed it through every
# (name, component) pair in nlp.pipeline in order, rebinding `doc` to
# whatever each component returns.
nlp = spacy.load("en_core_web_sm")
doc = nlp.make_doc("This is a sentence")
for _name, component in nlp.pipeline:
    doc = component(doc)

In [5]:
# Print the loaded pipeline: a list of (name, component) tuples.
print(nlp.pipeline)


[('tagger', <spacy.pipeline.pipes.Tagger object at 0x7fe27aa87390>), ('parser', <spacy.pipeline.pipes.DependencyParser object at 0x7fe226874ee8>), ('ner', <spacy.pipeline.pipes.EntityRecognizer object at 0x7fe226874f48>)]


In [6]:
# Print just the component names, without the component objects.
print(nlp.pipe_names)

['tagger', 'parser', 'ner']


In [7]:
def my_component(doc):
    """Print a short report about ``doc`` and return it unchanged.

    The report contains the number of tokens, the ``pos_`` value of every
    token, and an extra note when the document has fewer than 10 tokens.

    Args:
        doc: The document to inspect. It must support ``len()`` and
            iteration over tokens that expose a ``pos_`` attribute.

    Returns:
        The same ``doc`` object that was passed in.
    """
    token_count = len(doc)
    print("After tokenization, this doc has {} tokens.".format(token_count))

    pos_tags = []
    for token in doc:
        pos_tags.append(token.pos_)
    print("The part-of-speech tags are:", pos_tags)

    if token_count < 10:
        print("This is a pretty short document.")
    return doc

# Reload the model so this cell starts from a pipeline that does not yet
# contain "print_info" (presumably needed for safe re-runs; confirm against
# the spaCy add_pipe documentation).
nlp = spacy.load("en_core_web_sm")

# Register my_component under the name "print_info" at the end of the pipeline.
nlp.add_pipe(my_component, last=True, name="print_info")
print(nlp.pipe_names)  # expected: ['tagger', 'parser', 'ner', 'print_info']

# Calling nlp on text now also triggers my_component's printed report.
doc = nlp("This is a sentence.")

['tagger', 'parser', 'ner', 'print_info']
After tokenization, this doc has 5 tokens.
The part-of-speech tags are: ['DET', 'VERB', 'DET', 'NOUN', 'PUNCT']
This is a pretty short document.


In [8]:
# For every noun chunk in the sentence, print four fields: the chunk text,
# the text of its root token, the root's dependency label, and the text of
# the root's head token.
doc = nlp("Autonomous cars shift insurance liability toward manufacturers")
for chunk in doc.noun_chunks:
    root = chunk.root
    print(chunk.text, root.text, root.dep_, root.head.text)

After tokenization, this doc has 7 tokens.
The part-of-speech tags are: ['ADJ', 'NOUN', 'VERB', 'NOUN', 'NOUN', 'ADP', 'NOUN']
This is a pretty short document.
Autonomous cars cars nsubj shift
insurance liability liability dobj shift
manufacturers manufacturers pobj toward
