[Reference](https://medium.com/analytics-vidhya/spacy-tutorial-to-learn-and-master-natural-language-processing-nlp-db6703d0a751)

In [9]:
# Load spaCy and the en_core_web_sm model once; later cells reuse `nlp`
import spacy

nlp = spacy.load("en_core_web_sm")

![spaCy](https://miro.medium.com/max/1392/0*AifEqFz-2wSPoDIy.png)

In [2]:
# Run the loaded pipeline on a sentence; the result is stored in `doc`
# (this creates a processed document, not a new `nlp` object)
doc = nlp("He went to play basketball")

In [3]:
# List the names of the components currently active in the pipeline
nlp.pipe_names

['tagger', 'parser', 'ner']

In [4]:
# Switch off the tagger and parser on the shared `nlp` object. The call
# returns the removed (name, component) pairs, and the components stay
# disabled for every later nlp(...) call until they are restored or the
# model is loaded again.
nlp.disable_pipes('tagger', 'parser')

[('tagger', <spacy.pipeline.pipes.Tagger at 0x7fcac965dc88>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x7fcac94b83a8>)]

In [5]:
# Check which pipeline components are still active on `nlp`
nlp.pipe_names

['ner']

# 1. Part-of-Speech (POS) Tagging using spaCy

In [10]:
# Reload the pipeline: an earlier cell disabled the tagger and parser on the
# shared `nlp` object, and the POS tags and dependency labels printed in the
# following sections require both components to be active. Without this the
# notebook only works when the load cell is re-run by hand out of order.
nlp = spacy.load('en_core_web_sm')

# Run the full pipeline on the sentence to get an annotated document
doc = nlp("He went to play basketball")

In [11]:
# Walk the document token by token
for token in doc:
    # Print the token and its part-of-speech tag
    # (token.pos_ is the string form of the tag, e.g. PRON, VERB, NOUN)
    print(token.text, "-->", token.pos_)

He --> PRON
went --> VERB
to --> PART
play --> VERB
basketball --> NOUN


In [12]:
# Ask spaCy for a human-readable description of the "PART" tag
spacy.explain("PART")

'particle'

# 2. Dependency Parsing using spaCy

In [13]:
# Print every token next to its dependency label
# (token.dep_ is the string form of the label, e.g. nsubj, ROOT, dobj)
for token in doc:
    print(token.text, "-->", token.dep_)

He --> nsubj
went --> ROOT
to --> aux
play --> advcl
basketball --> dobj


In [14]:
# Describe each dependency label printed above, in the same order;
# spacy.explain gives None for a label it has no description for
tuple(spacy.explain(label) for label in ("nsubj", "ROOT", "aux", "advcl", "dobj"))

('nominal subject',
 None,
 'auxiliary',
 'adverbial clause modifier',
 'direct object')

# 3. Named Entity Recognition using spaCy

In [15]:
# Process a sentence that contains several named entities
doc = nlp("Indians spent over $71 billion on clothes in 2018")
 
# doc.ents holds the entity spans found in the text;
# print each span's text together with its entity label
for ent in doc.ents:
    print(ent.text, ent.label_)

Indians NORP
$71 billion MONEY
2018 DATE


In [16]:
# Ask spaCy for a human-readable description of the "NORP" entity label
spacy.explain("NORP")

'Nationalities or religious or political groups'

# 4. Rule-Based Matching using spaCy

In [17]:
# Rule-based matching is provided by spaCy's Matcher class
from spacy.matcher import Matcher

# Text to search through
doc = nlp("Some people start their day with lemon water")

# Two consecutive tokens: exact text "lemon" followed by exact text "water"
pattern = [{"TEXT": "lemon"}, {"TEXT": "water"}]

# The matcher is built on the pipeline's vocabulary. The pattern is
# registered under the key 'rule_1'; the second argument is the on-match
# callback slot, left as None here.
matcher = Matcher(nlp.vocab)
matcher.add("rule_1", None, pattern)

In [20]:
# Run the matcher over the document; the result is a list of
# (match_id, start, end) tuples, one per match
matches = matcher(doc)

In [21]:
# Display the raw match tuples
matches

[(7604275899133490726, 6, 8)]

In [22]:
# Extract matched text
# Each match is a (match_id, start, end) triple; start and end are token
# indices into `doc`, with end exclusive, so doc[start:end] is the match.
for match_id, start, end in matches:
    # Get the matched span
    matched_span = doc[start:end]
    print(matched_span.text)

lemon water


In [23]:
# One-token rule: the token text must be "book" AND its part-of-speech tag
# must be NOUN
pattern = [{"TEXT": "book", "POS": "NOUN"}]

# Fresh matcher on the shared vocab, holding only this rule
matcher = Matcher(nlp.vocab)
matcher.add("rule_2", None, pattern)

# The same word "book" in two different sentences
doc1 = nlp("You read this book")
doc2 = nlp("I will book my ticket")

In [24]:
# Apply the rule to the first sentence and display the matches found
matches = matcher(doc1) 
matches

[(375134486054924901, 3, 4)]

In [25]:
# Apply the same rule to the second sentence; an empty list means that no
# token satisfied both the TEXT and the POS constraint
matches = matcher(doc2) 
matches

[]