1. Lemmatization
2. Named Entity Recognition
3. Dependency Parsing
4. Morphology
5. Similarity

In [95]:
import sys

import spacy
from spacy import displacy

In [79]:
# Load the "en_core_web_sm" pipeline; calling `nlp(...)` on a string yields the parsed document
# used by the lemmatization, NER, dependency and morphology cells below.
nlp = spacy.load("en_core_web_sm")

In [80]:
# Look up the pipeline component registered under the name "lemmatizer" to confirm it is present.
nlp.get_pipe("lemmatizer")

<spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7ffdcb8ec3c8>

In [89]:
# Main example sentence; it is parsed into `doc` in the NER section and reused for
# dependency parsing and morphology.
text = "Apple is looking to buy Google for $1 trillion dollars in cash."

### Lemmatization

Lemmatization maps the inflected forms of a word to a shared base form (its lemma), e.g. swim, swimming, swam -> swim

In [87]:
# Three inflected forms of the same verb, used as lemmatizer input.
lemma_text = "look looking looks"

In [88]:
# Parse the sample string, then collect the lemma of every token.
doc = nlp(lemma_text)
[token.lemma_ for token in doc]

['look', 'look', 'look']

### Named Entity Recognition (NER)

In [101]:
# Parse the main example sentence and pair each recognised entity's text with its label.
doc = nlp(text)
[(ent.text, ent.label_) for ent in doc.ents]

[('Apple', 'ORG'), ('Google', 'ORG'), ('$1 trillion dollars', 'MONEY')]

### Dependency Parsing

In [96]:
# Dependency-relation label of each token, in sentence order.
[token.dep_ for token in doc]

['nsubj',
 'aux',
 'ROOT',
 'aux',
 'xcomp',
 'dobj',
 'prep',
 'quantmod',
 'compound',
 'nummod',
 'pobj',
 'prep',
 'pobj',
 'punct']

In [97]:
# Visualise the dependency parse of `doc` through displaCy's web server.
# The recorded output shows it serving on port 5000 until the server was shut down.
displacy.serve(doc, style="dep")




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


In [98]:
# Second example sentence; parsed into `doc_2` in the following cell.
text_2 = "My phone is defective and I want to replace it."

In [100]:
# Parse the second sentence and visualise its dependency tree.
# The visualiser must receive `doc_2`: passing `doc` would re-serve the first
# sentence and leave the freshly parsed `doc_2` unused.
doc_2 = nlp(text_2)
displacy.serve(doc_2, style="dep")

KeyboardInterrupt: 

### Morphology

Morphology looks at how word forms are built and which grammatical features they carry, e.g. success, successful, unsuccessful

In [104]:
# Display the document whose per-token morphology is listed in the next cell.
doc

Apple is looking to buy Google for $1 trillion dollars in cash.

In [103]:
# Morphological features of each token; tokens with no features show up as empty entries.
[token.morph for token in doc]

[Number=Sing,
 Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin,
 Aspect=Prog|Tense=Pres|VerbForm=Part,
 ,
 VerbForm=Inf,
 Number=Sing,
 ,
 ,
 NumType=Card,
 NumType=Card,
 Number=Plur,
 ,
 Number=Sing,
 PunctType=Peri]

### Similarity

In [111]:
# Two sentences that differ only in their final word, used to compare document similarity.
# Note: this rebinds `text_2`, which held a different sentence in the dependency-parsing section.
text_1 = "I watch news on sports"
text_2 = "I watch news on politics"

In [112]:
# Parse both sentences with whichever pipeline `nlp` currently refers to.
doc_1 = nlp(text_1)
doc_2 = nlp(text_2)

In [113]:
# Similarity score between the two parsed sentences.
# NOTE(review): this cell's execution count (113) is later than the cell that loads
# "en_core_web_md" (109), so confirm which model actually produced the score recorded below.
doc_1.similarity(doc_2)

0.8985338457599955

In [108]:
!python -m spacy download en_core_web_md

Collecting en-core-web-md==3.3.0 from https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.3.0/en_core_web_md-3.3.0-py3-none-any.whl#egg=en_core_web_md==3.3.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.3.0/en_core_web_md-3.3.0-py3-none-any.whl (33.5MB)
     |████████████████████████████████| 33.5MB 2.4MB/s
Installing collected packages: en-core-web-md
Successfully installed en-core-web-md-3.3.0
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_md')


In [109]:
# Switch `nlp` to the "en_core_web_md" pipeline and re-parse both sentences with it.
nlp = spacy.load("en_core_web_md")
doc_1 = nlp(text_1)
doc_2 = nlp(text_2)

In [110]:
# Similarity of the same sentence pair, using the documents re-parsed in the previous cell.
doc_1.similarity(doc_2)

0.905841970438971