In [1]:
import spacy

nlp = spacy.load("en_core_web_sm")

doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
for token in doc:
  print(token.text, token.pos_ , token.dep_)

Apple PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.K. PROPN dobj
startup NOUN dep
for ADP prep
$ SYM quantmod
1 NUM compound
billion NUM pobj


Tokenization

In [2]:
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
for token in doc:
  print(token.text)

Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion


Part of Speech (POS) tags and dependencies

In [3]:
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
for token in doc:
  print(token.text, token.pos_, token.dep_, token.lemma_ , token.tag_ , token.shape_ , token.is_alpha , token.is_stop)

Apple PROPN nsubj Apple NNP Xxxxx True False
is AUX aux be VBZ xx True True
looking VERB ROOT look VBG xxxx True False
at ADP prep at IN xx True True
buying VERB pcomp buy VBG xxxx True False
U.K. PROPN dobj U.K. NNP X.X. False False
startup NOUN dep startup NN xxxx True False
for ADP prep for IN xxx True True
$ SYM quantmod $ $ $ False False
1 NUM compound 1 CD d False False
billion NUM pobj billion CD xxxx True False


In [4]:
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Google , Apple crack down on fake coronavirus apps")
displacy.serve(doc, style="dep")




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


Named Entities

In [5]:
import spacy
nlp = spacy.load("en_core_web_sm")
doc = nlp("Coronabirus: Delhi resident tests positive for coronavirus, total 31 people infected in India")
for ent in doc.ents:
  print(ent.text, ent.start_char, ent.end_char, ent.label_)

Delhi 13 18 GPE
31 66 68 CARDINAL
India 88 93 GPE


Visualizing the Named Entity Recogizer

In [6]:
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_sm")
text = nlp("Coronabirus: Delhi resident tests positive for coronavirus, total 31 people infected in India")
doc = nlp(text)
displacy.serve(doc, style="ent")




Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


Words Vector and similarity

In [7]:
import spacy.cli
spacy.cli.download("en_core_web_md")
import en_core_web_md
nlp = en_core_web_md.load()

[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [8]:
import spacy

nlp = spacy.load("en_core_web_md")
tokens = nlp("lion bear apple banana fadsfdshds")
for token in tokens:
  print(token.text, token.has_vector, token.vector_norm, token.is_oov)

lion True 55.145737 False
bear True 52.114674 False
apple True 43.366478 False
banana True 31.620354 False
fadsfdshds False 0.0 True


In [9]:
import spacy
nlp = spacy.load("en_core_web_md")
tokens = nlp("lion bear cow apple banana")
for token1 in tokens:
  for token2 in tokens:
    print(token1.text, token2.text, token1.similarity(token2))

lion lion 1.0
lion bear 0.40031397342681885
lion cow 0.4524093568325043
lion apple 0.06742795556783676
lion banana 0.11355724185705185
bear lion 0.40031397342681885
bear bear 1.0
bear cow 0.2781473696231842
bear apple 0.18584337830543518
bear banana 0.15492628514766693
cow lion 0.4524093568325043
cow bear 0.2781473696231842
cow cow 1.0
cow apple 0.2575658857822418
cow banana 0.29682525992393494
apple lion 0.06742795556783676
apple bear 0.18584337830543518
apple cow 0.2575658857822418
apple apple 1.0
apple banana 0.6646699905395508
banana lion 0.11355724185705185
banana bear 0.15492628514766693
banana cow 0.29682525992393494
banana apple 0.6646699905395508
banana banana 1.0
