In [1]:
import spacy
nlp = spacy.load("en_core_web_sm")

In [2]:
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [3]:
print(doc)

The quick brown fox jumped over the lazy dog's back.


In [4]:
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [6]:
# Print the fifth word and associated tags:
print(doc[4].text, doc[4].lemma_, doc[4].pos_,doc[4].dep_, doc[4].tag_, spacy.explain(doc[4].tag_))

jumped jump VERB ROOT VBD verb, past tense


In [8]:
for token in doc:
    print(f'{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}')

The        DET      DT     determiner
quick      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
brown      ADJ      JJ     adjective (English), other noun-modifier (Chinese)
fox        NOUN     NN     noun, singular or mass
jumped     VERB     VBD    verb, past tense
over       ADP      IN     conjunction, subordinating or preposition
the        DET      DT     determiner
lazy       ADJ      JJ     adjective (English), other noun-modifier (Chinese)
dog        NOUN     NN     noun, singular or mass
's         PART     POS    possessive ending
back       NOUN     NN     noun, singular or mass
.          PUNCT    .      punctuation mark, sentence closer


In [9]:
doc = nlp("I read books on NLP.")

In [10]:
r= doc[1]

In [11]:
print(f"{r.text:{10}} {r.pos_:{8}} {r.tag_:{6}} {spacy.explain(token.tag_)}")

read       VERB     VBP    punctuation mark, sentence closer


In [12]:
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [13]:
POS_Counts = doc.count_by(spacy.attrs.POS)

In [14]:
POS_Counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [15]:
doc.vocab[4].text

'IS_LOWER'

In [16]:
doc.vocab[100].text

'VERB'

In [17]:
len(doc.vocab)

779

In [18]:
for k,v in sorted(POS_Counts.items()):
    print(f"{k}. {doc.vocab[k].text:{5}} : {v}")

84. ADJ   : 3
85. ADP   : 1
90. DET   : 2
92. NOUN  : 3
94. PART  : 1
97. PUNCT : 1
100. VERB  : 1


In [20]:
TAG_Counts = doc.count_by(spacy.attrs.TAG)

for k,v in TAG_Counts.items():
    print(f"{k}. {doc.vocab[k].text:{4}}:{v}")

15267657372422890137. DT  :2
10554686591937588953. JJ  :3
15308085513773655218. NN  :3
17109001835818727656. VBD :1
1292078113972184607. IN  :1
74. POS :1
12646065887601541794. .   :1


In [21]:
DEP_Counts = doc.count_by(spacy.attrs.DEP)

for k,v in DEP_Counts.items():
    print(f"{k}. {doc.vocab[k].text:{4}}:{v}")

415. det :2
402. amod:3
429. nsubj:1
8206900633647566924. ROOT:1
443. prep:1
439. pobj:1
8110129090154140942. case:1
400. advmod:1
445. punct:1


In [22]:
# Here we've shown spacy.attrs.POS, spacy.attrs.TAG and spacy.attrs.DEP.