In [1]:
import spacy
# Load the small English pipeline and run it over the whole story text.
nlp = spacy.load('en_core_web_sm')
# Decode explicitly as UTF-8: without `encoding`, open() uses the platform's
# locale encoding, so the same file could be read differently (or fail to
# decode) on another machine.
# NOTE(review): assumes peterrabbit.txt is stored as UTF-8 — confirm.
with open('peterrabbit.txt', 'r', encoding='utf-8') as f:
    doc = nlp(f.read())

In [2]:
# Pull out sentence number three (index 2) and print one row per token:
# surface text, coarse POS, fine-grained tag, and spaCy's gloss for that tag.
all_sentences = list(doc.sents)
third_sentence = all_sentences[2]
for token in third_sentence:
    print(
        f'Text: {token.text:<12} POS: {token.pos_:<6} Tag: {token.tag_:<6} Description: {spacy.explain(token.tag_)}'
    )

Text: They         POS: PRON   Tag: PRP    Description: pronoun, personal
Text: lived        POS: VERB   Tag: VBD    Description: verb, past tense
Text: with         POS: ADP    Tag: IN     Description: conjunction, subordinating or preposition
Text: their        POS: PRON   Tag: PRP$   Description: pronoun, possessive
Text: Mother       POS: NOUN   Tag: NN     Description: noun, singular or mass
Text: in           POS: ADP    Tag: IN     Description: conjunction, subordinating or preposition
Text: a            POS: DET    Tag: DT     Description: determiner
Text: sand         POS: NOUN   Tag: NN     Description: noun, singular or mass
Text: -            POS: PUNCT  Tag: HYPH   Description: punctuation mark, hyphen
Text: bank         POS: NOUN   Tag: NN     Description: noun, singular or mass
Text: ,            POS: PUNCT  Tag: ,      Description: punctuation mark, comma
Text: underneath   POS: ADP    Tag: IN     Description: conjunction, subordinating or preposition
Text: the         

In [3]:
from collections import Counter
# Tally how many tokens in the document carry each coarse-grained POS label
pos_counts = Counter(map(lambda tok: tok.pos_, doc))
print(pos_counts)

Counter({'NOUN': 172, 'PUNCT': 171, 'VERB': 135, 'ADP': 125, 'PRON': 110, 'SPACE': 99, 'DET': 90, 'PROPN': 74, 'ADV': 63, 'CCONJ': 61, 'ADJ': 53, 'AUX': 49, 'PART': 28, 'SCONJ': 19, 'NUM': 9})


In [4]:
# Share of tokens tagged NOUN. The denominator is every token in `doc`,
# so punctuation and whitespace tokens are included in the total.
total_tokens = len(doc)
noun_count = len([tok for tok in doc if tok.pos_ == 'NOUN'])
noun_percentage = noun_count / total_tokens * 100

print(f'Percentage of nouns: {noun_percentage:.2f}%')

Percentage of nouns: 13.67%


In [10]:
from spacy import displacy
# Draw the dependency tree of the third sentence inline in the notebook;
# `distance` is passed through to displaCy as a rendering option.
displacy.render(
    third_sentence,
    style='dep',
    jupyter=True,
    options=dict(distance=100),
)

In [6]:
# Show the first two named entities: text, label, and spaCy's gloss of the label
first_two_entities = doc.ents[:2]
for ent in first_two_entities:
    print(
        f'Entity: {ent.text} - Label: {ent.label_} - Description: {spacy.explain(ent.label_)}'
    )

Entity: The Tale of Peter Rabbit - Label: WORK_OF_ART - Description: Titles of books, songs, etc.
Entity: Beatrix Potter - Label: PERSON - Description: People, including fictional


In [7]:
# Number of sentences found by the pipeline's sentence segmentation
num_sentences = sum(1 for _ in doc.sents)
print(f'Number of sentences: {num_sentences}')

Number of sentences: 55


In [8]:
# Keep only the sentences that contain at least one named entity, then count them
sentences_with_ners = list(filter(lambda sent: sent.ents, doc.sents))
count_sentences_with_ners = len(sentences_with_ners)

print(f'Number of sentences containing named entities: {count_sentences_with_ners}')

Number of sentences containing named entities: 35


In [9]:
# Highlight the named entities in the first sentence that contains any
# (element 0 of `sentences_with_ners`, not necessarily the document's first sentence).
displacy.render(
    sentences_with_ners[0],
    style='ent',
    jupyter=True,
)