In [1]:
import pandas as pd
import numpy as np
import spacy
from collections import Counter
from spacy import displacy

In [2]:
nlp=spacy.load("en_core_web_sm")

In [3]:
# Ques-1-Create a Doc object from the file peterrabbit.txt
with open("reaganomics.txt","r") as file:
    text=file.read()

In [4]:
doc=nlp(text)
doc

REAGANOMICS
https://en.wikipedia.org/wiki/Reaganomics

Reaganomics (a portmanteau of [Ronald] Reagan and economics attributed to Paul Harvey)[1] refers to the economic policies promoted by U.S. President Ronald Reagan during the 1980s. These policies are commonly associated with supply-side economics, referred to as trickle-down economics or voodoo economics by political opponents, and free-market economics by political advocates.

The four pillars of Reagan's economic policy were to reduce the growth of government spending, reduce the federal income tax and capital gains tax, reduce government regulation, and tighten the money supply in order to reduce inflation.[2]

The results of Reaganomics are still debated. Supporters point to the end of stagflation, stronger GDP growth, and an entrepreneur revolution in the decades that followed.[3][4] Critics point to the widening income gap, an atmosphere of greed, and the national debt tripling in eight years which ultimately reversed the pos

In [5]:
# Ques-2-For every token in the third sentence, print the token text, the POS tag, the fine-grained TAG tag, and the description of 
# the fine-grained tag.
sent1 = list(doc.sents)
third_sentence = sent1[2]
third_sentence

The four pillars of Reagan's economic policy were to reduce the growth of government spending, reduce the federal income tax and capital gains tax, reduce government regulation, and tighten the money supply in order to reduce inflation.[2]

The results of Reaganomics are still debated.

In [6]:
# Ques-3-Provide a frequency list of POS tags from the entire document
for token in doc:
    print(f'{token.text:{12}} {token.pos_:{6}}')

REAGANOMICS  PROPN 

            SPACE 
https://en.wikipedia.org/wiki/Reaganomics NOUN  


           SPACE 
Reaganomics  PROPN 
(            PUNCT 
a            DET   
portmanteau  NOUN  
of           ADP   
[            X     
Ronald       X     
]            PUNCT 
Reagan       PROPN 
and          CCONJ 
economics    NOUN  
attributed   VERB  
to           ADP   
Paul         PROPN 
Harvey)[1    PROPN 
]            PUNCT 
refers       VERB  
to           ADP   
the          DET   
economic     ADJ   
policies     NOUN  
promoted     VERB  
by           ADP   
U.S.         PROPN 
President    PROPN 
Ronald       PROPN 
Reagan       PROPN 
during       ADP   
the          DET   
1980s        NUM   
.            PUNCT 
These        DET   
policies     NOUN  
are          AUX   
commonly     ADV   
associated   VERB  
with         ADP   
supply       NOUN  
-            PUNCT 
side         NOUN  
economics    NOUN  
,            PUNCT 
referred     VERB  
to           ADP   
as         

In [7]:
# Ques-4-CHALLENGE: What percentage of tokens are nouns?
# Total no of tokens
POS_counts = Counter(token.pos_ for token in doc)
for pos, count in POS_counts.items():
    print(f"POS: {pos}, Count: {count}")Display the Dependency Parse for the third sentence.
    


POS: PROPN, Count: 458
POS: SPACE, Count: 76
POS: NOUN, Count: 1352
POS: PUNCT, Count: 583
POS: DET, Count: 521
POS: ADP, Count: 847
POS: X, Count: 65
POS: CCONJ, Count: 123
POS: VERB, Count: 499
POS: ADJ, Count: 453
POS: NUM, Count: 387
POS: AUX, Count: 105
POS: ADV, Count: 124
POS: PART, Count: 95
POS: SCONJ, Count: 91
POS: PRON, Count: 124
POS: SYM, Count: 30


In [8]:
total_tokens = 0
noun_tokens = 0

for token in doc:
    total_tokens += 1
    if token.pos_ in ["NOUN", "PROPN"]:
        noun_tokens += 1
        
noun_percentage = (noun_tokens / total_tokens) * 100 if total_tokens > 0 else 0

print(f"Percentage of nouns: {noun_percentage:.2f}%")



Percentage of nouns: 30.51%


In [9]:
# Ques-5-Display the Dependency Parse for the third sentence.

print("Dependency Parse (Text Format):")
for token in third_sentence:
    print(f"Token: {token.text}, Head: {token.head.text}, Dependency: {token.dep_}")
    
# Display the dependency parse in graphical format
print("\nDependency Parse (Graphical Format):")
displacy.render(third_sentence, style="dep", jupyter=True)


Dependency Parse (Text Format):
Token: The, Head: pillars, Dependency: det
Token: four, Head: pillars, Dependency: nummod
Token: pillars, Head: were, Dependency: nsubj
Token: of, Head: pillars, Dependency: prep
Token: Reagan, Head: policy, Dependency: poss
Token: 's, Head: Reagan, Dependency: case
Token: economic, Head: policy, Dependency: amod
Token: policy, Head: of, Dependency: pobj
Token: were, Head: were, Dependency: ROOT
Token: to, Head: reduce, Dependency: aux
Token: reduce, Head: were, Dependency: xcomp
Token: the, Head: growth, Dependency: det
Token: growth, Head: reduce, Dependency: dobj
Token: of, Head: growth, Dependency: prep
Token: government, Head: spending, Dependency: compound
Token: spending, Head: of, Dependency: pobj
Token: ,, Head: were, Dependency: punct
Token: reduce, Head: were, Dependency: conj
Token: the, Head: tax, Dependency: det
Token: federal, Head: tax, Dependency: amod
Token: income, Head: tax, Dependency: compound
Token: tax, Head: reduce, Dependency: d

In [10]:
# Ques-6-Show the first two named entities from Beatrix Potter's The Tale of Peter Rabbit

named_entities = []

# Iterate over the entities in the document and add to the list

for ent in doc.ents:
    named_entities.append((ent.text, ent.label_))
    # Break after we collect the first two named entities
    if len(named_entities) == 2:
        break
    
# Print the first two named entities
for i, (text, label) in enumerate(named_entities, start=1):
    print(f"Entity {i}: Text = '{text}', Label = '{label}'")

Entity 1: Text = 'REAGANOMICS', Label = 'ORG'
Entity 2: Text = 'Ronald] Reagan', Label = 'PERSON'


In [12]:
# Ques-7-How many sentences are contained in The Tale of Peter Rabbit?

num_sentences=len(sent1)
print(f"The Tale of Peter Rabbit contains {num_sentences} sentences.")

The Tale of Peter Rabbit contains 125 sentences.


In [13]:
# Ques-6-CHALLENGE: How many sentences contain named entities?

sentences_with_entities = 0

for sentence in doc.sents:
    if any(ent for ent in sentence.ents):
        sentences_with_entities += 1
print(f"Number of sentences containing named entities: {sentences_with_entities}")

Number of sentences containing named entities: 109


In [14]:
# Ques-9-Display the named entity visualization for list_of_sents[0] from the previous problem

list_of_sents = list(doc.sents)
displacy.render(list_of_sents[0], style="ent", jupyter=True)