#### Load the text in the book

In [1]:
# Read the whole book into memory as one UTF-8 decoded string.
with open('miracle_in_the_andes.txt', mode='r', encoding="utf8") as book_file:
    book = book_file.read()

In [2]:
# Sanity check: show the type of the object returned by file.read().
type(book)

str

In [3]:
import re

#### Find no. of chapters in the book

In [4]:
# Chapter headings have the form "Chapter <digits>"; collect every heading
# found in the text, in order of appearance.
pattern = re.compile("Chapter [0-9]+")
findings = pattern.findall(book)
findings

['Chapter 1',
 'Chapter 2',
 'Chapter 3',
 'Chapter 4',
 'Chapter 5',
 'Chapter 6',
 'Chapter 7',
 'Chapter 8',
 'Chapter 9',
 'Chapter 10']

In [5]:
# Number of matches currently held in `findings`.
len(findings)

10

#### Find the no. of occurrences of 'love'

In [6]:
# Collect every sentence-like span that contains the lowercase word "love".
# A match starts at a capital letter, runs over non-period characters,
# requires at least one non-letter on each side of "love", and ends at the
# next period.
# The closing period is escaped so it matches only a literal "." (an
# unescaped "." matches any character except a newline). The redundant "{1}"
# is dropped and a raw string keeps the backslash intact.
# Note: this counts matching spans, so a span that uses "love" more than once
# still contributes 1 to the total.
pattern = re.compile(r"[A-Z][^.]*[^A-Za-z]+love[^A-Za-z]+[^.]*\.")
findings = pattern.findall(book)
len(findings)
# Evaluate `findings` instead of len(findings) to read the matched text.

67

#### Find the most used words

In [7]:
# Break the lower-cased book into words, where a word is a run of ASCII
# letters, and preview the first five.
pattern = re.compile("[A-Za-z]+")
findings = pattern.findall(book.lower())
findings[:5]

['chapter', 'before', 'it', 'was', 'friday']

In [8]:
# Tally how many times each word appears in `findings`.
d = {}
for token in findings:
    # A word not seen before starts from 0.
    d[token] = d.get(token, 0) + 1

In [15]:
# Turn each word -> count entry into a (count, word) tuple so that ordinary
# tuple ordering ranks by frequency first.
d_list = [(occurrences, token) for token, occurrences in d.items()]
ranked = sorted(d_list, reverse=True)
# Show only how many distinct words there are; evaluate `ranked` to see them all.
len(ranked)

6992

#### Find the most used words (excluding articles / stop words)

In [11]:
import nltk
from nltk.corpus import stopwords

# Load NLTK's English stop-word list. The corpus data is distributed
# separately from the nltk package, so in an environment where it has not
# been downloaded yet the lookup raises LookupError; fetch it once and retry
# so the notebook also runs from a fresh setup.
try:
    english_stopwords = stopwords.words('english')
except LookupError:
    nltk.download('stopwords')
    english_stopwords = stopwords.words('english')

In [16]:
# Drop stop words from the (count, word) pairs and show the ten most frequent
# remaining words. The stop words are copied into a set first so that each
# membership test is a hash lookup rather than a scan of the whole list.
stopword_set = set(english_stopwords)
filtered_words = [
    (count, word) for count, word in d_list if word not in stopword_set
]
sorted(filtered_words, reverse=True)[:10]

[(575, 'would'),
 (519, 'us'),
 (292, 'said'),
 (284, 'roberto'),
 (252, 'could'),
 (249, 'one'),
 (227, 'snow'),
 (183, 'mountain'),
 (182, 'time'),
 (165, 'like')]

#### Sentiment Intensity Analyzer Example

In [20]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# SentimentIntensityAnalyzer loads the "vader_lexicon" resource, which is
# distributed separately from the nltk package. If it is missing the
# constructor raises LookupError, so download it once and retry; this keeps
# the notebook runnable from a fresh environment.
try:
    analyzer = SentimentIntensityAnalyzer()
except LookupError:
    nltk.download('vader_lexicon')
    analyzer = SentimentIntensityAnalyzer()

# Score one short sample text; the result is a dict with the keys
# 'neg', 'neu', 'pos' and 'compound'.
scores = analyzer.polarity_scores("Wow, There are so many trees arund. I am feeling so happy.")
scores

{'neg': 0.0, 'neu': 0.443, 'pos': 0.557, 'compound': 0.8762}

In [22]:
# Label the text by comparing its positive and negative proportions.
# Equal proportions (for example a text where both are 0.0) are reported as
# neutral instead of falling through to the NEGATIVE branch.
if scores['pos'] > scores['neg']:
    print("It is a POSITIVE text.")
elif scores['pos'] < scores['neg']:
    print("It is a NEGATIVE text.")
else:
    print("It is a NEUTRAL text.")

It is a POSITIVE text.


#### Chapter-wise Sentiment Intensity Analyzer

In [28]:
# Split the book at every chapter heading. The first piece is whatever text
# precedes the first heading, so it is left out of the scoring loop.
pattern = re.compile('Chapter [0-9]+')
chapter_content_list = pattern.split(book)
chapter_bodies = chapter_content_list[1:]
for index, content in enumerate(chapter_bodies):
    # `index` is zero-based, so the printed chapter number is index + 1.
    scores = analyzer.polarity_scores(content)
    print(f"Chapter {index + 1}:",scores)

Chapter 1: {'neg': 0.061, 'neu': 0.779, 'pos': 0.16, 'compound': 1.0}
Chapter 2: {'neg': 0.12, 'neu': 0.726, 'pos': 0.154, 'compound': 0.9991}
Chapter 3: {'neg': 0.145, 'neu': 0.751, 'pos': 0.105, 'compound': -0.9999}
Chapter 4: {'neg': 0.141, 'neu': 0.721, 'pos': 0.138, 'compound': -0.9963}
Chapter 5: {'neg': 0.118, 'neu': 0.742, 'pos': 0.141, 'compound': 0.9997}
Chapter 6: {'neg': 0.124, 'neu': 0.761, 'pos': 0.115, 'compound': -0.9979}
Chapter 7: {'neg': 0.136, 'neu': 0.761, 'pos': 0.103, 'compound': -0.9999}
Chapter 8: {'neg': 0.12, 'neu': 0.786, 'pos': 0.094, 'compound': -0.9998}
Chapter 9: {'neg': 0.097, 'neu': 0.824, 'pos': 0.079, 'compound': -0.9996}
Chapter 10: {'neg': 0.086, 'neu': 0.733, 'pos': 0.181, 'compound': 1.0}
