# Loading the book

In [1]:
# Read the whole book into memory as a single string.
# The encoding is pinned so decoding does not depend on the platform's default
# locale encoding (assumes the text file is UTF-8 -- TODO confirm).
with open("miracle_in_the_andes.txt", "r", encoding="utf-8") as file:
    book = file.read()

## The most used words (excluding stop words)

In [2]:
import re
# Tokenise the lower-cased text into runs of ASCII letters; anything else
# (digits, punctuation, whitespace, apostrophes) acts as a separator.
pattern = re.compile("[a-zA-Z]+")
findings = pattern.findall(book.lower())

In [3]:
# Show the first eight tokens as a quick check of the tokenisation.
findings[:8]

['chapter', 'before', 'it', 'was', 'friday', 'the', 'thirteenth', 'of']

In [4]:
# Count how many times each token occurs.
# Counter replaces the hand-written membership test and increment; wrapping it
# in dict() keeps word_dict a plain dict, with keys in first-occurrence order.
from collections import Counter

word_dict = dict(Counter(findings))

In [5]:
# Pair every count with its word as (count, word), so that sorting the tuples
# orders them by count first.
word_list = list(zip(word_dict.values(), word_dict.keys()))

In [6]:
# Show the first seven (count, word) pairs, still in first-occurrence order.
word_list[:7]

[(11, 'chapter'),
 (93, 'before'),
 (800, 'it'),
 (1430, 'was'),
 (1, 'friday'),
 (5346, 'the'),
 (1, 'thirteenth')]

In [7]:
# Sort a copy in descending order: tuples compare by count first, then by
# word, so the most frequent words come first.
WL_sort = list(word_list)
WL_sort.sort(reverse=True)

In [8]:
# Show the eight most frequent words before any stop-word filtering.
WL_sort[:8]

[(5346, 'the'),
 (2795, 'and'),
 (2729, 'i'),
 (2400, 'to'),
 (2060, 'of'),
 (1566, 'a'),
 (1430, 'was'),
 (1419, 'in')]

In [41]:
import nltk
# Lexicon used by SentimentIntensityAnalyzer in the sentiment section.
nltk.download('vader_lexicon')
# Corpus read by stopwords.words("english"); without it that call raises
# LookupError on a machine where the corpus was never downloaded.
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /home/user/nltk_data...


In [12]:
# Load NLTK's English stop-word list; it is sliced for inspection and used to
# filter the word counts in later cells.
english_stopwords = stopwords.words("english")

In [13]:
# Show a small slice of the stop-word list to see what kind of words it holds.
english_stopwords[30:41]

['they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that']

In [14]:
# Drop stop words from the frequency-sorted list and flip each pair to
# (word, count), keeping the descending-count order of WL_sort.
# The stop words are copied into a set once so each membership test is a hash
# lookup instead of a scan of the list.
stopword_set = set(english_stopwords)
filtered_words = [
    (word, count) for count, word in WL_sort if word not in stopword_set
]
        

In [16]:
# Show the fifteen most frequent words that are not stop words.
filtered_words[:15]

[('would', 575),
 ('us', 519),
 ('said', 292),
 ('roberto', 284),
 ('could', 252),
 ('one', 249),
 ('snow', 227),
 ('mountain', 183),
 ('time', 182),
 ('like', 165),
 ('way', 164),
 ('life', 161),
 ('knew', 155),
 ('mountains', 147),
 ('fuselage', 140)]

## Sentiment analysis: which chapters are the most positive and the most negative?

## An example

In [42]:
from nltk.sentiment import SentimentIntensityAnalyzer

In [43]:
# Create one analyzer instance; it is reused for every text scored below.
analyzer = SentimentIntensityAnalyzer()

In [45]:
# Score a short sample text; the result is a dict with 'neg', 'neu', 'pos'
# and 'compound' entries.
scores = analyzer.polarity_scores("You are very beautiful.I love you.")

In [46]:
# Display the score dict for the sample text.
scores

{'neg': 0.0, 'neu': 0.528, 'pos': 0.472, 'compound': 0.6682}

In [47]:
# Label the text by comparing the positive share with the negative share;
# anything that is not strictly more positive is reported as negative.
verdict = (
    "It is a positive text"
    if scores["pos"] > scores["neg"]
    else "It is a negative text"
)
print(verdict)

It is a positive text


In [48]:
# Score the entire book in a single call.
# NOTE(review): the 'compound' value in the output is pinned at 1.0, so it says
# little for a text this long; 'neg' and 'pos' are the more informative figures.
analyzer.polarity_scores(book)

{'neg': 0.116, 'neu': 0.76, 'pos': 0.125, 'compound': 1.0}

## Chapter-by-chapter sentiment analysis

In [49]:
# Regex for chapter headings of the form "Chapter <number>".
# Note: this rebinds `pattern`, which earlier held the word-tokenising regex.
pattern = re.compile("Chapter [0-9]+")

In [50]:
# Display the compiled pattern to confirm which regex is currently bound.
pattern

re.compile(r'Chapter [0-9]+', re.UNICODE)

In [51]:
# Cut the book at every chapter heading; the first piece is whatever text
# comes before the first heading.
chapters = pattern.split(book)

In [53]:
# Discard the text that precedes the first chapter heading.
# The list is rebuilt from the split itself rather than sliced from `chapters`,
# so re-running this cell cannot drop a real chapter each time.
chapters = re.split(pattern, book)[1:]

In [58]:
# Score every chapter once and keep the results for later comparison.
# A separate name is used so the sample-text `scores` dict from the example
# section is not overwritten by the last chapter's result.
chapter_scores = [analyzer.polarity_scores(chapter) for chapter in chapters]

# Print one line per chapter, numbering chapters from 1.
for num, chapter_score in enumerate(chapter_scores, start=1):
    print(f"Chapter {num} scores: {chapter_score}")

Chapter 1 scores: {'neg': 0.061, 'neu': 0.779, 'pos': 0.16, 'compound': 1.0}
Chapter 2 scores: {'neg': 0.12, 'neu': 0.726, 'pos': 0.154, 'compound': 0.9991}
Chapter 3 scores: {'neg': 0.145, 'neu': 0.751, 'pos': 0.105, 'compound': -0.9999}
Chapter 4 scores: {'neg': 0.141, 'neu': 0.721, 'pos': 0.138, 'compound': -0.9963}
Chapter 5 scores: {'neg': 0.118, 'neu': 0.742, 'pos': 0.141, 'compound': 0.9997}
Chapter 6 scores: {'neg': 0.124, 'neu': 0.761, 'pos': 0.115, 'compound': -0.9979}
Chapter 7 scores: {'neg': 0.136, 'neu': 0.761, 'pos': 0.103, 'compound': -0.9999}
Chapter 8 scores: {'neg': 0.12, 'neu': 0.786, 'pos': 0.094, 'compound': -0.9998}
Chapter 9 scores: {'neg': 0.097, 'neu': 0.824, 'pos': 0.079, 'compound': -0.9996}
Chapter 10 scores: {'neg': 0.086, 'neu': 0.733, 'pos': 0.181, 'compound': 1.0}
