In [1]:
import os
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import plotly.express as px
import plotly.graph_objects as go

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from wordcloud import WordCloud

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('vader_lexicon')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sayan\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\sayan\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\sayan\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\sayan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [2]:
file_path = 'speech.txt'
with open(file_path, 'r', encoding='utf-8') as f:
    content = f.read()

In [23]:
def dataCleaning(text):
    text = text.strip().lower()
    clean_text = re.sub(r'[^\w\s]','',text)
    word_token = word_tokenize(clean_text)
    STOPWORDS = set(stopwords.words('english'))
    stem = WordNetLemmatizer()
    word_token = [stem.lemmatize(word) for word in word_token if word not in STOPWORDS] 
    return word_token

In [24]:
clean_content = dataCleaning(content)

In [30]:
sa = SentimentIntensityAnalyzer()
sentiment_score = [sa.polarity_scores(word)['compound'] for 
                  word in clean_content]
avg_score = sum(sentiment_score)/len(sentiment_score)

avg_score

0.014298377028714108

In [40]:
pos_words = set([word for i, word in enumerate(clean_content) 
             if sentiment_score[i] > 0.1])
neutral_words = set([word for i, word in enumerate(clean_content) 
             if sentiment_score[i] >=- 0.1 and sentiment_score[i] <=0.1])
neg_words = set([word for i, word in enumerate(clean_content) 
             if sentiment_score[i] <- 0.1])

In [41]:
print(pos_words)

{'honest', 'celebrated', 'stronger', 'agree', 'confidence', 'like', 'dream', 'assurance', 'peace', 'honoured', 'trust', 'great', 'courage', 'interested', 'fascination', 'good', 'growing', 'credited', 'faith', 'huge', 'free', 'top', 'marvel', 'trusting', 'fulfill', 'hero', 'dwelled', 'winning', 'wish', 'better', 'important', 'devoted', 'opportunity', 'best', 'save', 'certain', 'devote', 'commitment', 'energy', 'blessing', 'vision', 'definite', 'inspires', 'increased', 'well', 'freebie', 'reached', 'responsible', 'helping', 'assured', 'rich', 'party', 'gratitude', 'freedom', 'assure', 'success', 'help', 'determination', 'emotional', 'award', 'attachment', 'fighter', 'ensure', 'dedicated', 'profit', 'strength', 'succeeded', 'strong', 'interest', 'lucky', 'growth', 'revered'}


In [42]:
print(neutral_words)

{'every', 'naval', '50000', 'convey', 'splintered', 'stream', 'india', 'per', 'r', 'transportation', 'eye', 'slogan', '2019', 'patriotism', '1962', 'kashmir', 'never', 'parliament', 'daughter', 'pointed', 'moving', 'represented', 'jal', 'rectified', 'burfukan', 'food', 'movement', 'project', 'realize', 'told', 'recently', 'led', 'made', 'body', 'development', 'founding', 'developed', 'minister', 'marker', 'utmost', 'approach', 'putting', 'omen', 'open', 'destination', 'case', 'objectionable', 'going', 'arose', 'utilized', 'understanding', 'mahatma', 'today', 'transmission', 'fisherman', 'nadu', 'house', 'northeast', '4', 'narrative', 'showing', 'burn', 'door', 'time', 'left', 'woman', 'bjp', 'bharati', 'constant', 'introduced', 'majority', 'call', 'garib', 'rail', 'past', '5', 'people', 'continue', 'youth', 'achieve', 'able', 'two', 'must', 'effort', 'participated', 'later', 'shine', 'system', 'startup', 'election', 'beck', 'common', 'standing', 'rajya', 'night', 'mentioned', 'current'

In [43]:
print(neg_words)

{'insecurity', 'severely', 'crisis', 'warned', 'scam', 'criticism', 'ill', 'lack', 'loss', 'pressure', 'unsuccessful', 'hard', 'misinformation', 'damage', 'violence', 'disappointed', 'forbidden', 'protest', 'neglect', 'misinformed', 'distrusting', 'strike', 'questioned', 'worse', 'petty', 'suffered', 'suffering', 'low', 'unacceptable', 'victim', 'distrust', 'poverty', 'arrogant', 'guilty', 'deprived', 'confuse', 'failure', 'havoc', 'crime', 'reckless', 'punished', 'perturbed', 'stuck', 'lamented', 'poor', 'attack', 'bad', 'criticizing', 'enemy', 'fool', 'saddening', 'conflict', 'stressed', 'pain', 'stop', 'attacked', 'contradiction', 'arrogance', 'betrayal'}
