Initialize Sentiment Analyzer

In [1]:
import nltk
import warnings
warnings.filterwarnings('ignore')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.downloader.download('vader_lexicon')

sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/shubham/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Updating Vader Lexicon with External Lexicon

In [2]:
# import csv
import pandas as pd


filtering positive and negative words from L&M dictionary into seperate csv.

In [3]:
lexicons = pd.read_csv('./datasets/Loughran-McDonald_MasterDictionary_1993-2021.csv')


In [4]:
len(lexicons)

86531

In [5]:
len(lexicons[lexicons['Negative'] != 0])

2355

In [6]:
positive_words = lexicons[lexicons['Positive'] != 0]

In [8]:
#positive_words.to_csv('./datasets/positive_words.csv')

In [9]:
negative_words = lexicons[lexicons['Negative'] != 0]


In [10]:
#negative_words.to_csv('./datasets/negative_words.csv')

working on stock market lexicon dictionary from https://github.com/nunomroliveira/stock_market_lexicon

In [12]:
stock_lex = pd.read_csv('./datasets/stock_lexicons.csv')


In [13]:
stock_lex['sentiment'] = (stock_lex['Aff_Score'] + stock_lex['Neg_Score'])/2


creating a dictionary w/ item as key and sentiment score as value

In [14]:
stock_lex = dict(zip(stock_lex.Item, stock_lex.sentiment))


dictionary comprehension to remove any key:value pair where key's length > 1

In [15]:
stock_lex = {k:v for k,v in stock_lex.items() if len(k.split(' '))==1}

standardizing the sentiment score of lexicon dictionary between (-4,4)

In [16]:
stock_lex_scaled = {}


In [17]:
for k, v in stock_lex.items():
    if v > 0:
        stock_lex_scaled[k] = v / max(stock_lex.values()) * 4
    else:
        stock_lex_scaled[k] = v / min(stock_lex.values()) * -4


In [18]:
positive = []

In [19]:
for word in positive_words['Word'].to_list():
    item = word.strip().split(' ')
    if len(item) > 1:
        positive.extend(item)
    else:
        positive.append(item[0])


In [20]:
negative = []

In [21]:
for word in negative_words['Word'].to_list():
    item = word.strip().split(' ')
    if len(item) > 1:
        negative.extend(item)
    else:
        negative.append(item[0])


In [22]:
final_lex = {}


In [23]:
final_lex.update(sia.lexicon)
# using dict comprehension creating a dictionary from list and assigning each word value 2/-2
final_lex.update({word:2.0 for word in positive})
final_lex.update({word:-2.0 for word in negative})
final_lex.update(stock_lex_scaled)

In [44]:
final_lex_df = pd.DataFrame(list(final_lex.items()), columns=['word', 'sentiment_score'])

In [45]:
#final_lex_df.to_csv('./datasets/lexicon_dictionary.csv')

updating the original SentimentIntensityANalyzer lexicon dictionary

In [28]:
sia.lexicon = final_lex


reading nifty 500 articles

In [29]:
nifty_articles = pd.read_csv('./datasets/NIFTY_500_Articles.csv')

In [30]:
art_scores = pd.DataFrame(nifty_articles['Headline'].apply(sia.polarity_scores).to_list())

In [32]:
art_scores_df = pd.merge(nifty_articles, art_scores,
                         left_index=True, right_index=True)


In [41]:
(art_scores_df.iloc[:,1:])[art_scores_df['Ticker'] == 'ADANIENT']

Unnamed: 0,Ticker,Headline,Date,Time,neg_x,neu_x,pos_x,compound_x,neg_y,neu_y,pos_y,compound_y
483,ADANIENT,Adani Enterprises incorporates Wholly Owned Su...,2023/04/12,09:18,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
484,ADANIENT,Adani Enterprises informs about clarification ...,2023/03/29,09:57,0.0,1.0,0.0,0.0,0.0,0.327,0.673,0.2886
485,ADANIENT,Adani Enterprises’ arm completes acquisition o...,2023/03/28,09:38,0.0,1.0,0.0,0.0,0.146,0.416,0.439,0.4982
486,ADANIENT,"BSE, NSE remove 3 Adani group companies from s...",2023/03/17,15:30,0.0,1.0,0.0,0.0,0.209,0.502,0.289,0.2012
487,ADANIENT,Adani Group repays loans of $2.65 billion to c...,2023/03/13,12:12,0.0,1.0,0.0,0.0,0.366,0.495,0.139,0.064
488,ADANIENT,"Adani group repays Rs 7,374 crore share-backed...",2023/03/09,11:19,0.0,1.0,0.0,0.0,0.0,0.686,0.314,0.1883
489,ADANIENT,Adani portfolio companies sell stakes worth Rs...,2023/03/03,14:52,0.0,0.853,0.147,0.2263,0.078,0.616,0.306,0.2402
490,ADANIENT,Adani Enterprises informs about disclosure,2023/03/03,12:54,0.0,1.0,0.0,0.0,0.0,0.578,0.422,0.0497
491,ADANIENT,Adani Enterprises’ arm emerges as preferred bi...,2023/03/03,12:11,0.225,0.775,0.0,-0.4404,0.0,0.363,0.637,0.5802
492,ADANIENT,Adani Enterprises’ arm emerges as preferred bi...,2023/03/01,11:58,0.195,0.805,0.0,-0.4404,0.069,0.379,0.553,0.5677


In [46]:
lex_fin = pd.read_csv('./datasets/lexicon_dictionary.csv')

In [47]:
lex_fin

Unnamed: 0.1,Unnamed: 0,word,sentiment_score
0,0,$:,-1.500000
1,1,%),-0.400000
2,2,%-),-1.500000
3,3,&-:,-0.400000
4,4,&:,-0.700000
...,...,...,...
15191,15191,zoom,-0.373068
15192,15192,zortrades.com,1.718507
15193,15193,zte,4.000000
15194,15194,zuck,-0.121264


In [50]:
lex_dict = dict(zip(lex_fin.word, lex_fin.sentiment_score))