# Install Dependencies

In [1]:
# !pip install nltk
# !pip install pandas
# !pip install vaderSentiment

# Run

In [2]:
import json
import re
import nltk
import pandas as pd
from openpyxl import Workbook, load_workbook

# nltk.download('wordnet')
# nltk.download('vader_lexicon')
# nltk.download('punkt')

from nltk.tokenize import RegexpTokenizer, sent_tokenize
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer as SIA

## Input File

In [3]:
# Path of the Excel workbook to process. The workbook is read from this path
# and saved back to the same path after the results column has been filled in.
file = './data.xlsx'

## Load tickers

In [4]:
# Read the ticker column as plain strings. With pandas' default NA handling,
# symbols such as "NA", "NULL" or "NAN" are silently converted to float NaN
# and could then never be matched against words in a text.
df = pd.read_csv(
    './cleaned_tickers.csv',
    dtype={'ticker': str},
    keep_default_na=False,
)

# Blank cells are read as empty strings once NA parsing is disabled; drop them.
tickers = [symbol for symbol in df['ticker'].tolist() if symbol]

## Function to extract per-ticker sentiment scores from a text

In [5]:
def get_sentiment(text):
    """Score the sentiment of each known ticker mentioned in ``text``.

    The text is split into sentences (the words "and" / "or" surrounded by
    whitespace are treated as additional sentence breaks). Each sentence that
    contains a known ticker is scored with the VADER analyzer and its compound
    score is attributed to that ticker. Sentences without a known ticker are
    ignored.

    Known tickers are taken from the module-level ``tickers`` collection.

    Args:
        text: The text to analyse.

    Returns:
        A list of ``{'ticker': ..., 'sentiment_score': ...}`` dicts, one per
        ticker found. When a ticker is found in several sentences, the highest
        compound score is kept.
    """
    # Treat " and " / " or " as sentence boundaries so that each clause is
    # scored on its own.
    text = re.sub(r'\sand\s|\sor\s', '. ', text)
    sentences = sent_tokenize(text)

    sia = SIA()
    # Raw string: '\w', '\$', '\d' and '\S' are invalid escape sequences in a
    # normal string literal and trigger warnings on recent Python versions.
    # Built once here because it does not depend on the sentence.
    tokenizer = RegexpTokenizer(r'\w+|\$[\d\.]+|http\S+')
    # Set membership is O(1) per word instead of scanning the whole list.
    known_tickers = set(tickers)

    result = dict()
    for sentence in sentences:
        words = tokenizer.tokenize(sentence)

        # If a sentence mentions several tickers, the last one is the one the
        # score is attributed to.
        ticker = next(
            (word for word in reversed(words) if word in known_tickers),
            None,
        )
        if not ticker:
            continue

        # Only score sentences that actually mention a ticker.
        compound = sia.polarity_scores(sentence)['compound']
        if ticker in result:
            result[ticker] = max(result[ticker], compound)
        else:
            result[ticker] = compound

    return [
        {'ticker': ticker, 'sentiment_score': sentiment_score}
        for ticker, sentiment_score in result.items()
    ]

In [6]:
# Score every data row of the first worksheet and write the result back into
# the same workbook.
wb = load_workbook(filename=file)
ws = wb.worksheets[0]

# Column 4 holds the output; row 1 is the header row.
ws.cell(row=1, column=4, value='Library Output')

for i in range(2, ws.max_row + 1):
    title = ws.cell(row=i, column=1).value
    body = ws.cell(row=i, column=2).value

    # The first row with neither a title nor a body marks the end of the data.
    if not title and not body:
        break

    # An empty cell reads back as None. Formatting it directly into the text
    # would feed the literal word "None" to the analyzer, so only cells that
    # actually hold a value are included.
    text = '\n\n'.join(str(part) for part in (title, body) if part is not None)

    sentiment = get_sentiment(text)
    ws.cell(row=i, column=4, value=str(sentiment))

# Overwrites the input workbook in place.
wb.save(filename=file)
print('completed')

completed
