In [7]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import plotly.express as px

In [8]:
# Include the tickers you want to generate a sentiment score for
ticker_list = ['JPM']
# Base URL of the finviz quote page; the ticker symbol is appended to it
finwizUrl = "https://finviz.com/quote.ashx?t="
# Browser-like User-Agent header sent with every request
requestHeader_dict = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}
# Maps each ticker symbol to the parsed "news-table" element of its quote page
news_dict = {}
# Scrape the news headlines table from the quote page of each ticker
for ticker in ticker_list:
    request = Request(url=finwizUrl + ticker, headers=requestHeader_dict)
    # Use a context manager so the HTTP response is closed once the page is parsed
    with urlopen(request) as response:
        # Name the parser explicitly so the result does not depend on
        # which optional parsers happen to be installed
        page = BeautifulSoup(response, "html.parser")
    htmlTable = page.find(id="news-table")
    if htmlTable is None:
        # Fail here with a clear message instead of storing None and
        # crashing later when the table rows are iterated
        raise ValueError(f"No news table found on the finviz page for ticker {ticker!r}")
    news_dict[ticker] = htmlTable

In [9]:
# Prepare the data by labelling each news headline with its ticker, date and time
parsed_news = []
# Most recent date/time seen; rows that carry only a time reuse the last date
news_date, news_time = "", ""
for ticker_key, ticker_table in news_dict.items():
    for row in ticker_table.find_all("tr"):
        # Skip rows that have no headline link or no timestamp cell
        # rather than failing with AttributeError on row.a / row.td
        if row.a is None or row.td is None:
            continue
        date_info = row.td.get_text().split()
        # An empty timestamp cell gives nothing to index into; skip the row
        if not date_info:
            continue
        # extract the actual news headline text
        news_headline = row.a.get_text()
        # date info may contain only time or both date and time
        if len(date_info) == 2:
            news_date, news_time = date_info
        else:
            news_time = date_info[0]
        parsed_news.append([ticker_key, news_date, news_time, news_headline])

In [None]:
# Initialize the VADER sentiment analyzer
# (requires the NLTK `vader_lexicon` resource to be available locally)
vader_analyser = SentimentIntensityAnalyzer()
# Set up a dataframe from the parsed headlines and their date info
df_news = pd.DataFrame(parsed_news, columns=['Ticker', 'Date', 'Time', 'News Headline'])
# Convert the date strings into date objects
df_news['Date'] = pd.to_datetime(df_news.Date).dt.date
# Score every headline; each result is a dict of scores that becomes one row of columns
sentiment_scores = pd.DataFrame(df_news['News Headline'].apply(vader_analyser.polarity_scores).tolist())
# Attach these scores to the original dataframe (both share the default integer index)
df_news = df_news.join(sentiment_scores, rsuffix='_right')
# Calculate the daily average score for each ticker.
# numeric_only=True leaves out the string columns ('Time', 'News Headline');
# without it pandas >= 2.0 raises TypeError when trying to average them.
average_daily_scores = df_news.groupby(['Ticker', 'Date']).mean(numeric_only=True)
# Keep only the 'compound' score, reshaped to one row per date and one column per ticker
average_daily_compound_scores = average_daily_scores.unstack().xs('compound', axis="columns").transpose()
average_daily_compound_scores

In [None]:
# Draw the average daily compound score as a bar chart,
# with the bars for each series placed side by side (grouped)
fig = px.bar(average_daily_compound_scores, barmode='group')
fig.show()

In [12]:
# Export the chart as a static PNG file (path is relative to the working directory)
fig.write_image(file="sentScore.png")