In [1]:
# Web Scraping tools
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import plotly.express as px
import pandas as pd

# VADER for Sentiment Analysis
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Base URL of the finviz quote page; get_news appends the ticker symbol to it
finviz_url = 'https://finviz.com/quote.ashx?t='

In [3]:
def get_news(ticker):
    """Download the finviz quote page for ``ticker`` and return its news table.

    Parameters
    ----------
    ticker : str
        Ticker symbol; it is appended verbatim to ``finviz_url``.

    Returns
    -------
    bs4.element.Tag or None
        The element whose id is ``news-table``, or ``None`` when the page
        contains no element with that id.

    Raises
    ------
    urllib.error.URLError
        If the page cannot be fetched (``HTTPError`` for HTTP error statuses).
    """
    url = finviz_url + ticker
    # A browser-like User-Agent is sent with the request
    # (presumably finviz rejects urllib's default one -- confirm).
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    # Use the response as a context manager so the connection is always closed,
    # even if parsing fails.
    with urlopen(req) as response:
        # Name the parser explicitly: otherwise BeautifulSoup picks whichever
        # parser happens to be installed and emits a warning about it.
        html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the Soup and hand it back to the caller
    return html.find(id='news-table')

# Fetch the finviz news table for the ticker under analysis
ticker = 'AAPL'
news_table = get_news(ticker)

In [4]:
# parse news into dataframe
def parse_news(news_table):
    """Convert a scraped news table into a DataFrame of timestamped headlines.

    Parameters
    ----------
    news_table : bs4.element.Tag
        Table element (as returned by ``get_news``) whose ``<tr>`` rows hold a
        timestamp in their first ``<td>`` and the headline in their first ``<a>``.

    Returns
    -------
    pandas.DataFrame
        One row per headline with the string columns ``date``, ``time`` and
        ``headline`` plus a ``datetime`` column parsed from ``date + ' ' + time``.
        A table without usable rows yields an empty frame with the same columns.
    """
    columns = ['date', 'time', 'headline']
    parsed_news = []
    # Rows that carry only a time reuse the date of the most recent row that
    # carried one, so the last seen date is remembered across iterations.
    date = None

    for row in news_table.find_all('tr'):
        # Skip rows that are not headlines (no link or no timestamp cell).
        if row.a is None or row.td is None:
            continue

        # Split the timestamp cell into whitespace-separated tokens.
        date_scrape = row.td.text.split()
        if not date_scrape:
            continue

        if len(date_scrape) == 1:
            # Only a time is present.
            time = date_scrape[0]
        else:
            # Date first, then time.
            date, time = date_scrape[0], date_scrape[1]

        # A time-only row seen before any dated row cannot be placed on the
        # calendar, so it is left out instead of failing on an unset date.
        if date is None:
            continue

        # Append date, time and headline as a list to the 'parsed_news' list
        parsed_news.append([date, time, row.a.get_text()])

    # Build the frame once, after the loop, rather than on every row.
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    # Create a pandas datetime column from the strings in 'date' and 'time'
    parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])

    return parsed_news_df
        
# Build the headline DataFrame from the news table fetched for the current ticker
parsed_news_df = parse_news(news_table)

Apple Turns to the Pros
1 Move That Could Supercharge Apple's iPhone Revenue
Don't Worry About Stock-Picking Accuracy When You're Swinging for a Home Run
2 Beaten-Down Value Stocks That Could Crush the Market
Europe Considers Making Big Tech Pay for Building the Internet
Tim Cook and Apple Make a Move That Could Annoy China
Netflix Makes an Apple and Amazon Mistake it Can't Afford to Copy
Apple (AAPL) Gains As Market Dips: What You Should Know
Apple iPhone 14 Pro Models Could Give Company A Profit Boost
iPhone 14 Demand in China Weaker Than Expected, Jefferies Says
Apple iPhone 14 demand trending ahead of iPhone 13: Wedbushs Dan Ives
Alphabet (GOOGL) to Boost YouTube Music With New Capabilities
TikTok Deal Remains Elusive as Biden Administration Works to Solve Data Concerns
Author Morgan Housel Talks About Inflation, Buffett, Bear Markets, and More
Stock market: Here's one veteran strategist's guess at a bear market bottom
Apples iPhone 14 sales come up short in China, report says
Why 

In [36]:
# Extra word -> valence entries merged into the analyzer's lexicon below.
# NOTE(review): 'falls' is weighted ten times heavier than any other entry in
# this dict -- confirm that -100 is intended and not a typo for -10.
new_words = {
    'crushes': 10,
    'beats': 5,
    'misses': -5,
    'trouble': -10,
    'falls': -100,
    'drop': -10,
    'crash': -10,
    'bearish': -10,
    'bear': -5,
}

# SentimentIntensityAnalyzer needs the 'vader_lexicon' NLTK resource; fetch it
# only when it is missing so the cell also runs in a fresh environment.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')

# Instantiate the sentiment intensity analyzer and add the custom entries
vader = SentimentIntensityAnalyzer()
vader.lexicon.update(new_words)

In [37]:
def score_news(parsed_news_df):
    """Attach polarity scores from the module-level ``vader`` analyzer to each headline.

    Parameters
    ----------
    parsed_news_df : pandas.DataFrame
        Frame with at least the columns ``date``, ``time``, ``headline`` and
        ``datetime`` (the output of ``parse_news``).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``datetime`` with the columns ``headline``, ``neg``, ``neu``,
        ``pos`` and ``sentiment_score`` (the analyzer's ``compound`` value).
        Empty input yields an empty frame that still has these columns.
    """
    # Naming the score columns keeps the output schema stable for empty input.
    score_columns = ['neg', 'neu', 'pos', 'compound']

    # Iterate through the headlines and get the polarity scores using vader
    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
    # Reuse the input's index so the join below lines rows up correctly even
    # when the caller's frame does not have a default 0..n-1 index.
    scores_df = pd.DataFrame(scores, index=parsed_news_df.index, columns=score_columns)

    # Join the scores onto the news, index by timestamp, and drop the raw
    # date/time strings now that 'datetime' carries the same information.
    parsed_and_scored_news = (
        parsed_news_df
        .join(scores_df)
        .set_index('datetime')
        .drop(['date', 'time'], axis=1)
        .rename(columns={"compound": "sentiment_score"})
    )

    return parsed_and_scored_news

# Score the parsed headlines and preview the first rows
parsed_and_scored_news = score_news(parsed_news_df)
parsed_and_scored_news.head()

Unnamed: 0_level_0,headline,neg,neu,pos,sentiment_score
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-26 03:48:00,Apple Begins Making iPhone 14 in India Weeks A...,0.0,1.0,0.0,0.0
2022-09-26 03:46:00,Exclusive-India's push for home-grown navigati...,0.0,1.0,0.0,0.0
2022-09-26 02:05:00,New iPhones Initial China Sales Lag Predecesso...,0.231,0.769,0.0,-0.34
2022-09-26 01:31:00,UPDATE 1-Apple says it will manufacture iPhone...,0.0,1.0,0.0,0.0
2022-09-26 01:16:00,Tech Stocks Face Another 10% Drop or More as S...,0.4,0.364,0.236,-0.8187


In [8]:
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Show a bar chart of the mean sentiment score per hour.

    Parameters
    ----------
    parsed_and_scored_news : pandas.DataFrame
        Frame indexed by datetime with a numeric ``sentiment_score`` column
        (the output of ``score_news``).
    ticker : str
        Ticker symbol used as the prefix of the chart title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Resample into hourly bins and average only the score column: the frame
    # also carries the text 'headline' column, which cannot be averaged.
    mean_scores = parsed_and_scored_news[['sentiment_score']].resample('h').mean()

    # Plot a bar chart with plotly
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Hourly Sentiment Scores')
    fig.show()

In [7]:
# Run the full pipeline (fetch -> parse -> score -> plot) for a second ticker.
# NOTE: this rebinds ticker, news_table, parsed_news_df and
# parsed_and_scored_news, replacing the 'AAPL' results held in those names.
# NOTE(review): the saved preview below scores the 01:16 "Tech Stocks Face
# Another 10% Drop..." headline differently from the AAPL preview, which
# suggests it was produced before the custom lexicon was applied -- re-run the
# notebook top to bottom to confirm.
ticker = 'AMZN'
news_table = get_news(ticker)
parsed_news_df = parse_news(news_table)
parsed_and_scored_news = score_news(parsed_news_df)
plot_hourly_sentiment(parsed_and_scored_news, ticker)
parsed_and_scored_news.head()

Unnamed: 0_level_0,headline,neg,neu,pos,sentiment_score
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-09-26 01:16:00,Tech Stocks Face Another 10% Drop or More as S...,0.113,0.538,0.349,0.6573
2022-09-26 01:08:00,Amazon to hold mid-October sale to capture mor...,0.0,0.63,0.37,0.5709
2022-09-26 01:01:00,Amazon to hold mid-October sale to capture mor...,0.0,0.63,0.37,0.5709
2022-09-26 01:00:00,Introducing Amazons Prime Early Access SaleA N...,0.046,0.708,0.246,0.7003
2022-09-25 11:32:00,5 Things That Are Overpriced at Costco,0.0,1.0,0.0,0.0
