In [91]:
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
import pandas as pd

# import nltk
# nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [93]:
# Download the Finviz quote page of every ticker and keep its news table.
finviz_url = 'https://finviz.com/quote.ashx?t='   # ticker symbol is appended
tickers = ['TSLA', 'AMZN', 'FB']
news_tables = dict()   # ticker -> element whose id is 'news-table'
parsed_data = []       # filled by the row-parsing loop further down

for ticker in tickers:

    url = finviz_url + ticker

    # Build the request with an explicit user-agent header so Beautiful Soup
    # gets a response it can parse through.
    req = Request(url=url, headers={'user-agent':'my-app'})

    # The context manager closes the HTTP connection as soon as the page has
    # been parsed instead of leaving it open for the life of the kernel.
    with urlopen(req) as response:
        html = BeautifulSoup(response, 'html')

    news_table = html.find(id='news-table')
    if news_table is None:
        # find() returns None when nothing matches; storing that would make
        # the parsing loop fail later with an unrelated-looking AttributeError.
        print('No news table found for ' + ticker + '; skipping')
        continue

    news_tables[ticker] = news_table

# Flatten every news table into rows of [ticker, date, time, headline, source].
for ticker, news_table in news_tables.items():

    # A ticker whose page had no news table is stored as None; skip it.
    if news_table is None:
        continue

    # Reset per ticker: rows that carry only a time reuse the most recent date
    # seen for the SAME ticker, never one left over from the previous ticker,
    # and the name is always bound even if the first row has no date.
    date = None

    for row in news_table.find_all('tr'):
        # Rows without a link or a timestamp cell are not headlines.
        if row.a is None or row.td is None:
            continue

        title = row.a.text
        source = row.span.text if row.span is not None else None
        timestamp = row.td.text

        # split() with no argument splits on any whitespace run, including the
        # trailing non-breaking spaces, and drops empty pieces.
        date_data = timestamp.split()
        if not date_data:
            continue

        if len(date_data) == 1:
            # Only a time is present: keep the date of the previous row.
            time = date_data[0]
        else:
            date = date_data[0]
            time = date_data[1]

        parsed_data.append([ticker, date, time, title, source])


In [101]:
# Build the headline table, score each headline with VADER and turn the
# scraped date strings into date objects.
vader = SentimentIntensityAnalyzer()


def f(title):
    """Return the VADER 'compound' polarity score of one headline."""
    scores = vader.polarity_scores(title)
    return scores['compound']


column_names = ['ticker', 'date', 'time', 'headline', 'source']
df = pd.DataFrame(parsed_data, columns=column_names)

df['compound'] = df['headline'].apply(f)

parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates.dt.date

In [106]:
# Preview the first five rows of the scored headlines.
df.head(n=5)

Unnamed: 0,ticker,date,time,headline,source,compound
0,TSLA,2022-06-13,12:42AM,Dow Jones Futures Signal Sharp Stock Market Lo...,Investor's Business Daily,-0.765
1,TSLA,2022-06-12,11:49PM,China Is Walking Back Virus Loosening Just Wee...,Bloomberg,0.0
2,TSLA,2022-06-12,07:41PM,Lightyear Debuts With Features Other Electric ...,TheStreet.com,0.0
3,TSLA,2022-06-12,01:47PM,Elon Musk Takes Sides in the Gender Identity D...,TheStreet.com,0.0
4,TSLA,2022-06-12,09:12AM,19-year-old tracking the jets of billionaires:...,Yahoo Finance,0.0
