In [11]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt

finviz_url = 'https://finviz.com/quote.ashx?t='
tickers = ['AMZN', 'GOOG', 'META']  

news_tables = {}
for ticker in tickers:
    url = finviz_url + ticker

    req = Request(url=url, headers={'user-agent': 'my-app'})
    
    try:
        response = urlopen(req)
        html = BeautifulSoup(response, features='html.parser')
        news_table = html.find(id='news-table')
        
        # Handle missing news table
        if news_table:
            news_tables[ticker] = news_table
        else:
            print(f"No news table found for {ticker}")
    
    except Exception as e:
        print(f"An error occurred for {ticker}: {e}")



In [12]:
parsed_data = []

for ticker, news_table in news_tables.items():
    for row in news_table.findAll('tr'):
        # Check if 'a' and 'td' tags exist in the row
        title_tag = row.find('a')
        date_tag = row.find('td')
        
        if title_tag and date_tag:
            title = title_tag.text
            date_data = date_tag.text.split(' ')
            
            if len(date_data) == 1:
                time = date_data[0]
                date = None  # Assign None if date is missing
            else:
                date = date_data[0]
                time = date_data[1]

            parsed_data.append([ticker, date, time, title])



In [14]:
# Convert to DataFrame
df = pd.DataFrame(parsed_data, columns=['ticker', 'date', 'time', 'title'])

# Handle missing dates gracefully
df['date'] = pd.to_datetime(df['date'], errors='coerce').dt.date



In [18]:
import nltk
nltk.download('vader_lexicon')


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/aditya/nltk_data...


True

In [19]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt

# Sentiment analysis using VADER
vader = SentimentIntensityAnalyzer()

# Apply VADER sentiment analysis to each title
df['compound'] = df['title'].apply(lambda title: vader.polarity_scores(title)['compound'])

# Ensure date column is correctly formatted (if not already)
df['date'] = pd.to_datetime(df['date'], errors='coerce').dt.date

# Calculate mean sentiment score grouped by ticker and date
mean_df = df.groupby(['ticker', 'date'])['compound'].mean().unstack()

# Handle case where mean_df might be empty or NaN
if mean_df.empty or mean_df.isnull().values.all():
    print("No valid data available for plotting.")
else:
    plt.figure(figsize=(10, 8))
    mean_df.plot(kind='bar')
    plt.title('Average Sentiment Score by Ticker and Date')
    plt.ylabel('Average Sentiment Score')
    plt.show()


No valid data available for plotting.
