In [76]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

finviz_url = 'https://finviz.com/quote.ashx?t='
tickers = ['AMZN', 'GOOG']

# Initialize sentiment analyzer
sia = SentimentIntensityAnalyzer()

# Create a dictionary to store DataFrames for each ticker
ticker_dfs = {}

news_tables = {}
for ticker in tickers:
    url = finviz_url + ticker

    req = Request(url=url, headers={'user-agent': 'my-app'})
    response = urlopen(req)

    html = BeautifulSoup(response, features='html.parser')
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

# Parse news data and create DataFrames for each ticker
for ticker, news_table in news_tables.items():
    parsed_data = []
    if news_table:
        for row in news_table.findAll('tr'):
            try:
                date_time_td = row.find_all('td')[0].text.strip() if len(row.find_all('td')) > 0 else ''
                title_element = row.find_all('td')[1].find('a') if len(row.find_all('td')) > 1 else None

                if title_element:
                    title = title_element.text.strip()
                    link = title_element.get('href')

                    # Parse date and time
                    if 'Today' in date_time_td:
                        date = datetime.now().strftime('%Y-%m-%d')
                        time = date_time_td.replace('Today', '').strip()
                    else:
                        date_parts = date_time_td.split(' ')
                        if len(date_parts) == 2:
                            date = datetime.now().strftime('%Y-%m-%d')  # Placeholder if date is not included
                            time = date_parts[1]
                        else:
                            date = ''
                            time = ''
                    
                    # Combine date and time into datetime
                    date_time = f"{date} {time}"
                    
                    # Append parsed data
                    parsed_data.append([ticker, date_time, title, link])
            except Exception as e:
                print(f"Error parsing row: {e}")

    # Create DataFrame and store in dictionary
    df = pd.DataFrame(parsed_data, columns=['Ticker', 'DateTime', 'Title', 'URL'])
    df['DateTime'] = pd.to_datetime(df['DateTime'], format='%Y-%m-%d %I:%M%p', errors='coerce')
    df = df.dropna(subset=['DateTime'])
    ticker_dfs[ticker] = df

# Function to extract text from a URL
def extract_text_from_url(url):
    try:
        req = Request(url=url, headers={'user-agent': 'my-app'})
        response = urlopen(req)
        html = BeautifulSoup(response, features='html.parser')
        paragraphs = html.find_all('p')
        text = ' '.join([p.get_text() for p in paragraphs])
        return text
    except Exception as e:
        print(f"Error extracting text from {url}: {e}")
        return ""

# Update each DataFrame with the full news text
for ticker, df in ticker_dfs.items():
    df['News'] = df['URL'].apply(lambda url: extract_text_from_url(url))



In [77]:
# Print updated DataFrames for verification
for ticker, df in ticker_dfs.items():
    print(f"Updated DataFrame for {ticker}:")
    print(df.head())

Updated DataFrame for AMZN:
   Ticker            DateTime  \
0    AMZN 2024-08-30 17:08:00   
14   AMZN 2024-08-30 20:49:00   
34   AMZN 2024-08-30 20:29:00   
49   AMZN 2024-08-30 18:22:00   
66   AMZN 2024-08-30 20:24:00   

                                                Title  \
0                        Stocks to Watch in September   
14  7 Best Fast Money Stocks To Buy According To H...   
34        4 Key Takeaways From Nvidia's Earnings Call   
49  Kelce brothers podcast, private-equity vote: L...   
66  Stock-Split Watch: 3 Top Stocks That Look Read...   

                                                  URL  \
0   https://www.investopedia.com/stocks-to-watch-s...   
14  https://www.insidermonkey.com/blog/7-best-fast...   
34  https://www.investopedia.com/4-key-takeaways-f...   
49  https://finance.yahoo.com/video/kelce-brothers...   
66  https://finance.yahoo.com/m/7220beb0-033f-3524...   

                                                 News  
0   Editors' Picks for Companie