In [29]:
!pip install requests beautifulsoup4 pandas



In [30]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [31]:
# Stock symbol whose news is scraped; also used to build the output file and folder names.
ticker = "AAPL"

In [32]:
def fetch_finviz_news(stock_symbol):
    """Scrape the news headlines listed on a ticker's Finviz quote page.

    Args:
        stock_symbol: Ticker symbol to look up, e.g. ``'AAPL'``.

    Returns:
        On success, a list of dicts with the keys ``'headline'``, ``'link'``
        and ``'date'``, one per ``<tr>`` of the news table. Pieces that are
        missing from a row are replaced by ``'No ... available'`` placeholders.
        On failure, a message string instead of a list:
        ``"Failed to retrieve news."`` when the HTTP status is not 200, or
        ``"News table not found."`` when the page has no news table.
        Callers must check for a ``str`` result before using it.

    Raises:
        requests.RequestException: If the connection fails or the request
            does not complete within the timeout.
    """
    url = "https://finviz.com/quote.ashx"
    headers = {'User-Agent': 'Mozilla/5.0'}  # Some websites require a user-agent header
    # `params` lets requests URL-encode the symbol; the timeout keeps the call
    # from blocking forever on a stalled connection.
    response = requests.get(url, params={'t': stock_symbol}, headers=headers, timeout=10)

    if response.status_code != 200:
        return "Failed to retrieve news."

    soup = BeautifulSoup(response.content, 'html.parser')

    # Attempt to locate the news table by its class. Update if the class name has changed.
    news_table = soup.find('table', class_='fullview-news-outer')
    if news_table is None:
        return "News table not found."

    headlines = []
    for row in news_table.find_all('tr'):
        anchor = row.a        # first <a> in the row, or None
        first_cell = row.td   # first <td> in the row, or None

        if anchor is not None:
            text = anchor.text.strip()
            # .get() avoids a KeyError on an anchor that has no href attribute.
            link = anchor.get('href', 'No link available')
        else:
            text = 'No headline available'
            link = 'No link available'

        # NOTE(review): the recorded sample output suggests only some rows carry a
        # day ("Today 11:50AM") while others carry just a time ("10:08AM") --
        # confirm before parsing this field as a full timestamp.
        if first_cell is not None:
            date = first_cell.text.strip().split('\n')[0]
        else:
            date = 'No date available'

        headlines.append({'headline': text, 'link': link, 'date': date})

    return headlines

In [33]:
# Fetch news for the configured ticker
news = fetch_finviz_news(ticker)

# fetch_finviz_news signals failure by returning a message string instead of a
# list. pd.DataFrame() rejects a bare string with an unhelpful constructor
# error, so stop here with the scraper's own message instead.
if isinstance(news, str):
    raise RuntimeError(f"Could not fetch news for {ticker}: {news}")

# Convert to DataFrame for easier viewing
news_df = pd.DataFrame(news)

# Display the first few rows
print(news_df.head())

                                            headline  \
0  Magnificent Seven Stocks To Buy And Watch: Met...   
1  My Thoughts Regarding the Apple / Gemini AI Li...   
2  Prediction: Nvidia Will Become the World's Lar...   
3    Project Titan Follow-Up, Apple vs. EU Follow-Up   
4  Redwire Corporation (NYSE:RDW) Q4 2023 Earning...   

                                                link           date  
0  https://finance.yahoo.com/m/4205eaa9-f620-3a0b...  Today 11:50AM  
1  https://www.aboveavalon.com/notes/2024/3/19/my...        10:08AM  
2  https://finance.yahoo.com/m/c4bb27e6-3f95-3526...        10:07AM  
3  https://www.aboveavalon.com/notes/2024/3/19/pr...        10:05AM  
4  https://www.insidermonkey.com/blog/redwire-cor...        09:20AM  


In [34]:
# Build the file name once so the confirmation message always names the file
# that was actually written (the old message hard-coded a lowercase "aapl" name).
local_csv_path = f'{ticker}_stock_news.csv'
news_df.to_csv(local_csv_path, index=False)
print(f"{ticker} stock news saved to {local_csv_path}.")

AAPL stock news saved to aapl_stock_news.csv.


In [35]:
# Mount Google Drive at /content/drive so later cells can write files beneath it.
# This cell assumes the notebook is running in a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [36]:
import os

# Per-ticker output folder on the mounted Drive.
folder_path = f'/content/drive/My Drive/IGP Notebooks/Prepared Data/Stock News/{ticker}'
# exist_ok=True creates the folder (and any missing parents) when needed and is a
# no-op when it already exists, avoiding the gap between a separate existence
# check and the creation. It raises if the path exists but is not a directory.
os.makedirs(folder_path, exist_ok=True)


In [37]:
from datetime import datetime

# Timestamp string (YYYY-MM-DD_HHMMSS) taken from the local clock when this cell runs.
now = format(datetime.now(), "%Y-%m-%d_%H%M%S")

In [38]:
# Write a timestamped snapshot into the per-ticker folder; the timestamp in the
# file name keeps runs made at different times from overwriting each other.
file_path = os.path.join(
    folder_path,
    f'{ticker}_stock_news_{now}.csv',
)
news_df.to_csv(path_or_buf=file_path, index=False)
print(f"File saved to {file_path}")


File saved to /content/drive/My Drive/IGP Notebooks/Prepared Data/Stock News/AAPL/AAPL_stock_news_2024-03-19_164008.csv
