# Stock News Retrieval Test

This notebook tests retrieving the latest news for a given stock ticker from the yfinance API.

It uses the existing `StockDataDownloader` class from `app.py`.


In [18]:
# Imports for the whole notebook (standard library, third-party, then local).
import importlib
import json
import sys
from datetime import datetime

import pandas as pd

# Clear any cached imports to ensure we get the latest version.
# The reload has to happen BEFORE `from app import ...`: reloading afterwards
# re-executes the module but leaves `StockDataDownloader` bound to the class
# object from the stale module, so edits to app.py would not take effect here.
if 'app' in sys.modules:
    importlib.reload(sys.modules['app'])

# Import the stock data downloader
from app import StockDataDownloader


In [19]:
# Initialize the downloader
# This single instance is reused by the later cells that fetch news
# (get_recent_news) and save it (format_and_save_data).
downloader = StockDataDownloader()
print("✓ StockDataDownloader initialized")


2025-10-06 22:41:41,116 - app - INFO - Initialized StockDataDownloader with base path: data/price-history


✓ StockDataDownloader initialized


In [20]:
# Notebook parameters: edit these two values to look up a different symbol
# or to request a different number of headlines.
ticker = "NVDA"      # any ticker symbol you want to test
max_news_items = 10  # number of news items to retrieve

# Echo the active configuration so it is visible next to the results.
print(f"Ticker: {ticker}\nMax news items: {max_news_items}")


Ticker: NVDA
Max news items: 10


In [21]:
# First, inspect the raw news data structure from yfinance, then fetch the
# same ticker through StockDataDownloader so both results can be compared.
import yfinance as yf


def _preview(value, limit=100):
    """Format a value for printing, cutting text longer than `limit` characters.

    Non-string values are converted with str() first, so large nested
    structures (e.g. a thumbnail dict) are shortened the same way long
    strings are instead of flooding the cell output.
    """
    text = value if isinstance(value, str) else str(value)
    if len(text) > limit:
        return f"{text[:limit]}..."
    return text


print(f"\n{'='*80}")
print(f"RAW NEWS DATA INSPECTION FOR {ticker}")
print(f"{'='*80}\n")

stock = yf.Ticker(ticker)
raw_news = stock.news

# A non-empty list is truthy, so no separate length check is needed.
if raw_news:
    print(f"✓ Found {len(raw_news)} news items\n")

    # Show the structure of the first news item
    print("First news item structure:")
    print("-" * 80)
    for key, value in raw_news[0].items():
        # Only walk into 'content' when it really is a mapping; anything else
        # (None, a plain string, ...) is printed like a regular field.
        if key == 'content' and isinstance(value, dict):
            print(f"{key}: [NESTED CONTENT - see below]")
            print("  Content fields:")
            for ckey, cvalue in value.items():
                print(f"    {ckey}: {_preview(cvalue)}")
        else:
            print(f"{key}: {value}")
    print("-" * 80)
else:
    print("✗ No news data returned from API")

print(f"\n{'='*80}")
print("NOW FETCHING USING UPDATED StockDataDownloader")
print(f"{'='*80}\n")

# Now fetch using our updated downloader; later cells treat a None result as
# "no news", so the same convention is used here.
news_df = downloader.get_recent_news(ticker=ticker, max_items=max_news_items)

if news_df is not None:
    print(f"\n✓ Successfully retrieved {len(news_df)} news items")
    print(f"✓ Columns: {list(news_df.columns)}")
else:
    print(f"\n✗ No news found for {ticker}")



RAW NEWS DATA INSPECTION FOR NVDA



2025-10-06 22:41:41,396 - app - INFO - Fetching recent news for NVDA (attempt 1/3)
2025-10-06 22:41:41,585 - app - INFO - Successfully fetched 10 news items for NVDA


✓ Found 10 news items

First news item structure:
--------------------------------------------------------------------------------
id: 8acf3d76-a464-49b9-9f9d-efaa95b1dc3c
content: [NESTED CONTENT - see below]
  Content fields:
    id: 8acf3d76-a464-49b9-9f9d-efaa95b1dc3c
    contentType: STORY
    title: Stock market today: Dow, S&P 500, Nasdaq futures hover after Wall Street's latest record run
    description: 
    summary: US stock futures were little changed late Monday after Wall Street's latest record-setting run.
    pubDate: 2025-10-06T22:12:48Z
    displayTime: 2025-10-06T22:12:49Z
    isHosted: True
    bypassModal: False
    previewUrl: None
    thumbnail: {'originalUrl': 'https://s.yimg.com/os/creatr-uploaded-images/2025-08/cb17a5c0-7865-11f0-bb7e-62177fee3fee', 'originalWidth': 7893, 'originalHeight': 5216, 'caption': '', 'resolutions': [{'url': 'https://s.yimg.com/uu/api/res/1.2/d.lzd02tXzECkJ.KcR8aRw--~B/aD01MjE2O3c9Nzg5MzthcHBpZD15dGFjaHlvbg--/https://s.yimg.com/os/cre

In [22]:
# Display DataFrame info and check for actual content
if news_df is not None:
    banner = "=" * 80

    print("\n" + banner)
    print("NEWS DATA INFO")
    print(banner)
    print(f"\nShape: {news_df.shape}")
    print(f"\nColumns: {list(news_df.columns)}")
    print(f"\nData types:")
    print(news_df.dtypes)

    # Check if data is actually populated
    print("\n" + banner)
    print("CONTENT CHECK")
    print(banner)

    # Check first row to see if we have real data
    if len(news_df) > 0:
        first_row = news_df.iloc[0]
        print(f"\nFirst row values:")
        # (column, quoted): text fields are wrapped in quotes so that empty
        # strings are visible; timestamps and flags are printed bare.
        first_row_fields = [
            ('TICKER', True),
            ('ID', True),
            ('TITLE', True),
            ('SUMMARY', True),
            ('DESCRIPTION', True),
            ('PUBLISHER', True),
            ('LINK', True),
            ('PUBLISH_TIME', False),
            ('CONTENT_TYPE', True),
            ('IS_PREMIUM', False),
            ('IS_HOSTED', False),
        ]
        for column, quoted in first_row_fields:
            value = first_row[column]
            shown = f"'{value}'" if quoted else f"{value}"
            print(f"  {column}: {shown}")

        # A value counts as blank when it is missing OR empty after stripping.
        # Comparing the stripped text to '' alone is not enough: a missing
        # value compares unequal to '', so it would be reported as present.
        blank = {}
        for column in ('TITLE', 'LINK', 'SUMMARY', 'DESCRIPTION'):
            stripped = news_df[column].str.strip()
            blank[column] = stripped.isna() | (stripped == '')

        # Check if all titles are empty
        empty_titles = blank['TITLE']
        print(f"\n  Empty titles: {empty_titles.sum()} / {len(news_df)}")

        # Check if all links are empty
        empty_links = blank['LINK']
        print(f"  Empty links: {empty_links.sum()} / {len(news_df)}")

        # Check if we have summaries
        has_summaries = ~blank['SUMMARY']
        print(f"  Has summaries: {has_summaries.sum()} / {len(news_df)}")

        # Check if we have descriptions
        has_descriptions = ~blank['DESCRIPTION']
        print(f"  Has descriptions: {has_descriptions.sum()} / {len(news_df)}")



NEWS DATA INFO

Shape: (10, 14)

Columns: ['TICKER', 'ID', 'TITLE', 'SUMMARY', 'DESCRIPTION', 'PUBLISHER', 'LINK', 'PUBLISH_TIME', 'DISPLAY_TIME', 'CONTENT_TYPE', 'THUMBNAIL_URL', 'IS_PREMIUM', 'IS_HOSTED', 'DOWNLOAD_TIMESTAMP']

Data types:
TICKER                             object
ID                                 object
TITLE                              object
SUMMARY                            object
DESCRIPTION                        object
PUBLISHER                          object
LINK                               object
PUBLISH_TIME          datetime64[ns, UTC]
DISPLAY_TIME          datetime64[ns, UTC]
CONTENT_TYPE                       object
THUMBNAIL_URL                      object
IS_PREMIUM                           bool
IS_HOSTED                            bool
DOWNLOAD_TIMESTAMP         datetime64[ns]
dtype: object

CONTENT CHECK

First row values:
  TICKER: 'NVDA'
  ID: '8acf3d76-a464-49b9-9f9d-efaa95b1dc3c'
  TITLE: 'Stock market today: Dow, S&P 500, Nasdaq futures 

In [23]:
# Display all news items with full text content
if news_df is not None:
    banner = "=" * 80

    print("\n" + banner)
    print(f"ALL NEWS ITEMS FOR {ticker} - FULL CONTENT")
    print(banner)

    # Set pandas display options to show full content.
    # NOTE: set_option changes the settings for the rest of the kernel
    # session, so frames displayed by later cells use these widths as well.
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_colwidth', 200)  # Show more text
    pd.set_option('display.width', None)

    # Show key columns for full-text database
    key_columns = ['ID', 'TITLE', 'SUMMARY', 'DESCRIPTION', 'PUBLISHER', 'LINK', 'PUBLISH_TIME', 'CONTENT_TYPE']
    display(news_df[key_columns])

    print(f"\n{banner}")
    print("FULL TEXT CONTENT FOR DATABASE")
    print(banner)

    # Show the full text content that would go into a full-text database.
    # Rows are numbered by position (1-based) rather than by index label, so
    # the listing does not depend on the frame having a default 0..n-1 index.
    listed_fields = ['TITLE', 'SUMMARY', 'DESCRIPTION', 'PUBLISHER', 'CONTENT_TYPE', 'PUBLISH_TIME', 'LINK']
    for position, (_, row) in enumerate(news_df.iterrows(), start=1):
        print(f"\n[{position}] ID: {row['ID']}")
        for field in listed_fields:
            print(f"    {field}: {row[field]}")
        print("-" * 80)



ALL NEWS ITEMS FOR NVDA - FULL CONTENT


Unnamed: 0,ID,TITLE,SUMMARY,DESCRIPTION,PUBLISHER,LINK,PUBLISH_TIME,CONTENT_TYPE
0,8acf3d76-a464-49b9-9f9d-efaa95b1dc3c,"Stock market today: Dow, S&P 500, Nasdaq futures hover after Wall Street's latest record run",US stock futures were little changed late Monday after Wall Street's latest record-setting run.,,Yahoo Finance,https://finance.yahoo.com/news/live/stock-market-today-dow-sp-500-nasdaq-futures-hover-after-wall-streets-latest-record-run-221248313.html,2025-10-06 22:12:48+00:00,STORY
1,4731fc11-ab62-444a-af68-cddf98eca95e,AMD CEO Lisa Su says AI critics are 'thinking too small' after massive OpenAI deal,"Chipmaker AMD inked a multi-gigawatt GPU agreement with OpenAI, setting the stage for a potential 10-year AI supercycle.",,Yahoo Finance,https://finance.yahoo.com/news/amd-ceo-lisa-su-says-ai-critics-are-thinking-too-small-after-massive-openai-deal-202818700.html,2025-10-06 20:28:18+00:00,STORY
2,7fcdd7dd-7b69-3eec-b980-d1cd6fa739cb,AMD-OpenAI deal is helping build 'foundation' of the AI world: CEO,"Advanced Micro Devices (AMD) announced an AI chip deal with OpenAI (OPAI.PVT) on Monday, sending the semiconductor stock soaring; this deal also has OpenAI taking up a 10% stake in AMD. This all c...","<p>Advanced Micro Devices (<a data-i13n=""cpos:1;pos:1"" href=""https://finance.yahoo.com/quote/AMD"">AMD</a>) announced <a data-i13n=""cpos:2;pos:1"" href=""https://finance.yahoo.com/news/amd-stock-rock...",Yahoo Finance Video,https://finance.yahoo.com/video/amd-openai-deal-helping-build-193159548.html,2025-10-06 19:31:59+00:00,VIDEO
3,42fabe11-43a3-3187-bb98-d153f87128c6,AMD-OpenAI deal: Is AI appetite close to peaking?,Advanced Micro Devices (AMD) announced a new chip partnership with OpenAI (OPAI.PVT) — sending the semiconductor stock ripping and tearing higher Monday morning — a move that could position AMD to...,"<p>Advanced Micro Devices (<a data-i13n=""cpos:1;pos:1"" href=""https://finance.yahoo.com/quote/AMD"">AMD</a>) announced a new chip partnership with OpenAI (<a data-i13n=""cpos:2;pos:1"" href=""https://f...",Yahoo Finance Video,https://finance.yahoo.com/video/amd-openai-deal-ai-appetite-145500695.html,2025-10-06 14:55:00+00:00,VIDEO
4,edcf7e86-1bf6-3db2-8f90-7b7ffc7dd5ac,"Jim Cramer on building wealth, stock market lessons, and bold predictions","Opening Bid Unfiltered is available on Apple Podcasts, Spotify, YouTube, or wherever you get your podcasts. Few have championed the need for people to invest in individual stocks like Jim Cramer. ...","<p><strong>Opening Bid Unfiltered is available on </strong><a data-i13n=""cpos:1;pos:1"" href=""https://podcasts.apple.com/us/podcast/opening-bid/id1749109417""><strong>Apple Podcasts</strong></a><str...",Yahoo Finance Video,https://finance.yahoo.com/video/jim-cramer-building-wealth-stock-120058904.html,2025-10-06 12:00:58+00:00,VIDEO
5,84f7eb93-21e6-4aff-8d32-677ac4ef363e,"Wall Street, crypto industry say tokenization will reshape global markets: 'It’s going to eat the entire financial system'","The crypto industry, along with Wall Street heavyweights, wants to bring tokenized stocks, or digital versions of traditional shares that are recorded on a blockchain's digital ledger, into the ma...",,Yahoo Finance,https://finance.yahoo.com/news/wall-street-crypto-industry-say-tokenization-will-reshape-global-markets-its-going-to-eat-the-entire-financial-system-133005463.html,2025-10-05 13:30:05+00:00,STORY
6,7b5190a0-be5b-3d46-87d1-f497c2925de0,"Dow Jones Futures Fall As AMD Soars On OpenAI, AppLovin Dives; Cheap Tesla Model Y On Tap?As Hot Stock Dives",The Nasdaq hit a fresh high as the AMD-OpenAI deal fueled many stocks. Tesla jumped ahead of big news. AppLovin dived on an SEC probe report.,,Investor's Business Daily,https://www.investors.com/market-trend/stock-market-today/dow-jones-futures-nasdaq-high-amd-openai-deal-tesla-stock-news/?src=A00220&yptr=yahoo,2025-10-07 02:03:54+00:00,STORY
7,61cc31e2-0627-3b2d-b418-ab206e5d9ce7,Surprising AI deal puts AMD under the spotlight,AMD stock has increased 73% year-to-date.,,TheStreet,https://www.thestreet.com/technology/surprising-ai-deal-sends-amd-stock-skyrocketing,2025-10-07 01:37:00+00:00,STORY
8,530d0afc-1034-3fe2-9222-8060dfb0b328,2 Electric Grid Stocks to Buy as Soaring Artificial Intelligence (AI) Demand Drives Surging Electricity Demand,A great reason to buy electric grid stocks: Nvidia and OpenAI's AI deal alone will need more power than New York City's average demand!,,Motley Fool,https://www.fool.com/investing/2025/10/06/best-ai-stocks-electric-grid-utility-stocks-to-buy/,2025-10-07 01:03:00+00:00,STORY
9,7bd9ab1b-2640-323b-bd8b-6e6db54e8c20,This Artificial Intelligence (AI) Stock Is Quietly Outperforming Nvidia in 2025,This company is essential to the success of Nvidia and nearly every other major chipmaker.,,Motley Fool,https://www.fool.com/investing/2025/10/06/this-artificial-intelligence-ai-stock-is-quietly-o/,2025-10-07 01:02:00+00:00,STORY



FULL TEXT CONTENT FOR DATABASE

[1] ID: 8acf3d76-a464-49b9-9f9d-efaa95b1dc3c
    TITLE: Stock market today: Dow, S&P 500, Nasdaq futures hover after Wall Street's latest record run
    SUMMARY: US stock futures were little changed late Monday after Wall Street's latest record-setting run.
    DESCRIPTION: 
    PUBLISHER: Yahoo Finance
    CONTENT_TYPE: STORY
    PUBLISH_TIME: 2025-10-06 22:12:48+00:00
    LINK: https://finance.yahoo.com/news/live/stock-market-today-dow-sp-500-nasdaq-futures-hover-after-wall-streets-latest-record-run-221248313.html
--------------------------------------------------------------------------------

[2] ID: 4731fc11-ab62-444a-af68-cddf98eca95e
    TITLE: AMD CEO Lisa Su says AI critics are 'thinking too small' after massive OpenAI deal
    SUMMARY: Chipmaker AMD inked a multi-gigawatt GPU agreement with OpenAI, setting the stage for a potential 10-year AI supercycle.
    DESCRIPTION: 
    PUBLISHER: Yahoo Finance
    CONTENT_TYPE: STORY
    PUBLISH_TIME: 2

In [24]:
# Display news in a more readable format with all text content
if news_df is not None:
    print("\n" + "="*80)
    print(f"FORMATTED NEWS FOR {ticker} - READABLE FORMAT")
    print("="*80)

    # (printed label, source column) in display order.
    detail_fields = [
        ('ID', 'ID'),
        ('Summary', 'SUMMARY'),
        ('Description', 'DESCRIPTION'),
        ('Publisher', 'PUBLISHER'),
        ('Published', 'PUBLISH_TIME'),
        ('Content Type', 'CONTENT_TYPE'),
        ('Is Premium', 'IS_PREMIUM'),
        ('Is Hosted', 'IS_HOSTED'),
        ('Link', 'LINK'),
    ]

    # Number items by position (1-based) so the output does not depend on the
    # frame having a default 0..n-1 integer index.
    for position, (_, row) in enumerate(news_df.iterrows(), start=1):
        print(f"\n[{position}] {row['TITLE']}")
        for label, column in detail_fields:
            print(f"    {label}: {row[column]}")

        # Skip missing thumbnails explicitly: NaN is truthy, so a plain
        # truthiness test would print "Thumbnail: nan".
        thumbnail = row['THUMBNAIL_URL']
        if pd.notna(thumbnail) and thumbnail:
            print(f"    Thumbnail: {thumbnail}")
        print("-" * 80)



FORMATTED NEWS FOR NVDA - READABLE FORMAT

[1] Stock market today: Dow, S&P 500, Nasdaq futures hover after Wall Street's latest record run
    ID: 8acf3d76-a464-49b9-9f9d-efaa95b1dc3c
    Summary: US stock futures were little changed late Monday after Wall Street's latest record-setting run.
    Description: 
    Publisher: Yahoo Finance
    Published: 2025-10-06 22:12:48+00:00
    Content Type: STORY
    Is Premium: False
    Is Hosted: True
    Link: https://finance.yahoo.com/news/live/stock-market-today-dow-sp-500-nasdaq-futures-hover-after-wall-streets-latest-record-run-221248313.html
    Thumbnail: https://s.yimg.com/uu/api/res/1.2/Xd0uKXIdqkpMPTtNdsiKSg--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://s.yimg.com/os/creatr-uploaded-images/2025-08/cb17a5c0-7865-11f0-bb7e-62177fee3fee
--------------------------------------------------------------------------------

[2] AMD CEO Lisa Su says AI critics are 'thinking too small' after massive OpenAI deal
    ID: 4731fc11-a

In [25]:
# Display summary statistics
if news_df is not None:
    banner = "=" * 80

    print("\n" + banner)
    print("SUMMARY STATISTICS")
    print(banner)

    # Count by publisher
    print("\nNews items by publisher:")
    publisher_counts = news_df['PUBLISHER'].value_counts()
    for publisher, count in publisher_counts.items():
        print(f"  {publisher}: {count}")

    # Count by content type
    print("\nNews items by content type:")
    type_counts = news_df['CONTENT_TYPE'].value_counts()
    for news_type, count in type_counts.items():
        print(f"  {news_type}: {count}")

    # Count by premium status
    print("\nNews items by premium status:")
    premium_counts = news_df['IS_PREMIUM'].value_counts()
    for is_premium, count in premium_counts.items():
        print(f"  Premium: {is_premium} - {count} items")

    # Date range
    print(f"\nDate range:")
    print(f"  Earliest: {news_df['PUBLISH_TIME'].min()}")
    print(f"  Latest: {news_df['PUBLISH_TIME'].max()}")

    # Check for thumbnails
    has_thumbnail = news_df['THUMBNAIL_URL'].notna() & (news_df['THUMBNAIL_URL'] != '')
    print(f"\nItems with thumbnails: {has_thumbnail.sum()} / {len(news_df)}")

    # Text content statistics.
    # A field is "present" only when it is not missing AND not empty after
    # stripping, matching the thumbnail check above. Testing `!= ''` alone
    # would count missing values as present.
    print(f"\nText content statistics:")
    present = {}
    for column in ('TITLE', 'SUMMARY', 'DESCRIPTION'):
        stripped = news_df[column].str.strip()
        present[column] = stripped.notna() & (stripped != '')

    has_titles = present['TITLE']
    has_summaries = present['SUMMARY']
    has_descriptions = present['DESCRIPTION']
    print(f"  Items with titles: {has_titles.sum()} / {len(news_df)}")
    print(f"  Items with summaries: {has_summaries.sum()} / {len(news_df)}")
    print(f"  Items with descriptions: {has_descriptions.sum()} / {len(news_df)}")

    # Average text lengths, computed over present values only and skipped
    # entirely when a field has no present values.
    for label, column in (('title', 'TITLE'), ('summary', 'SUMMARY'), ('description', 'DESCRIPTION')):
        mask = present[column]
        if mask.any():
            average_length = news_df.loc[mask, column].str.len().mean()
            print(f"  Average {label} length: {average_length:.1f} characters")



SUMMARY STATISTICS

News items by publisher:
  Yahoo Finance: 3
  Yahoo Finance Video: 3
  Motley Fool: 2
  Investor's Business Daily: 1
  TheStreet: 1

News items by content type:
  STORY: 7
  VIDEO: 3

News items by premium status:
  Premium: False - 10 items

Date range:
  Earliest: 2025-10-05 13:30:05+00:00
  Latest: 2025-10-07 02:03:54+00:00

Items with thumbnails: 10 / 10

Text content statistics:
  Items with titles: 10 / 10
  Items with summaries: 10 / 10
  Items with descriptions: 3 / 10
  Average title length: 82.8 characters
  Average summary length: 370.6 characters
  Average description length: 1950.7 characters


In [26]:
# Test with multiple tickers: fetch up to five headlines per symbol and
# record how many items came back for each.
separator = "=" * 80

print("\n" + separator)
print("TESTING MULTIPLE TICKERS")
print(separator)

test_tickers = ["AAPL", "MSFT", "GOOGL", "TSLA"]
results = {}

for test_ticker in test_tickers:
    print(f"\nFetching news for {test_ticker}...")
    ticker_news = downloader.get_recent_news(ticker=test_ticker, max_items=5)

    # A None result means the downloader found nothing for this symbol.
    if ticker_news is None:
        results[test_ticker] = 0
        print(f"  ✗ No news found")
        continue

    item_count = len(ticker_news)
    results[test_ticker] = item_count
    print(f"  ✓ Retrieved {item_count} items")
    if item_count > 0:
        # Show the first headline, limited to its first 80 characters.
        headline = ticker_news.iloc[0]['TITLE']
        print(f"  Latest: {headline[:80]}...")

print("\n" + separator)
print("SUMMARY")
print(separator)
for ticker_name in results:
    print(f"{ticker_name}: {results[ticker_name]} news items")


2025-10-06 22:41:41,620 - app - INFO - Fetching recent news for AAPL (attempt 1/3)



TESTING MULTIPLE TICKERS

Fetching news for AAPL...


2025-10-06 22:41:41,863 - app - INFO - Successfully fetched 5 news items for AAPL
2025-10-06 22:41:41,864 - app - INFO - Fetching recent news for MSFT (attempt 1/3)


  ✓ Retrieved 5 items
  Latest: Jim Cramer: Nvidia's next act will be bigger than gaming or AI...

Fetching news for MSFT...


2025-10-06 22:41:42,130 - app - INFO - Successfully fetched 5 news items for MSFT
2025-10-06 22:41:42,131 - app - INFO - Fetching recent news for GOOGL (attempt 1/3)
2025-10-06 22:41:42,289 - app - INFO - Successfully fetched 5 news items for GOOGL
2025-10-06 22:41:42,290 - app - INFO - Fetching recent news for TSLA (attempt 1/3)


  ✓ Retrieved 5 items
  Latest: 2 Brilliant Growth Stocks to Buy in October...

Fetching news for GOOGL...
  ✓ Retrieved 5 items
  Latest: Wall Street strategists lift S&P targets ahead of earnings season on 'fundamenta...

Fetching news for TSLA...


2025-10-06 22:41:42,470 - app - INFO - Successfully fetched 5 news items for TSLA


  ✓ Retrieved 5 items
  Latest: Fed, McCormick, Tesla, Amazon's Big Deal Days: What to Watch...

SUMMARY
AAPL: 5 news items
MSFT: 5 news items
GOOGL: 5 news items
TSLA: 5 news items


In [27]:
# Save the news to a parquet file
if news_df is not None:
    divider = "=" * 80
    print("\n" + divider)
    print("SAVE NEWS TO FILE")
    print(divider)

    # Hand the frame to the downloader for saving; the file is expected at
    # ./data/news/{ticker}/news-{ticker}.parquet
    success = downloader.format_and_save_data(news_df, ticker, data_type="news")

    if not success:
        print(f"\n✗ Failed to save news data")
    else:
        print(f"\n✓ News data saved successfully!")

        # Resolve the relative location to an absolute path for display.
        import os
        filepath = f"./data/news/{ticker}/news-{ticker}.parquet"
        abs_filepath = os.path.abspath(filepath)

        print(f"  Location: {filepath}")
        print(f"  Full Path: {abs_filepath}")
        print(f"  Records: {len(news_df)}")

        # Read the file back (when it exists) to report what is stored in it.
        if os.path.exists(filepath):
            stored_news = pd.read_parquet(filepath)
            print(f"  Total records in file: {len(stored_news)}")

            # Show the schema of the saved file
            print(f"\n  Schema of saved file:")
            print(f"    Columns: {list(stored_news.columns)}")
            print(f"    Data types:")
            column_types = stored_news.dtypes
            for column_name in column_types.index:
                print(f"      {column_name}: {column_types[column_name]}")

            # Show sample of saved data
            print(f"\n  Sample of saved data:")
            preview = stored_news[['ID', 'TITLE', 'SUMMARY', 'PUBLISHER', 'CONTENT_TYPE']].head(2)
            print(preview.to_string())


2025-10-06 22:41:42,481 - app - INFO - Loaded 10 existing records for NVDA
2025-10-06 22:41:42,482 - app - INFO - Merging 10 new records with 10 existing records
2025-10-06 22:41:42,486 - app - INFO - Successfully saved 20 total records to data/news/NVDA/news-NVDA.parquet



SAVE NEWS TO FILE

✓ News data saved successfully!
  Location: ./data/news/NVDA/news-NVDA.parquet
  Full Path: /Users/jdacosta/Library/CloudStorage/GoogleDrive-john.dacosta@snowflake.com/My Drive/_local/Downloads/_cursor_demos/Snowflake_Intelligence_HOL/jdacosta/pricehistory/data/news/NVDA/news-NVDA.parquet
  Records: 10
  Total records in file: 20

  Schema of saved file:
    Columns: ['TICKER', 'ID', 'TITLE', 'SUMMARY', 'DESCRIPTION', 'PUBLISHER', 'LINK', 'PUBLISH_TIME', 'DISPLAY_TIME', 'CONTENT_TYPE', 'THUMBNAIL_URL', 'IS_PREMIUM', 'IS_HOSTED', 'DOWNLOAD_TIMESTAMP']
    Data types:
      TICKER: object
      ID: object
      TITLE: object
      SUMMARY: object
      DESCRIPTION: object
      PUBLISHER: object
      LINK: object
      PUBLISH_TIME: datetime64[ns, UTC]
      DISPLAY_TIME: datetime64[ns, UTC]
      CONTENT_TYPE: object
      THUMBNAIL_URL: object
      IS_PREMIUM: bool
      IS_HOSTED: bool
      DOWNLOAD_TIMESTAMP: datetime64[ns]

  Sample of saved data:
            

In [28]:
# Create full-text database schema and sample
if news_df is not None:

    def _combined_text(row):
        """Join the non-empty title, summary and description of `row` with spaces.

        Missing (non-string) values are skipped so they do not end up in the
        searchable text as the literal words "None" or "nan".
        """
        parts = (row['TITLE'], row['SUMMARY'], row['DESCRIPTION'])
        return " ".join(part.strip() for part in parts if isinstance(part, str) and part.strip())

    def _sql_literal(value):
        """Render a scalar as a single-quoted SQL literal, or NULL when missing.

        Single quotes are doubled for every value, not just the free-text
        fields: publisher names and URLs can contain apostrophes too.
        """
        if not isinstance(value, str) and pd.isna(value):
            return "NULL"
        return "'" + str(value).replace("'", "''") + "'"

    print("\n" + "="*80)
    print("FULL-TEXT DATABASE SCHEMA")
    print("="*80)

    # Define the full-text database schema
    full_text_schema = {
        'id': 'VARCHAR(36) PRIMARY KEY',  # UUID
        'ticker': 'VARCHAR(10)',
        'title': 'TEXT',
        'summary': 'TEXT',
        'description': 'TEXT',
        'full_text': 'TEXT',  # Combined searchable text
        'publisher': 'VARCHAR(100)',
        'content_type': 'VARCHAR(50)',
        'publish_time': 'TIMESTAMP',
        'is_premium': 'BOOLEAN',
        'link': 'VARCHAR(1000)',
        'thumbnail_url': 'VARCHAR(1000)',
        'download_timestamp': 'TIMESTAMP'
    }

    print("Full-text database schema:")
    for field, data_type in full_text_schema.items():
        print(f"  {field}: {data_type}")

    # Create sample full-text records
    print(f"\n{'='*80}")
    print("SAMPLE FULL-TEXT RECORDS")
    print("="*80)

    # Records are numbered by position (1-based), independent of index labels.
    for position, (_, row) in enumerate(news_df.head(3).iterrows(), start=1):
        # Combine all text fields for full-text search
        full_text = _combined_text(row)

        print(f"\nRecord {position}:")
        print(f"  ID: {row['ID']}")
        print(f"  Ticker: {row['TICKER']}")
        print(f"  Title: {row['TITLE']}")
        print(f"  Summary: {row['SUMMARY']}")
        print(f"  Description: {row['DESCRIPTION']}")
        print(f"  Full Text (for search): {full_text[:200]}...")
        print(f"  Publisher: {row['PUBLISHER']}")
        print(f"  Content Type: {row['CONTENT_TYPE']}")
        print(f"  Publish Time: {row['PUBLISH_TIME']}")
        print(f"  Is Premium: {row['IS_PREMIUM']}")
        print(f"  Link: {row['LINK']}")
        print("-" * 80)

    # Show SQL for creating full-text search table
    print(f"\n{'='*80}")
    print("SQL FOR FULL-TEXT SEARCH TABLE")
    print("="*80)

    sql_create = """
CREATE TABLE stock_news_fulltext (
    id VARCHAR(36) PRIMARY KEY,
    ticker VARCHAR(10),
    title TEXT,
    summary TEXT,
    description TEXT,
    full_text TEXT,
    publisher VARCHAR(100),
    content_type VARCHAR(50),
    publish_time TIMESTAMP,
    is_premium BOOLEAN,
    link VARCHAR(1000),
    thumbnail_url VARCHAR(1000),
    download_timestamp TIMESTAMP,
    
    -- Full-text search index
    FULLTEXT(title, summary, description, full_text)
);
"""
    print(sql_create)

    # Show sample INSERT statements.
    # These are printed for illustration only. Code that actually executes
    # inserts should use the database driver's parameterized queries rather
    # than building SQL text from article content.
    print(f"\n{'='*80}")
    print("SAMPLE INSERT STATEMENTS")
    print("="*80)

    for _, row in news_df.head(2).iterrows():
        # Values in table-column order (see sql_create above).
        rendered_values = [
            _sql_literal(row['ID']),
            _sql_literal(row['TICKER']),
            _sql_literal(row['TITLE']),
            _sql_literal(row['SUMMARY']),
            _sql_literal(row['DESCRIPTION']),
            _sql_literal(_combined_text(row)),
            _sql_literal(row['PUBLISHER']),
            _sql_literal(row['CONTENT_TYPE']),
            _sql_literal(row['PUBLISH_TIME']),
            "TRUE" if row['IS_PREMIUM'] else "FALSE",
            _sql_literal(row['LINK']),
            _sql_literal(row['THUMBNAIL_URL']),
            _sql_literal(row['DOWNLOAD_TIMESTAMP']),
        ]

        insert_sql = (
            "\nINSERT INTO stock_news_fulltext VALUES (\n    "
            + ",\n    ".join(rendered_values)
            + "\n);\n"
        )
        print(insert_sql)



FULL-TEXT DATABASE SCHEMA
Full-text database schema:
  id: VARCHAR(36) PRIMARY KEY
  ticker: VARCHAR(10)
  title: TEXT
  summary: TEXT
  description: TEXT
  full_text: TEXT
  publisher: VARCHAR(100)
  content_type: VARCHAR(50)
  publish_time: TIMESTAMP
  is_premium: BOOLEAN
  link: VARCHAR(1000)
  thumbnail_url: VARCHAR(1000)
  download_timestamp: TIMESTAMP

SAMPLE FULL-TEXT RECORDS

Record 1:
  ID: 8acf3d76-a464-49b9-9f9d-efaa95b1dc3c
  Ticker: NVDA
  Title: Stock market today: Dow, S&P 500, Nasdaq futures hover after Wall Street's latest record run
  Summary: US stock futures were little changed late Monday after Wall Street's latest record-setting run.
  Description: 
  Full Text (for search): Stock market today: Dow, S&P 500, Nasdaq futures hover after Wall Street's latest record run US stock futures were little changed late Monday after Wall Street's latest record-setting run....
  Publisher: Yahoo Finance
  Content Type: STORY
  Publish Time: 2025-10-06 22:12:48+00:00
  Is Premi

In [29]:
# Location of the parquet file for the configured ticker, relative to the
# notebook's working directory. This is the same relative path the save cell
# reports, and it avoids a machine-specific absolute path (which also exposed
# a local user name and email address).
newsFile = f"./data/news/{ticker}/news-{ticker}.parquet"

In [30]:
# Load the parquet file at `newsFile` into a DataFrame.
dfNews = pd.read_parquet(newsFile)

In [31]:
# Bare last expression: the notebook renders the loaded frame as the cell output.
dfNews

Unnamed: 0,TICKER,ID,TITLE,SUMMARY,DESCRIPTION,PUBLISHER,LINK,PUBLISH_TIME,DISPLAY_TIME,CONTENT_TYPE,THUMBNAIL_URL,IS_PREMIUM,IS_HOSTED,DOWNLOAD_TIMESTAMP
0,NVDA,7b5190a0-be5b-3d46-87d1-f497c2925de0,"Dow Jones Futures Fall As AMD Soars On OpenAI, AppLovin Dives; Cheap Tesla Model Y On Tap?As Hot Stock Dives",The Nasdaq hit a fresh high as the AMD-OpenAI deal fueled many stocks. Tesla jumped ahead of big news. AppLovin dived on an SEC probe report.,,Investor's Business Daily,https://www.investors.com/market-trend/stock-market-today/dow-jones-futures-nasdaq-high-amd-openai-deal-tesla-stock-news/?src=A00220&yptr=yahoo,2025-10-07 02:03:54+00:00,2025-10-07 02:03:54+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/TrGGkBu9xrTeGsfZyv_VFQ--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://media.zenfs.com/en/ibd.com/2b18bc6a2254941f70591a54e89a49b0,False,False,2025-10-06 22:41:41.582781
1,NVDA,61cc31e2-0627-3b2d-b418-ab206e5d9ce7,Surprising AI deal puts AMD under the spotlight,AMD stock has increased 73% year-to-date.,,TheStreet,https://www.thestreet.com/technology/surprising-ai-deal-sends-amd-stock-skyrocketing,2025-10-07 01:37:00+00:00,2025-10-07 01:37:00+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/NCrSX6rTGAZfz6eD7gBbQg--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://media.zenfs.com/en/thestreet_881/cc3a856b04dbe2b49c4ac3c6474b0461,False,True,2025-10-06 22:41:41.583185
2,NVDA,530d0afc-1034-3fe2-9222-8060dfb0b328,2 Electric Grid Stocks to Buy as Soaring Artificial Intelligence (AI) Demand Drives Surging Electricity Demand,A great reason to buy electric grid stocks: Nvidia and OpenAI's AI deal alone will need more power than New York City's average demand!,,Motley Fool,https://www.fool.com/investing/2025/10/06/best-ai-stocks-electric-grid-utility-stocks-to-buy/,2025-10-07 01:03:00+00:00,2025-10-07 01:03:00+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/Eg95c4dtFrvoZM0MgyjfOA--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://media.zenfs.com/en/motleyfool.com/02a212314ed374e7009bc1cd0bb88214,False,True,2025-10-06 22:41:41.583589
3,NVDA,7bd9ab1b-2640-323b-bd8b-6e6db54e8c20,This Artificial Intelligence (AI) Stock Is Quietly Outperforming Nvidia in 2025,This company is essential to the success of Nvidia and nearly every other major chipmaker.,,Motley Fool,https://www.fool.com/investing/2025/10/06/this-artificial-intelligence-ai-stock-is-quietly-o/,2025-10-07 01:02:00+00:00,2025-10-07 01:02:00+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/Qxr_Ur53rISLLAI.A6eSDw--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://media.zenfs.com/en/motleyfool.com/60af19dbac32f6714a7ad0e1a037492b,False,True,2025-10-06 22:41:41.584002
4,NVDA,8acf3d76-a464-49b9-9f9d-efaa95b1dc3c,"Stock market today: Dow, S&P 500, Nasdaq futures hover after Wall Street's latest record run",US stock futures were little changed late Monday after Wall Street's latest record-setting run.,,Yahoo Finance,https://finance.yahoo.com/news/live/stock-market-today-dow-sp-500-nasdaq-futures-hover-after-wall-streets-latest-record-run-221248313.html,2025-10-06 22:12:48+00:00,2025-10-06 22:12:49+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/Xd0uKXIdqkpMPTtNdsiKSg--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://s.yimg.com/os/creatr-uploaded-images/2025-08/cb17a5c0-7865-11f0-bb7e-62177fee3fee,False,True,2025-10-06 22:41:41.580708
5,NVDA,be690b22-890e-3dc7-aa0c-1147985cd5cc,OpenAI is putting its 'tentacles into everything.' Here's why.,OpenAI (OPAI.PVT) is holding its third annual developer conference after announcing partnerships with chipmakers Nvidia (NVDA) and AMD (AMD). Intelligent Alpha founder and CEO Doug Clinton joins M...,"<p>OpenAI (<a data-i13n=""cpos:1;pos:1"" href=""https://finance.yahoo.com/quote/OPAI.PVT"">OPAI.PVT</a>) is holding its third annual developer conference after announcing partnerships with chipmakers ...",Yahoo Finance Video,https://finance.yahoo.com/video/openai-putting-tentacles-everything-heres-202942361.html,2025-10-06 20:29:42+00:00,NaT,VIDEO,https://s.yimg.com/uu/api/res/1.2/39EgL.94e.OIuELiX2Wx0g--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://s.yimg.com/os/creatr-uploaded-images/2025-10/1da5ab60-a2f1-11f0-bdff-41da60098d47,False,True,2025-10-06 16:55:58.675939
6,NVDA,51e56900-ac09-3624-8495-f6c9a639f163,"OpenAI, AMD Announce Massive Computing Deal, Marking New Phase of AI Boom",The five-year agreement will challenge Nvidia’s market dominance and gives OpenAI 10% of AMD if it hits milestones for chip deployment.,,The Wall Street Journal,https://www.wsj.com/tech/ai/openai-amd-deal-ai-chips-ed92cc42?siteid=yhoof2&yptr=yahoo,2025-10-06 20:29:00+00:00,2025-10-06 20:29:00+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/gsWaxOjGuAN9Ew_qh2qWAQ--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://media.zenfs.com/en/wsj.com/3f372a30bd46d281f0554da641785617,False,False,2025-10-06 16:55:58.677555
7,NVDA,4731fc11-ab62-444a-af68-cddf98eca95e,AMD CEO Lisa Su says AI critics are 'thinking too small' after massive OpenAI deal,"Chipmaker AMD inked a multi-gigawatt GPU agreement with OpenAI, setting the stage for a potential 10-year AI supercycle.",,Yahoo Finance,https://finance.yahoo.com/news/amd-ceo-lisa-su-says-ai-critics-are-thinking-too-small-after-massive-openai-deal-202818700.html,2025-10-06 20:28:18+00:00,2025-10-06 20:28:19+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/PxX7uqUOkGCwsrjbk3QimA--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://s.yimg.com/os/creatr-uploaded-images/2025-10/8b3d5a70-a2f0-11f0-bff3-5539b367b53e,False,True,2025-10-06 22:41:41.581199
8,NVDA,ef86cf58-c7db-384c-9ab2-841b0d1ee3b7,OpenAI Bets Big That AMD Can Catch Nvidia,Advanced Micro Devices is a significant underdog to Nvidia in the market for AI chips. The chip maker better known as AMD saw its stock price soar 24% Monday following a massive deal with OpenAI....,,The Wall Street Journal,https://www.wsj.com/livecoverage/stock-market-today-government-shutdown-10-06-2025/card/openai-bets-big-that-amd-can-catch-nvidia-TeuhIteBOHdwxlsXww8L?siteid=yhoof2&yptr=yahoo,2025-10-06 20:18:59+00:00,2025-10-06 20:18:59+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/c34MZxHLZOfDYN2BBjM9ww--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://media.zenfs.com/en/wsj.com/7b2c285183be07ca3c4e8242b25a59a8,False,False,2025-10-06 16:55:58.677959
9,NVDA,dc33a07c-c0c0-3c3d-96d0-fdb4359b8bca,Why Verizon (VZ) Stock Is Trading Lower Today,"Shares of telecommunications giant Verizon (NYSE:VZ) fell 4.8% in the afternoon session after the company announced a surprise change in leadership, with former PayPal CEO Dan Schulman taking over...",,StockStory,https://finance.yahoo.com/news/why-verizon-vz-stock-trading-200555239.html,2025-10-06 20:05:55+00:00,2025-10-06 20:05:55+00:00,STORY,https://s.yimg.com/uu/api/res/1.2/2aY9HrsbvPkV09htmezBTg--~B/Zmk9c3RyaW07aD0xMjg7dz0xNzA7YXBwaWQ9eXRhY2h5b24-/https://media.zenfs.com/en/stockstory_922/711987a3fcbd038ec4fc4c13e90d2fa9,False,True,2025-10-06 16:55:58.678352
