In [None]:
# Run-on-demand version: fetch news from last N days, analyze sentiment, save CSV
import requests
import time
from datetime import datetime, timedelta, timezone
import os
import sys
import subprocess

# Install required packages if missing (works inside notebook)
def ensure_pkg(module_name, pip_name=None):
    """Make sure `module_name` can be imported, pip-installing it if not.

    Args:
        module_name: Name passed to ``__import__`` to probe availability.
        pip_name: Distribution name handed to ``pip install``. Falls back to
            `module_name` when omitted or empty.

    Raises:
        subprocess.CalledProcessError: If the ``pip install`` command fails.
    """
    try:
        __import__(module_name)
        return
    except ImportError:
        pass
    # Install with the interpreter that is running this code so the package
    # lands in the same environment.
    package = pip_name or module_name
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])

# ensure_pkg probes by *module* name and installs by *distribution* name, so
# the two must be given separately whenever they differ.
ensure_pkg('pandas')
ensure_pkg('pytz')
ensure_pkg('vaderSentiment')
# The python-dotenv distribution is imported as `dotenv`. Probing for a module
# literally named 'python-dotenv' can never succeed, which made pip run on
# every execution.
ensure_pkg('dotenv', 'python-dotenv')

import pandas as pd
import pytz
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from dotenv import load_dotenv

# Load environment variables from .env if present
load_dotenv()

# Stock symbols (15) - unchanged.
# Maps a company display name (used in log messages and the CSV 'company'
# column) to the ticker symbol sent to the news API and stored in 'symbol'.
STOCKS = {
    'NVIDIA': 'NVDA',
    'Apple': 'AAPL',
    'Alphabet (Google)': 'GOOGL',
    'Microsoft': 'MSFT',
    'Amazon': 'AMZN',
    'Broadcom': 'AVGO',
    'Saudi Aramco': '2222.SR',
    'Meta Platforms': 'META',
    'TSMC': 'TSM',
    'Tesla': 'TSLA',
    'Berkshire Hathaway': 'BRK.B',
    'Eli Lilly': 'LLY',
    'Walmart': 'WMT',
    'JPMorgan Chase': 'JPM',
    'Tencent': 'TCEHY'
}

def read_api_key():
    """Look up the AlphaVantage API key in the process environment.

    Returns:
        The value of ``ALPHAVANTAGE_API_KEY`` with surrounding whitespace
        removed, or ``None`` when the variable is unset or empty.
    """
    raw_value = os.environ.get('ALPHAVANTAGE_API_KEY')
    if not raw_value:
        return None
    return raw_value.strip()


def fetch_news_list(symbol, company_name, api_key):
    """Fetch the news feed for one ticker from AlphaVantage NEWS_SENTIMENT.

    This is a best-effort call: every failure is reported with ``print`` and
    turned into an empty list so the caller can move on to the next ticker.

    Args:
        symbol: Ticker symbol sent as the ``tickers`` query parameter.
        company_name: Human-readable name, used only in printed messages.
        api_key: AlphaVantage API key sent as the ``apikey`` query parameter.

    Returns:
        The list found under the response's ``feed`` (or ``articles``) key,
        or ``[]`` when the request fails, the body is not JSON, or the
        response carries no non-empty article list.
    """
    # Let requests build and percent-encode the query string instead of
    # interpolating raw values into the URL.
    params = {
        'function': 'NEWS_SENTIMENT',
        'tickers': symbol,
        'apikey': api_key,
    }
    try:
        resp = requests.get('https://www.alphavantage.co/query', params=params, timeout=30)
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as e:
        # The exception text can embed the full request URL; mask the key so
        # it does not end up in notebook output.
        detail = str(e)
        if api_key:
            detail = detail.replace(str(api_key), '***')
        print(f'Error fetching news for {company_name} ({symbol}): {detail}')
        return []

    if not isinstance(data, dict):
        return []

    feed = data.get('feed') or data.get('articles')
    if isinstance(feed, list) and feed:
        return feed

    # NOTE(review): AlphaVantage appears to answer rate-limit and bad-input
    # cases with a JSON object holding one of these keys instead of a feed -
    # confirm against the API docs. Surfacing the message distinguishes
    # "throttled" from "genuinely no articles".
    for key in ('Error Message', 'Note', 'Information'):
        message = data.get(key)
        if message:
            print(f'AlphaVantage returned no feed for {company_name} ({symbol}): {message}')
            break
    return []

def parse_iso_datetime(s):
    """Parse a publication timestamp string into a ``datetime``.

    ISO 8601 text is tried first (a trailing ``Z`` is treated as UTC), then a
    short list of explicit ``strptime`` layouts.

    Args:
        s: Timestamp text. ``None``, empty/blank strings and non-string
            values are accepted and yield ``None``.

    Returns:
        A ``datetime`` (timezone-aware only when the text carried an offset),
        or ``None`` when the value cannot be parsed.
    """
    if not s or not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    # Older fromisoformat() implementations reject the 'Z' suffix, so spell
    # the UTC offset out explicitly.
    iso_text = s[:-1] + '+00:00' if s.endswith('Z') else s
    try:
        return datetime.fromisoformat(iso_text)
    except ValueError:
        pass

    fallback_formats = (
        # Compact layouts (e.g. 20251125T143000). fromisoformat() only accepts
        # these on newer interpreters, so parse them explicitly. The shorter
        # layout comes first: strptime could otherwise split "1430" into
        # H=1, M=4, S=30 under the seconds layout.
        '%Y%m%dT%H%M',
        '%Y%m%dT%H%M%S',
        '%Y-%m-%dT%H:%M:%S%z',
        '%Y-%m-%d %H:%M:%S',
        '%a, %d %b %Y %H:%M:%S %Z',
    )
    for fmt in fallback_formats:
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            continue
    return None

# Module-level VADER analyzer, constructed once here and reused by
# analyze_sentiment() for every article.
analyzer = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Score `text` with the shared VADER analyzer.

    Returns:
        The ``'compound'`` entry of the analyzer's polarity scores, or ``0.0``
        when `text` is empty or ``None``.
    """
    if text:
        scores = analyzer.polarity_scores(text)
        return scores['compound']
    return 0.0

def recommendation_from_score(score, buy_thresh=0.05, sell_thresh=-0.05):
    """Translate a sentiment score into a trading label.

    Args:
        score: Sentiment score, or ``None`` when no score is available.
        buy_thresh: Scores at or above this value map to ``'Buy'``.
        sell_thresh: Scores at or below this value map to ``'Sell'``.

    Returns:
        ``'Buy'``, ``'Sell'`` or ``'Hold'``; ``None`` when `score` is ``None``.
    """
    if score is None:
        return None
    # The buy check takes precedence, matching the order of the thresholds.
    if score >= buy_thresh:
        return 'Buy'
    return 'Sell' if score <= sell_thresh else 'Hold'

def fetch_all_news_and_analyze(days=14, max_per_stock=None, pause_seconds=12):
    """Fetch recent news for every stock in STOCKS, score it, and save a CSV.

    For each (company, symbol) pair the news feed is fetched, articles older
    than `days` days are dropped, the title plus summary is scored for
    sentiment, and one row per remaining article is collected. The rows are
    written to ``sentiment_stock_news_<YYYYMMDD>.csv`` in the current working
    directory (UTC date), even when no rows were collected.

    Args:
        days: Size of the look-back window in days.
        max_per_stock: Maximum number of rows kept per stock; ``None`` means
            no limit and ``0`` keeps none.
        pause_seconds: Delay inserted between consecutive API requests. No
            delay is added before the first request or after the last one.

    Returns:
        A pandas DataFrame with the collected rows, or ``None`` when no API
        key is configured.
    """
    api_key = read_api_key()
    if not api_key:
        print('No API key available. Set ALPHAVANTAGE_API_KEY in your .env or environment.')
        return None

    # datetime.utcnow() is deprecated; take an aware "now" and drop tzinfo so
    # it stays comparable with the naive-UTC publication times below.
    cutoff = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(days=days)
    results = []
    for index, (company_name, symbol) in enumerate(STOCKS.items()):
        # Space out requests to stay under the API rate limit (the original
        # comment cites 5 calls per minute on the free tier).
        if index and pause_seconds and pause_seconds > 0:
            time.sleep(pause_seconds)

        feed = fetch_news_list(symbol, company_name, api_key)
        if not feed:
            print(f'No articles for {company_name} ({symbol})')
            continue

        count = 0
        for article in feed:
            # Check the cap before adding a row so max_per_stock=0 keeps none.
            if max_per_stock is not None and count >= max_per_stock:
                break
            if not isinstance(article, dict):
                continue
            pub_raw = article.get('time_published') or article.get('published') or article.get('published_at')
            pub = parse_iso_datetime(pub_raw)
            if pub is None:
                continue
            # Normalize to naive UTC for comparison; timestamps without an
            # offset are taken as already being UTC.
            if pub.tzinfo is not None:
                pub_naive = pub.astimezone(timezone.utc).replace(tzinfo=None)
            else:
                pub_naive = pub
            if pub_naive < cutoff:
                continue
            title = article.get('title') or ''
            summary = article.get('summary') or article.get('summary_unescaped') or ''
            text_for_sentiment = (title + ' ' + summary).strip() or title
            # Leave score and recommendation empty when there is no text.
            score = analyze_sentiment(text_for_sentiment) if text_for_sentiment else None
            rec = recommendation_from_score(score) if score is not None else None
            results.append({
                'date': pub_naive.strftime('%Y-%m-%d'),
                'company': company_name,
                'symbol': symbol,
                'title': title,
                'source': article.get('source'),
                'published': pub.isoformat(),
                'url': article.get('url'),
                'sentiment_score': score,
                'recommendation': rec
            })
            count += 1

    if not results:
        print('No recent articles found for any stock in the given window.')
    df = pd.DataFrame(results)
    out_filename = f"sentiment_stock_news_{datetime.now(timezone.utc).strftime('%Y%m%d')}.csv"
    df.to_csv(out_filename, index=False)
    print(f'Saved sentiment results to {out_filename}')
    return df

if __name__ == '__main__':
    # Run on-demand when the notebook/script is executed, using the default
    # arguments. The returned DataFrame is discarded here; the CSV written by
    # the function is the output.
    fetch_all_news_and_analyze()


  cutoff = datetime.utcnow() - timedelta(days=days)


No articles for Alphabet (Google) (GOOGL)
No articles for Saudi Aramco (2222.SR)
No articles for Saudi Aramco (2222.SR)
No articles for Berkshire Hathaway (BRK.B)
No articles for Berkshire Hathaway (BRK.B)
No articles for JPMorgan Chase (JPM)
No articles for JPMorgan Chase (JPM)
Saved sentiment results to sentiment_stock_news_20251125.csv
Saved sentiment results to sentiment_stock_news_20251125.csv


  out_filename = f'sentiment_stock_news_{datetime.utcnow().strftime('%Y%m%d')}.csv'
