In [None]:
# FINANCIAL NEWS SENTIMENT ANALYZER (FIXED VERSION)
# Enhanced to reliably detect financial markets

# [1] Install packages (if needed)
!pip install transformers torch pandas requests tqdm python-dotenv

# [2] Import libraries
import os
import re
from datetime import datetime, timedelta

import pandas as pd
import requests
from dotenv import load_dotenv
from tqdm import tqdm
from transformers import pipeline

# Initialize environment: load_dotenv() reads key=value pairs from a .env file
# (if one is found) into the process environment. fetch_news() later reads
# NEWSAPI_KEY and ALPHAVANTAGE_KEY from there via os.getenv.
load_dotenv()

# [3] Financial Market Detection Config (UPDATED)
# Keyword lists per market. is_financial() treats the two spellings differently:
#   * all-lowercase terms ("stock", "exchange rate") match as case-insensitive
#     substrings, so "stock" also hits "Stocks";
#   * terms containing capitals ("Fed", "ETH", "S&P") are tickers/acronyms and
#     match only as case-sensitive whole words, so "ETH" does not hit "whether"
#     and "Dow" does not hit "down".
MARKET_KEYWORDS = {
    "Stocks": ["stock", "equity", "share", "S&P", "NASDAQ", "Dow", "index", "bull", "bear", "market"],
    "Forex": ["forex", "currency", "USD", "EUR", "GBP", "JPY", "FX", "exchange rate", "dollar", "yen"],
    "Commodities": ["oil", "gold", "silver", "commodity", "crude", "barrel", "ounce", "copper", "futures"],
    "Economy": ["GDP", "inflation", "CPI", "unemployment", "economic", "growth", "recession", "Fed", "ECB", "central bank"],
    "Crypto": ["bitcoin", "crypto", "blockchain", "BTC", "ETH", "digital currency", "token", "DeFi"],
    "Bonds": ["bond", "yield", "treasury", "debt", "10-year", "notes", "credit rating"]
}


def _term_in_text(term, text, text_lower):
    """Return True if one keyword occurs in the text.

    Args:
        term: Keyword exactly as written in MARKET_KEYWORDS.
        text: The original (case-preserved) text.
        text_lower: ``text.lower()``, computed once by the caller.
    """
    if term == term.lower():
        # Plain lowercase keyword: case-insensitive substring match.
        return term in text_lower
    # Capitalised keyword: comparing it against lowercased text could never
    # succeed, so match it case-sensitively as a whole word instead.
    return re.search(r"\b" + re.escape(term) + r"\b", text) is not None


# [4] Relaxed Financial Content Filter (FIXED)
def is_financial(text):
    """Decide whether a piece of news text looks like financial content.

    The text qualifies when at least two MARKET_KEYWORDS terms occur in it
    (each term counted once, across all markets) and none of the
    non-financial topic words appear.

    Args:
        text: Headline/summary text; ``None`` or ``""`` is treated as empty.

    Returns:
        bool: True if the text passes the filter, False otherwise.
    """
    text = text or ""
    text_lower = text.lower()

    # Must contain at least 2 financial terms
    financial_terms = sum(
        1
        for terms in MARKET_KEYWORDS.values()
        for term in terms
        if _term_in_text(term, text, text_lower)
    )
    if financial_terms < 2:
        return False

    # Exclude non-financial content
    non_financial = any(
        topic in text_lower
        for topic in ("sports", "entertainment", "celebrity", "movie", "music")
    )
    return not non_financial

# [5] News Fetcher with Better Filtering (FIXED)
def fetch_news():
    """Fetch business news from NewsAPI and Alpha Vantage, keeping financial items.

    Each source is queried independently and on a best-effort basis: a missing
    API key, a network/HTTP failure or an unexpected payload is reported with
    a printed warning and that source is skipped, so the other source can
    still contribute.

    API keys are read from the NEWSAPI_KEY and ALPHAVANTAGE_KEY environment
    variables.

    Returns:
        list[dict]: One ``{"text": ..., "date": ...}`` entry per unique article
        that passes ``is_financial``. ``text`` is "<title>. <summary>" cut to
        1000 characters; ``date`` is "YYYY-MM-DD", or "" when the source gave
        no usable timestamp.
    """
    # (source name, endpoint, fixed query params, API-key param name, env var)
    sources = [
        ("NewsAPI", "https://newsapi.org/v2/top-headlines",
         {"category": "business"}, "apiKey", "NEWSAPI_KEY"),
        ("AlphaVantage", "https://www.alphavantage.co/query",
         {"function": "NEWS_SENTIMENT"}, "apikey", "ALPHAVANTAGE_KEY"),
    ]

    seen_texts = set()
    all_news = []

    for source_name, url, query, key_param, env_var in sources:
        api_key = os.getenv(env_var)
        if not api_key:
            # Concatenating None into the URL used to raise before any
            # request was made; skip just this source instead.
            print(f"⚠️ {source_name} error: {env_var} is not set, skipping source")
            continue

        try:
            response = requests.get(
                url, params={**query, key_param: api_key}, timeout=10
            )
            # Surface HTTP errors (bad key, rate limit) instead of silently
            # treating the error payload as "no articles".
            response.raise_for_status()
            data = response.json()

            # Normalise both payload shapes to (title, body, date) tuples.
            # `or ''` / `or []` guard against JSON nulls, which .get() returns
            # as None even when a default is supplied.
            if source_name == "NewsAPI":
                candidates = [
                    (
                        article.get('title'),
                        article.get('description'),
                        (article.get('publishedAt') or '')[:10],  # 'YYYY-MM-DD'
                    )
                    for article in data.get('articles') or []
                ]
            else:  # AlphaVantage
                candidates = []
                for item in data.get('feed') or []:
                    stamp = (item.get('time_published') or '')[:8]  # 'YYYYMMDD'
                    published = (
                        f"{stamp[:4]}-{stamp[4:6]}-{stamp[6:]}"
                        if len(stamp) == 8
                        else ''
                    )
                    candidates.append(
                        (item.get('title'), item.get('summary'), published)
                    )

            for title, body, published in candidates:
                text = f"{title or ''}. {body or ''}".strip()
                if is_financial(text) and text not in seen_texts:
                    seen_texts.add(text)
                    all_news.append({"text": text[:1000], "date": published})

        except Exception as e:
            # Deliberately broad: one failing source must not abort the run.
            print(f"⚠️ {source_name} error: {str(e)}")

    return all_news

# [6] Initialize Models
print("Loading AI models...")
# Both models are used as plain text classifiers: get_signal() reads the top
# label from `finbert` as the sentiment and from `emotion` as the emotion.
_CLASSIFICATION_TASK = "text-classification"
finbert = pipeline(task=_CLASSIFICATION_TASK, model="yiyanghkust/finbert-tone")
emotion = pipeline(task=_CLASSIFICATION_TASK, model="SamLowe/roberta-base-go_emotions")

# [7] Enhanced Signal Generator
def _first_matching_market(text):
    """Return the first MARKET_KEYWORDS market mentioned in *text*, else "General".

    Lowercase keywords match as case-insensitive substrings. Keywords that
    contain capitals (tickers/acronyms such as "Fed", "ETH", "S&P") match as
    case-sensitive whole words; comparing them against lowercased text, as
    before, could never succeed.
    """
    lowered = text.lower()  # computed once instead of once per keyword
    for market, terms in MARKET_KEYWORDS.items():
        for term in terms:
            if term == term.lower():
                found = term in lowered
            else:
                found = re.search(r"\b" + re.escape(term) + r"\b", text) is not None
            if found:
                return market
    return "General"


def get_signal(text):
    """Generate a trading-signal label for a piece of news text.

    The score starts from the FinBERT sentiment label (+1 / 0 / -1), is nudged
    by the top emotion label, and is then scaled by a multiplier for the first
    market whose keywords appear in the text.

    Args:
        text: News text. Only the first 512 characters are sent to the models;
            market detection uses the full text.

    Returns:
        str: One of "🟢 Strong Buy", "🟢 Buy", "🟠 Sell", "🔴 Strong Sell" or
        "⚪ Neutral". Any failure during analysis is printed and reported as
        "⚪ Neutral" rather than raised.
    """
    try:
        sentiment = finbert(text[:512])[0]['label']
        emotion_label = emotion(text[:512])[0]['label']

        # Market-specific adjustments
        market = _first_matching_market(text)

        # Score calculation
        score = {
            "Positive": 1.0,
            "Neutral": 0.0,
            "Negative": -1.0
        }[sentiment]

        # Emotion adjustments (unlisted emotions leave the score unchanged)
        score += {
            "joy": 0.3, "optimism": 0.2, "excitement": 0.1,
            "fear": -0.3, "anger": -0.2, "annoyance": -0.1
        }.get(emotion_label.lower(), 0)

        # Market multipliers (unlisted markets and "General" use 1.0)
        score *= {
            "Stocks": 1.2,
            "Crypto": 1.3,
            "Commodities": 1.1
        }.get(market, 1.0)

        # Generate signal
        if score >= 1.5:
            return "🟢 Strong Buy"
        if score >= 0.7:
            return "🟢 Buy"
        if score <= -1.5:
            return "🔴 Strong Sell"
        if score <= -0.7:
            return "🟠 Sell"
        return "⚪ Neutral"

    except Exception as e:
        # Best-effort: a model or lookup failure must not stop the batch.
        print(f"⚠️ Analysis error: {str(e)}")
        return "⚪ Neutral"

# [8] Main Analysis Function (FIXED)
def _mentions_any(text, keywords):
    """Return True if *text* mentions at least one of *keywords*.

    Lowercase keywords match as case-insensitive substrings. Keywords that
    contain capitals (tickers/acronyms such as "Fed", "ETH", "S&P") match as
    case-sensitive whole words; comparing them against lowercased text, as
    before, could never succeed.
    """
    lowered = text.lower()
    for keyword in keywords:
        if keyword == keyword.lower():
            if keyword in lowered:
                return True
        elif re.search(r"\b" + re.escape(keyword) + r"\b", text):
            return True
    return False


def analyze_financial_markets():
    """Run the complete market analysis.

    Fetches news, groups the articles by every market whose keywords they
    mention (an article can appear under several markets) and attaches a
    trading signal to each row.

    Returns:
        pandas.DataFrame: Columns "Market", "Headline", "Signal", "Date".
        The columns are present even when no article was found.
    """
    print("\n🔍 Fetching financial news...")
    news_items = fetch_news()

    # An article listed under several markets gets the same signal each time,
    # so run the models only once per distinct text.
    signal_by_text = {}

    results = []
    for market, keywords in MARKET_KEYWORDS.items():
        market_news = [
            item for item in news_items
            if _mentions_any(item["text"], keywords)
        ]

        print(f"\n📊 {market} Sector ({len(market_news)} articles)")
        for item in tqdm(market_news, desc=market):
            text = item["text"]
            if text not in signal_by_text:
                signal_by_text[text] = get_signal(text)
            results.append({
                "Market": market,
                "Headline": text[:100] + "..." if len(text) > 100 else text,
                "Signal": signal_by_text[text],
                "Date": item["date"]
            })

    return pd.DataFrame(results, columns=["Market", "Headline", "Signal", "Date"])
# [9] Run Analysis
results_df = analyze_financial_markets()

# [10] Display Results
# Checked in order; the first fragment found in the signal text wins, so the
# "Strong ..." variants are tested before their plain counterparts.
_SIGNAL_BACKGROUNDS = (
    ("Strong Buy", "background: darkgreen"),
    ("Buy", "background: lightgreen"),
    ("Strong Sell", "background: darkred"),
    ("Sell", "background: salmon"),
)


def _signal_css(column):
    """Return one CSS string per signal value; "" when no fragment matches."""
    return [
        next((css for fragment, css in _SIGNAL_BACKGROUNDS if fragment in value), "")
        for value in column
    ]


if results_df.empty:
    print("\n⚠️ No financial market data found - check your news sources")
else:
    print("\n📈 Financial Market Signals")
    print("=" * 50)
    styled_results = results_df.style.apply(_signal_css, subset=['Signal'])
    display(styled_results)

# [11] Save Results
# The file name carries today's date, so a rerun on the same day overwrites it.
csv_path = f"market_signals_{datetime.now().strftime('%Y%m%d')}.csv"
results_df.to_csv(csv_path, index=False)
print("\n✅ Analysis saved to CSV")