In [None]:
!pip install -q google-generativeai

from google.colab import userdata
import requests
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from datetime import datetime, timedelta
import google.generativeai as genai
import json
import time
import re

# --- API Keys ---
# Both keys are read from Colab's `userdata` store rather than being written
# into the notebook; each lookup uses the secret's name as the key.
NEWSDATA_API_KEY = userdata.get("NEWSDATA_API_KEY")
GEMINI_API_KEY = userdata.get("GEMINI_API_KEY")

# --- Configure Gemini ---
# `model` is the shared Gemini handle used by get_ticker_from_gemini().
genai.configure(api_key=GEMINI_API_KEY)
model = genai.GenerativeModel('gemini-2.0-flash')

# --- Load FinBERT ---
# The tokenizer and classifier are created once at cell execution and reused
# by get_sentiment() for every headline.
print("Loading FinBERT model...")
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
finbert_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
print("FinBERT model loaded successfully!")

# --- Logger ---
def log(msg):
    """Print *msg* to stdout, prefixed with the current local date and time."""
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{timestamp}] - {msg}")

# In-memory memo of Gemini lookups, keyed by the stripped, lower-cased
# headline text; it lives for the lifetime of the notebook kernel.
ticker_cache = {}

# --- Sentiment Analysis ---
def get_sentiment(text):
    """Classify *text* with FinBERT and return ``(label, confidence)``.

    The label is looked up in ``finbert_model.config.id2label`` for the
    highest-probability class; the confidence is that class's softmax
    probability as a plain Python float. Input is truncated to 512 tokens.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = finbert_model(**encoded).logits
        class_probs = torch.nn.functional.softmax(logits, dim=1)
        best = torch.max(class_probs, dim=1)
    best_label = finbert_model.config.id2label[best.indices.item()]
    return best_label, best.values.item()

# --- Gemini for Ticker Extraction ---
# Regexes that pull capitalised company-like names appearing before words such
# as "shares", "stock" or "reports"; compiled once instead of on every call.
_COMPANY_HINT_PATTERNS = (
    re.compile(r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*)\s+(?:shares?|stock)'),
    re.compile(r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)*(?:\s+(?:Industries|Limited|Ltd|Bank|Motors|Energy|Power|Systems))?)\s+(?:shares?|stock|reports?|posts?|announces?)'),
)


def _parse_ticker_response(response_text):
    """Turn Gemini's raw reply into a ``(company_name, ticker_symbol)`` pair."""
    # The model may wrap its JSON in a Markdown fence, with or without the
    # "json" language tag; strip either form before parsing.
    cleaned = re.sub(
        r'^```(?:json)?\s*|\s*```$', '', response_text.strip(), flags=re.IGNORECASE
    ).strip()
    result = json.loads(cleaned)

    company = result.get("company_name")
    symbol = result.get("ticker_symbol")
    # A JSON null or the literal "NONE" both mean "no company identified".
    if not isinstance(company, str) or company.strip().upper() == "NONE":
        return None, None
    return company, symbol


def get_ticker_from_gemini(headline):
    """Ask Gemini which company a headline is about.

    Args:
        headline: The news headline text.

    Returns:
        A ``(company_name, ticker_symbol)`` tuple. ``(None, None)`` is
        returned when the model reports no company or when the request or
        the parsing of its reply fails (the failure is logged).

    Successful answers are memoised in ``ticker_cache`` under the stripped,
    lower-cased headline. The cache stores the same tuple that is returned,
    so a cache hit and a fresh lookup give callers an identical shape.
    """
    cache_key = headline.strip().lower()
    if cache_key in ticker_cache:
        log("Cache hit")
        return ticker_cache[cache_key]

    company_hints = []
    for pattern in _COMPANY_HINT_PATTERNS:
        company_hints += pattern.findall(headline)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # prompt text is deterministic for a given headline.
    unique_hints = list(dict.fromkeys(company_hints))

    hint_text = f"\nCompany names detected in headline: {', '.join(unique_hints)}" if unique_hints else ""

    # NOTE(review): the prompt never tells the model to answer "NONE", yet the
    # parser treats that value as "no company" — confirm the intended contract.
    prompt = f"""
You are a seasoned financial analyst specializing in Indian stock markets.

Headline: "{headline}"{hint_text}

Return ONLY valid JSON:
{{"company_name": "Full Official Company Name", "ticker_symbol": "NSE_SYMBOL"}}
"""

    try:
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=0.1,
                max_output_tokens=150,
            )
        )
        parsed = _parse_ticker_response(response.text)
    except Exception as e:
        # Best-effort boundary around the external API: log and report
        # "not identified" rather than aborting the whole run. Failures are
        # deliberately not cached so a later retry can succeed.
        log(f"❌ Gemini error: {e}")
        return None, None

    ticker_cache[cache_key] = parsed
    return parsed

# --- Fetch Latest Financial Headlines ---
def get_recent_headlines_newsdata(api_key, count=10):
    """Fetch recent Indian business headlines from NewsData.io.

    Args:
        api_key: NewsData.io API key.
        count: Maximum number of headlines to return. The request itself is
            not sized by this value; the response is sliced afterwards.

    Returns:
        A list of ``{"title": ..., "published": ...}`` dicts ordered by
        ``pubDate`` descending, at most ``count`` long. An empty list is
        returned (and the reason logged) on any network, decoding or API
        error.
    """
    url = "https://newsdata.io/api/1/latest"
    params = {
        "apikey": api_key,
        "q": "finance OR stock OR IPO OR investment OR market OR NSE OR BSE",
        "country": "in",
        "language": "en",
        "category": "business"
    }

    try:
        # requests has no default timeout; bound the wait so a stalled
        # connection cannot hang the notebook cell indefinitely.
        response = requests.get(url, params=params, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        log(f"❌ Error fetching news: {e}")
        return []

    if not isinstance(data, dict) or data.get("status") != "success":
        message = None
        if isinstance(data, dict):
            message = data.get("message")
            # NOTE(review): error payloads appear to nest the message under
            # "results" — confirm against the NewsData.io documentation.
            nested = data.get("results")
            if message is None and isinstance(nested, dict):
                message = nested.get("message")
        log(f"⚠️ NewsData.io error: {message}")
        return []

    results = data.get("results") or []
    # Keep only items with a usable title and a string pubDate: a null value
    # would otherwise make the sort raise on mixed types.
    usable = [
        item for item in results
        if isinstance(item, dict)
        and item.get("title")
        and isinstance(item.get("pubDate"), str)
        and item["pubDate"]
    ]
    usable.sort(key=lambda item: item["pubDate"], reverse=True)

    headlines = [
        {"title": item["title"], "published": item["pubDate"]}
        for item in usable
    ]

    log(f"✅ Fetched {len(headlines)} sorted financial headlines")
    # Clamp so a negative count yields nothing instead of "all but the last".
    return headlines[:max(count, 0)]


# --- Analyze One Headline ---
def process_single_headline(entry):
    """Log the company and FinBERT sentiment for one headline entry.

    ``entry`` is a mapping with ``"title"`` and ``"published"`` keys. The
    company is resolved through get_ticker_from_gemini(); sentiment is only
    computed when both a company and a symbol come back. Nothing is returned,
    and any exception is logged instead of propagated.
    """
    try:
        headline = entry["title"]
        log(f"\nProcessing headline: {headline}")
        log(f"🕒 Published at: {entry['published']}")

        company, symbol = get_ticker_from_gemini(headline)

        # Guard clause: without both pieces there is nothing to score.
        if not (company and symbol):
            log("❌ Company not identified")
            return

        label, score = get_sentiment(headline)
        report_lines = (
            f"📰 HEADLINE: {headline}",
            f"🏢 COMPANY: {company} ({symbol})",
            f"📊 SENTIMENT: {label} (Confidence: {score:.3f})",
        )
        for line in report_lines:
            log(line)

    except Exception as e:
        log(f"❌ Error: {e}")

# --- Main ---
def analyze_news_sentiment(count=15, delay_seconds=0.5):
    """Fetch the latest headlines and log company and sentiment for each.

    Args:
        count: Maximum number of headlines to request from
            get_recent_headlines_newsdata().
        delay_seconds: Pause between consecutive headlines (presumably to
            stay under API rate limits — confirm).

    Returns nothing; all results are written through log().
    """
    log("=" * 80)
    # No time-window filter is applied to the fetched headlines, so the
    # banner describes them as the latest rather than "last 1 hour".
    log("🚀 STARTING FINANCIAL NEWS SENTIMENT ANALYSIS (Latest Headlines)")
    log("=" * 80)

    headlines = get_recent_headlines_newsdata(api_key=NEWSDATA_API_KEY, count=count)

    if not headlines:
        log("❌ No headlines found. Exiting.")
        return

    total = len(headlines)
    for i, entry in enumerate(headlines, 1):
        log(f"\n[{i}/{total}] → Analyzing")
        process_single_headline(entry)
        # Pause only between items; sleeping after the last one is wasted time.
        if i < total:
            time.sleep(delay_seconds)

    log("=" * 80)
    log("✅ Completed analysis of all fresh news items")
    log("=" * 80)

# --- Run ---
# Execute the full fetch -> ticker lookup -> sentiment pipeline when this
# cell runs; all output goes to stdout through log().
analyze_news_sentiment()


Loading FinBERT model...
FinBERT model loaded successfully!
[2025-06-28 06:51:29] - 🚀 STARTING FINANCIAL NEWS SENTIMENT ANALYSIS (Last 1 Hour Only)
[2025-06-28 06:51:30] - ✅ Fetched 10 sorted financial headlines
[2025-06-28 06:51:30] - 
[1/10] → Analyzing
[2025-06-28 06:51:30] - 
Processing headline: Wakefit seeks Rs 468 crore in IPO, major backers plan partial exit
[2025-06-28 06:51:30] - 🕒 Published at: 2025-06-27 18:47:10
[2025-06-28 06:51:32] - 📰 HEADLINE: Wakefit seeks Rs 468 crore in IPO, major backers plan partial exit
[2025-06-28 06:51:32] - 🏢 COMPANY: Wakefit Innovations Private Limited (N/A)
[2025-06-28 06:51:32] - 📊 SENTIMENT: neutral (Confidence: 0.918)
[2025-06-28 06:51:32] - 
[2/10] → Analyzing
[2025-06-28 06:51:32] - 
Processing headline: Gilead Stock Rises On US Supreme Court Ruling In Favor Of Health Insurers Covering Preventive Care
[2025-06-28 06:51:32] - 🕒 Published at: 2025-06-27 18:45:46
[2025-06-28 06:51:34] - 📰 HEADLINE: Gilead Stock Rises On US Supreme Court Ru