In [1]:
# Cell 1: Setup News Intelligence System
# Startup banner: title line followed by a 60-character rule.
print("📰 Setting up M&A News Intelligence System", "=" * 60, sep="\n")

# Core libraries
import requests
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import time
import sqlite3
import json
import re
import os

# RSS feed processing
import feedparser

# Web scraping
from bs4 import BeautifulSoup

# Text analysis and NLP
# The NLP stack is imported optimistically. On the first ImportError the three
# packages are installed with pip into the running interpreter and the import
# is attempted once more; a second failure propagates as ImportError.
try:
    import nltk
    from textblob import TextBlob
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    print("✅ NLP libraries loaded")
except ImportError as e:
    print(f"📦 Installing missing NLP libraries: {e}")
    import importlib
    import subprocess
    import sys

    # Install required packages
    packages = ['nltk', 'textblob', 'vaderSentiment']
    for package in packages:
        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        except (subprocess.CalledProcessError, OSError):
            # Best effort: keep going so the remaining packages still get a
            # chance. Only installer failures are caught here, so Ctrl-C
            # (KeyboardInterrupt) still aborts the cell.
            print(f"⚠️ Could not install {package}")

    # Packages installed while the interpreter is running may be missed by the
    # import system's cached directory listings; drop those caches first.
    importlib.invalidate_caches()

    # Try importing again
    import nltk
    from textblob import TextBlob
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    print("✅ NLP libraries installed and loaded")

# Download required NLTK data
# Every resource is probed on its own so that a single missing item (for
# example the VADER lexicon) is still fetched when the others are installed.
# Maps the nltk.download() package id to the path nltk.data.find() looks up.
required_nltk_data = {
    'punkt': 'tokenizers/punkt',
    'stopwords': 'corpora/stopwords',
    'vader_lexicon': 'sentiment/vader_lexicon.zip',
}

missing_nltk_data = []
for nltk_package, nltk_resource in required_nltk_data.items():
    try:
        nltk.data.find(nltk_resource)
    except LookupError:
        missing_nltk_data.append(nltk_package)

if not missing_nltk_data:
    print("✅ NLTK data already available")
else:
    print("📥 Downloading NLTK data...")
    # nltk.download() reports failure through its boolean return value rather
    # than by raising, so collect the packages that could not be fetched.
    failed_nltk_data = [
        nltk_package
        for nltk_package in missing_nltk_data
        if not nltk.download(nltk_package, quiet=True)
    ]
    if failed_nltk_data:
        print(f"⚠️ Could not download NLTK data: {failed_nltk_data}")
    else:
        print("✅ NLTK data downloaded")

# Configuration and database
# `sys` is imported here because the only other import of it in this cell sits
# inside the NLP install fallback, which does not run when the NLP libraries
# are already importable; without this line the path tweak raises NameError.
import sys

# Make the project's helper modules importable; skip the append when the cell
# is re-run so the entry is not duplicated.
if '../src' not in sys.path:
    sys.path.append('../src')

try:
    from config_loader import load_config, load_data_sources, get_database_path
    config = load_config()
    data_sources = load_data_sources()
    print("✅ Configuration loaded from Notebook 1")
except ImportError:
    # Only a missing/unimportable config_loader triggers the backup values;
    # errors raised by the loaders themselves still propagate.
    print("⚠️ Could not load configuration from Notebook 1")
    print("💡 Will use backup configuration")

    # Backup configuration
    config = {
        'news_intelligence': {
            'ma_keywords': ['merger', 'acquisition', 'buyout', 'takeover', 'deal', 'acquire', 'divest'],
            'max_articles_per_source': 50
        }
    }
    data_sources = {
        'news_sources': {
            'rss_feeds': [
                {'name': 'Reuters Business', 'url': 'http://feeds.reuters.com/reuters/businessNews', 'priority': 'high'},
                {'name': 'MarketWatch', 'url': 'http://feeds.marketwatch.com/marketwatch/topstories/', 'priority': 'high'},
                {'name': 'Yahoo Finance', 'url': 'https://finance.yahoo.com/news/rssindex', 'priority': 'medium'}
            ]
        }
    }

# Initialize sentiment analyzer
# Instantiates vaderSentiment's SentimentIntensityAnalyzer with its default
# arguments and binds it to the module-level name `analyzer`.
# NOTE(review): presumably reused by later cells to score article text — confirm.
analyzer = SentimentIntensityAnalyzer()

# Database connection
# Opens the SQLite database at the path supplied by get_database_path() when
# that helper was imported, otherwise at the default project path. On any
# failure it falls back to the default path.
default_db_path = "../data/processed/ma_intelligence.db"
try:
    db_path = get_database_path() if 'get_database_path' in globals() else default_db_path
    db_connection = sqlite3.connect(db_path)
    print(f"✅ Connected to database: {db_path}")
except Exception as e:
    print(f"⚠️ Database connection issue: {e}")
    db_path = default_db_path
    # sqlite3 creates a missing database file but not missing parent
    # directories. Without this, retrying the default path would fail with the
    # same "unable to open database file" error whenever the directory is absent.
    os.makedirs(os.path.dirname(db_path), exist_ok=True)
    db_connection = sqlite3.connect(db_path)
    print("✅ Connected to backup database path")

# Display settings
# Show every DataFrame column and allow up to 100 characters per cell.
for display_option, display_value in (
    ('display.max_columns', None),
    ('display.max_colwidth', 100),
):
    pd.set_option(display_option, display_value)

# Setup summary: configured keywords, number of RSS feeds, session timestamp.
print("\n📊 NEWS INTELLIGENCE SETUP COMPLETE!")
configured_keywords = config['news_intelligence']['ma_keywords']
print(f"🎯 M&A Keywords: {configured_keywords}")
rss_feed_count = len(data_sources['news_sources']['rss_feeds'])
print(f"📡 News Sources: {rss_feed_count} RSS feeds configured")
print("🗄️ Database: Ready for article storage and analysis")
session_started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f"📅 Session started: {session_started_at}")

print("\n🚀 Ready to collect and analyze M&A news!")

📰 Setting up M&A News Intelligence System
📦 Installing missing NLP libraries: No module named 'vaderSentiment'
✅ NLP libraries installed and loaded
✅ NLTK data already available
✅ Configuration loaded from Notebook 1
✅ Connected to database: ../data/processed/ma_intelligence.db

📊 NEWS INTELLIGENCE SETUP COMPLETE!
🎯 M&A Keywords: ['merger', 'acquisition', 'buyout', 'takeover', 'deal', 'acquire', 'divest', 'strategic review', 'strategic alternatives', 'spin-off', 'restructuring', 'consolidation']
📡 News Sources: 4 RSS feeds configured
🗄️ Database: Ready for article storage and analysis
📅 Session started: 2025-08-27 15:17:11

🚀 Ready to collect and analyze M&A news!
