<a href="https://colab.research.google.com/github/Sumant-crty/Python-Data-Scraping-Portfolio/blob/main/MSNMB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install feedparser requests beautifulsoup4
import time
from collections import Counter
from datetime import datetime, timedelta, timezone
from html import escape
from urllib.parse import urljoin

import feedparser
import requests
from bs4 import BeautifulSoup

class BengaliNewsAggregator:
    """Collect news headlines from RSS feeds and scraped news sites.

    Every headline is a dict with the keys ``title``, ``link``, ``source``
    and ``published``. All fetchers are best-effort: a failing source prints
    an error line and contributes no headlines instead of raising.

    Args:
        rss_delay: Seconds to pause after each RSS request.
        scrape_delay: Seconds to pause after each scraped site.
    """

    # Per-source caps and the network timeout (seconds) shared by all fetchers.
    MAX_RSS_ENTRIES = 20
    MAX_SCRAPED_HEADLINES = 15
    REQUEST_TIMEOUT = 10

    def __init__(self, rss_delay=0.5, scrape_delay=1.0):
        """Initialize the news aggregator with multiple sources"""
        self.rss_delay = rss_delay
        self.scrape_delay = scrape_delay

        self.rss_sources = {
            # Google News - Bengali
            'Google News - Bengal': 'https://news.google.com/rss/search?q=bengal+OR+kolkata&hl=bn&gl=IN&ceid=IN:bn',
            'Google News - India (Bengali)': 'https://news.google.com/rss/search?q=india&hl=bn&gl=IN&ceid=IN:bn',
            'Google News - West Bengal': 'https://news.google.com/rss/search?q=west+bengal&hl=en&gl=IN&ceid=IN:en',

            # English News RSS
            'The Telegraph Bengal': 'https://www.telegraphindia.com/feeds/rss/bengal',
            'The Hindu - West Bengal': 'https://www.thehindu.com/news/national/west-bengal/feeder/default.rss',
            'Times of India - Kolkata': 'https://timesofindia.indiatimes.com/rssfeeds/2279055.cms',
            'Indian Express - Kolkata': 'https://indianexpress.com/section/cities/kolkata/feed/',
            'NDTV - Kolkata': 'https://feeds.feedburner.com/ndtv/TIXd',

            # National News RSS
            'The Hindu - National': 'https://www.thehindu.com/news/national/feeder/default.rss',
            'Times of India - India': 'https://timesofindia.indiatimes.com/rssfeeds/-2128936835.cms',
            'India Today': 'https://www.indiatoday.in/rss/home',
            'NDTV News': 'https://feeds.feedburner.com/ndtvnews-latest',
            'Hindustan Times': 'https://www.hindustantimes.com/feeds/rss/india-news/rssfeed.xml',
            'The Indian Express': 'https://indianexpress.com/feed/',

            # Business & Economy
            'Economic Times': 'https://economictimes.indiatimes.com/rssfeedstopstories.cms',
            'Business Standard': 'https://www.business-standard.com/rss/home_page_top_stories.rss',
            'Mint': 'https://www.livemint.com/rss/news',

            # Sports
            'Sports - Times of India': 'https://timesofindia.indiatimes.com/rssfeeds/4719148.cms',
            'Cricket News': 'https://www.thehindu.com/sport/cricket/feeder/default.rss',

            # Technology
            'Tech News India': 'https://indianexpress.com/section/technology/feed/',
        }

        # Each entry pairs a site with the bound method that knows its markup.
        self.scraping_sources = [
            {
                'name': 'Ei Samay',
                'url': 'https://eisamay.com/',
                'method': self.scrape_eisamay
            },
            {
                'name': 'Sangbad Pratidin',
                'url': 'https://www.sangbadpratidin.in/',
                'method': self.scrape_sangbad_pratidin
            },
            {
                'name': 'ABP Ananda',
                'url': 'https://www.abpananda.com/',
                'method': self.scrape_abp_ananda
            },
            {
                'name': 'Zee 24 Ghanta',
                'url': 'https://zeenews.india.com/bengali',
                'method': self.scrape_zee24ghanta
            },
            {
                'name': 'News18 Bengali',
                'url': 'https://bengali.news18.com/',
                'method': self.scrape_news18_bengali
            },
            {
                'name': 'The Telegraph Kolkata',
                'url': 'https://www.telegraphindia.com/west-bengal',
                'method': self.scrape_telegraph
            },
            {
                'name': 'Bengal Live',
                'url': 'https://bengallive.com/',
                'method': self.scrape_bengal_live
            },
            {
                'name': 'Bartaman Patrika',
                'url': 'https://bartamanpatrika.com/',
                'method': self.scrape_bartaman
            }
        ]

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Connection': 'keep-alive',
        }

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _make_headline(title, link, source, published='Recent'):
        """Build the headline record shared by every fetcher."""
        return {
            'title': title,
            'link': link,
            'source': source,
            'published': published
        }

    @staticmethod
    def _resolve_link(base_url, href):
        """Return ``href`` as an absolute http(s) URL, or '' if unusable.

        ``urljoin`` handles path-relative ("news/a"), root-relative ("/a")
        and protocol-relative ("//host/a") references. Plain string
        concatenation mangled the first and last of these.
        """
        href = (href or '').strip()
        # Pure fragments point back at the page itself, not at an article.
        if not href or href.startswith('#'):
            return ''
        link = urljoin(base_url, href)
        # Drops javascript:, mailto:, tel: and similar non-article schemes.
        if link.startswith(('http://', 'https://')):
            return link
        return ''

    @staticmethod
    def _dedupe(headlines):
        """Drop repeated titles (case-insensitive) and titles of <= 10 chars.

        The first occurrence of each title wins; input order is preserved.
        """
        seen_titles = set()
        unique_headlines = []
        for headline in headlines:
            key = headline['title'].lower().strip()
            if len(key) > 10 and key not in seen_titles:
                seen_titles.add(key)
                unique_headlines.append(headline)
        return unique_headlines

    def _fetch_soup(self, url):
        """Download ``url`` and parse it; raises on network or HTTP errors."""
        response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        # Without this, 4xx/5xx error pages would be scraped as if they were news.
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser')

    def _scrape_headings(self, url, source_name, tags):
        """Collect headlines from the first link inside each heading tag.

        Args:
            url: Page to download; also the base for relative links.
            source_name: Value stored under each headline's ``source`` key.
            tags: Heading tag names to search, e.g. ``['h2', 'h3']``.

        Returns:
            Up to ``MAX_SCRAPED_HEADLINES`` headline dicts; [] on any error.
        """
        headlines = []
        try:
            soup = self._fetch_soup(url)
            for heading in soup.find_all(tags):
                anchor = heading.find('a')
                if anchor is not None:
                    title = anchor.get_text(strip=True)
                    link = self._resolve_link(url, anchor.get('href', ''))
                    # A headline without a usable link cannot be opened.
                    if len(title) > 15 and link:
                        headlines.append(self._make_headline(title, link, source_name))

                if len(headlines) >= self.MAX_SCRAPED_HEADLINES:
                    break

        except Exception as e:  # best-effort: one bad site must not stop the run
            print(f"    Error: {e}")

        return headlines

    def _scrape_anchors(self, url, source_name, link_must_contain):
        """Collect headlines from every anchor whose URL contains a marker.

        Args:
            url: Page to download; also the base for relative links.
            source_name: Value stored under each headline's ``source`` key.
            link_must_contain: Substring the absolute link must contain.

        Returns:
            Up to ``MAX_SCRAPED_HEADLINES`` headline dicts; [] on any error.
        """
        headlines = []
        try:
            soup = self._fetch_soup(url)
            for anchor in soup.find_all('a', href=True):
                title = anchor.get_text(strip=True)
                link = self._resolve_link(url, anchor.get('href', ''))

                if len(title) > 20 and link_must_contain in link:
                    headlines.append(self._make_headline(title, link, source_name))

                if len(headlines) >= self.MAX_SCRAPED_HEADLINES:
                    break

        except Exception as e:  # best-effort: one bad site must not stop the run
            print(f"    Error: {e}")

        return headlines

    # ------------------------------------------------------------------
    # RSS
    # ------------------------------------------------------------------

    def fetch_rss_feed(self, url, source_name):
        """Fetch up to ``MAX_RSS_ENTRIES`` headlines from one RSS feed.

        The feed is downloaded with ``requests`` so the request carries a
        timeout and the shared headers, then the bytes are handed to
        ``feedparser``.

        Returns:
            A list of headline dicts; [] if the feed could not be fetched.
        """
        headlines = []
        try:
            print(f"Fetching from {source_name}...")
            response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            feed = feedparser.parse(response.content)

            if feed.bozo:
                print(f"  Warning: Feed parsing issue for {source_name}")

            for entry in feed.entries[:self.MAX_RSS_ENTRIES]:
                headlines.append(self._make_headline(
                    entry.get('title', 'No title'),
                    entry.get('link', '#'),
                    source_name,
                    entry.get('published', entry.get('updated', 'Recent')),
                ))

            print(f"  ✓ Found {len(headlines)} headlines from {source_name}")

        except Exception as e:  # best-effort: one bad feed must not stop the run
            print(f"  ✗ Error fetching {source_name}: {e}")

        return headlines

    # ------------------------------------------------------------------
    # Site scrapers
    # ------------------------------------------------------------------

    def scrape_eisamay(self, url):
        """Scrape Ei Samay, trying progressively broader CSS selectors."""
        headlines = []
        try:
            soup = self._fetch_soup(url)

            # Stop widening the search once one selector pass yields enough.
            for selector in ('h2 a', 'h3 a', '.headline a', 'article a'):
                for anchor in soup.select(selector)[:20]:
                    title = anchor.get_text(strip=True)
                    link = self._resolve_link(url, anchor.get('href', ''))

                    if len(title) > 15 and link:
                        headlines.append(self._make_headline(title, link, 'Ei Samay'))

                if len(headlines) >= self.MAX_SCRAPED_HEADLINES:
                    break

        except Exception as e:  # best-effort: one bad site must not stop the run
            print(f"    Error: {e}")

        return headlines[:self.MAX_SCRAPED_HEADLINES]

    def scrape_sangbad_pratidin(self, url):
        """Scrape Sangbad Pratidin"""
        return self._scrape_headings(url, 'Sangbad Pratidin', ['h2', 'h3', 'h4'])

    def scrape_abp_ananda(self, url):
        """Scrape ABP Ananda"""
        return self._scrape_anchors(url, 'ABP Ananda', 'abpananda.com')

    def scrape_zee24ghanta(self, url):
        """Scrape Zee 24 Ghanta"""
        return self._scrape_anchors(url, 'Zee 24 Ghanta', 'bengali')

    def scrape_news18_bengali(self, url):
        """Scrape News18 Bengali"""
        return self._scrape_headings(url, 'News18 Bengali', ['h2', 'h3'])

    def scrape_telegraph(self, url):
        """Scrape The Telegraph, keeping only site-relative Bengal/Kolkata links."""
        headlines = []
        sections = ('/bengal/', '/kolkata/', '/west-bengal/')
        try:
            soup = self._fetch_soup(url)
            for anchor in soup.find_all('a', href=True):
                title = anchor.get_text(strip=True)
                href = anchor.get('href', '')

                if (len(title) > 25 and
                        href.startswith('/') and
                        any(section in href.lower() for section in sections)):
                    link = self._resolve_link(url, href)
                    if link:
                        headlines.append(
                            self._make_headline(title, link, 'The Telegraph Kolkata'))

                if len(headlines) >= self.MAX_SCRAPED_HEADLINES:
                    break

        except Exception as e:  # best-effort: one bad site must not stop the run
            print(f"    Error: {e}")

        return headlines

    def scrape_bengal_live(self, url):
        """Scrape Bengal Live"""
        return self._scrape_headings(url, 'Bengal Live', ['h2', 'h3', 'h4'])

    def scrape_bartaman(self, url):
        """Scrape Bartaman Patrika"""
        return self._scrape_anchors(url, 'Bartaman Patrika', 'bartamanpatrika.com')

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def fetch_all_news(self):
        """Fetch news from all available sources.

        Runs every RSS feed, then every scraper, printing progress as it
        goes, and returns the de-duplicated list of headline dicts.
        """
        all_headlines = []

        print("=" * 70)
        print(" " * 15 + "MULTI-SOURCE NEWS AGGREGATION")
        print("=" * 70)
        print()

        # Fetch from RSS feeds
        print("📡 Fetching from RSS Feeds...")
        print("-" * 70)
        for source_name, url in self.rss_sources.items():
            all_headlines.extend(self.fetch_rss_feed(url, source_name))
            if self.rss_delay > 0:
                time.sleep(self.rss_delay)  # Be polite

        print()
        print("🌐 Attempting to scrape additional sources...")
        print("-" * 70)

        # Try scraping sources
        for source in self.scraping_sources:
            try:
                print(f"Attempting to scrape {source['name']}...")
                scraped_headlines = source['method'](source['url'])
                if scraped_headlines:
                    all_headlines.extend(scraped_headlines)
                    print(f"  ✓ Found {len(scraped_headlines)} headlines from {source['name']}")
                else:
                    print(f"  ✗ No headlines found from {source['name']}")
            except Exception as e:
                print(f"  ✗ Could not scrape {source['name']}: {e}")

            if self.scrape_delay > 0:
                time.sleep(self.scrape_delay)  # Be polite between requests

        print()
        print("=" * 70)
        print(f"Total headlines collected: {len(all_headlines)}")
        print("=" * 70)

        # Remove duplicates
        unique_headlines = self._dedupe(all_headlines)

        print(f"Unique headlines after deduplication: {len(unique_headlines)}")
        print()

        return unique_headlines

def _safe_href(link):
    """Return an attribute-safe href for http(s) links, or '#' otherwise."""
    link = str(link or '').strip()
    # Only web links are allowed through; this blocks javascript: and data: URLs.
    if link.lower().startswith(('http://', 'https://')):
        return escape(link, quote=True)
    return '#'


def generate_html(headlines):
    """Generate a standalone HTML page listing all news headlines.

    Args:
        headlines: Iterable of dicts with the keys ``title``, ``link`` and
            ``source``, and optionally ``published``.

    Returns:
        The complete HTML document as a string. Headlines are grouped by
        source (sources sorted alphabetically). All headline-derived text is
        HTML-escaped because it comes from external feeds and web pages.
    """
    headlines = list(headlines)

    # Group headlines by source
    grouped_headlines = {}
    for headline in headlines:
        grouped_headlines.setdefault(headline['source'], []).append(headline)

    total = len(headlines)
    source_total = len(grouped_headlines)

    # The page labels the time as IST, so compute it in IST (UTC+05:30)
    # rather than in whatever timezone the machine happens to use.
    ist = timezone(timedelta(hours=5, minutes=30))
    last_updated = datetime.now(ist).strftime('%B %d, %Y at %I:%M %p IST')

    # Collect fragments and join once at the end instead of repeated "+=".
    parts = [f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Multi-Source News Aggregator - Latest Headlines</title>
        <style>
            * {{
                margin: 0;
                padding: 0;
                box-sizing: border-box;
            }}

            body {{
                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                min-height: 100vh;
                padding: 20px;
            }}

            .container {{
                max-width: 1400px;
                margin: 0 auto;
                background: white;
                border-radius: 15px;
                box-shadow: 0 20px 60px rgba(0,0,0,0.3);
                overflow: hidden;
            }}

            .header {{
                background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
                color: white;
                padding: 40px 30px;
                text-align: center;
            }}

            .header h1 {{
                font-size: 2.8em;
                margin-bottom: 10px;
                text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
            }}

            .header p {{
                font-size: 1.2em;
                opacity: 0.95;
            }}

            .stats {{
                display: flex;
                justify-content: space-around;
                background: #f8f9fa;
                padding: 20px;
                border-bottom: 2px solid #e9ecef;
                flex-wrap: wrap;
                gap: 15px;
            }}

            .stat-item {{
                text-align: center;
                min-width: 120px;
            }}

            .stat-number {{
                font-size: 2em;
                font-weight: bold;
                color: #667eea;
            }}

            .stat-label {{
                font-size: 0.9em;
                color: #666;
                margin-top: 5px;
            }}

            .timestamp {{
                background: #fff3cd;
                padding: 15px;
                text-align: center;
                color: #856404;
                font-size: 0.95em;
                border-bottom: 2px solid #ffc107;
            }}

            .filters {{
                padding: 20px 30px;
                background: #f8f9fa;
                border-bottom: 1px solid #dee2e6;
            }}

            .filter-search {{
                margin-bottom: 15px;
            }}

            .filter-search input {{
                width: 100%;
                padding: 12px 20px;
                border: 2px solid #667eea;
                border-radius: 25px;
                font-size: 1em;
                outline: none;
                transition: all 0.3s ease;
            }}

            .filter-search input:focus {{
                box-shadow: 0 0 10px rgba(102, 126, 234, 0.3);
            }}

            .filter-buttons {{
                display: flex;
                flex-wrap: wrap;
                gap: 10px;
                max-height: 200px;
                overflow-y: auto;
            }}

            .filter-btn {{
                padding: 8px 16px;
                border: 2px solid #667eea;
                background: white;
                color: #667eea;
                border-radius: 20px;
                cursor: pointer;
                transition: all 0.3s ease;
                font-size: 0.85em;
                white-space: nowrap;
            }}

            .filter-btn:hover {{
                background: #667eea;
                color: white;
            }}

            .filter-btn.active {{
                background: #667eea;
                color: white;
            }}

            .content {{
                padding: 30px;
            }}

            .source-section {{
                margin-bottom: 40px;
            }}

            .source-header {{
                display: flex;
                align-items: center;
                margin-bottom: 20px;
                padding-bottom: 10px;
                border-bottom: 3px solid #667eea;
            }}

            .source-icon {{
                width: 40px;
                height: 40px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                border-radius: 50%;
                display: flex;
                align-items: center;
                justify-content: center;
                color: white;
                font-weight: bold;
                margin-right: 15px;
                font-size: 1.2em;
            }}

            .source-name {{
                font-size: 1.4em;
                font-weight: bold;
                color: #2c3e50;
            }}

            .source-count {{
                margin-left: auto;
                background: #667eea;
                color: white;
                padding: 5px 15px;
                border-radius: 20px;
                font-size: 0.9em;
            }}

            .headlines-grid {{
                display: grid;
                grid-template-columns: repeat(auto-fill, minmax(320px, 1fr));
                gap: 20px;
            }}

            .headline-card {{
                background: white;
                border: 2px solid #e9ecef;
                border-radius: 10px;
                padding: 20px;
                transition: all 0.3s ease;
                position: relative;
                overflow: hidden;
            }}

            .headline-card::before {{
                content: '';
                position: absolute;
                left: 0;
                top: 0;
                height: 100%;
                width: 4px;
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                transform: scaleY(0);
                transition: transform 0.3s ease;
            }}

            .headline-card:hover {{
                border-color: #667eea;
                box-shadow: 0 5px 20px rgba(102, 126, 234, 0.2);
                transform: translateY(-3px);
            }}

            .headline-card:hover::before {{
                transform: scaleY(1);
            }}

            .headline-card a {{
                text-decoration: none;
                color: #2c3e50;
                display: block;
            }}

            .headline-title {{
                font-size: 1.05em;
                line-height: 1.6;
                margin-bottom: 10px;
                font-weight: 500;
            }}

            .headline-card:hover .headline-title {{
                color: #667eea;
            }}

            .headline-meta {{
                display: flex;
                justify-content: space-between;
                align-items: center;
                font-size: 0.85em;
                color: #6c757d;
                margin-top: 10px;
                padding-top: 10px;
                border-top: 1px solid #e9ecef;
            }}

            .published-date {{
                font-style: italic;
            }}

            .read-more {{
                color: #667eea;
                font-weight: 600;
            }}

            .no-headlines {{
                text-align: center;
                padding: 60px 20px;
                color: #666;
            }}

            .no-headlines-icon {{
                font-size: 4em;
                margin-bottom: 20px;
            }}

            .footer {{
                background: #2c3e50;
                color: white;
                padding: 30px;
                text-align: center;
            }}

            .footer-links {{
                margin-top: 15px;
            }}

            .footer-link {{
                color: #667eea;
                text-decoration: none;
                margin: 0 10px;
            }}

            .footer-link:hover {{
                text-decoration: underline;
            }}

            .source-list {{
                margin-top: 20px;
                font-size: 0.85em;
                opacity: 0.8;
                line-height: 1.8;
            }}

            @media (max-width: 768px) {{
                .headlines-grid {{
                    grid-template-columns: 1fr;
                }}

                .stats {{
                    flex-direction: column;
                }}

                .header h1 {{
                    font-size: 2em;
                }}

                .filter-buttons {{
                    max-height: 150px;
                }}
            }}
        </style>
    </head>
    <body>
        <div class="container">
            <div class="header">
                <h1>📰 Multi-Source News by Mr Bukkan</h1>
                <p>Latest Headlines from {source_total} Different Sources</p>
            </div>

            <div class="stats">
                <div class="stat-item">
                    <div class="stat-number">{total}</div>
                    <div class="stat-label">Total Headlines</div>
                </div>
                <div class="stat-item">
                    <div class="stat-number">{source_total}</div>
                    <div class="stat-label">News Sources</div>
                </div>
                <div class="stat-item">
                    <div class="stat-number">Live</div>
                    <div class="stat-label">Status</div>
                </div>
            </div>

            <div class="timestamp">
                <strong>🕐 Last Updated:</strong> {last_updated}
            </div>

            <div class="filters">
                <div class="filter-search">
                    <input type="text" id="searchBox" placeholder="🔍 Search headlines..." onkeyup="searchHeadlines()">
                </div>
                <div class="filter-buttons">
                    <button class="filter-btn active" data-source="all" onclick="filterSource(this)">All Sources ({total})</button>
    """]

    ordered_sources = sorted(grouped_headlines)

    # The source name travels in a data attribute, never inside inline JS,
    # so quotes in a name cannot break the click handler.
    for source in ordered_sources:
        safe_source = escape(str(source), quote=True)
        parts.append(f"""
                    <button class="filter-btn" data-source="{safe_source}" onclick="filterSource(this)">{safe_source} ({len(grouped_headlines[source])})</button>
        """)

    parts.append("""
                </div>
            </div>

            <div class="content" id="newsContent">
    """)

    if headlines:
        for source in ordered_sources:
            source_headlines = grouped_headlines[source]
            safe_source = escape(str(source), quote=True)
            # Slicing (not indexing) tolerates an empty source name.
            safe_initial = escape(str(source)[:1], quote=True)
            parts.append(f"""
                <div class="source-section" data-source="{safe_source}">
                    <div class="source-header">
                        <div class="source-icon">{safe_initial}</div>
                        <div class="source-name">{safe_source}</div>
                        <div class="source-count">{len(source_headlines)} articles</div>
                    </div>

                    <div class="headlines-grid">
            """)

            for headline in source_headlines:
                published = str(headline.get('published') or 'N/A')
                # Truncate before escaping so an entity is never cut in half.
                if len(published) > 50:
                    published = published[:50] + '...'

                parts.append(f"""
                        <div class="headline-card">
                            <a href="{_safe_href(headline['link'])}" target="_blank" rel="noopener noreferrer">
                                <div class="headline-title">{escape(str(headline['title']), quote=True)}</div>
                                <div class="headline-meta">
                                    <span class="published-date">{escape(published, quote=True)}</span>
                                    <span class="read-more">Read more →</span>
                                </div>
                            </a>
                        </div>
                """)

            parts.append("""
                    </div>
                </div>
            """)
    else:
        parts.append("""
                <div class="no-headlines">
                    <div class="no-headlines-icon">📭</div>
                    <h2>No Headlines Available</h2>
                    <p>Please check back later or try refreshing the page.</p>
                </div>
        """)

    parts.append("""
            </div>

            <div class="footer">
                <p><strong>Multi-Source News by bukkan1309@gmail.com </strong></p>
                <p>Aggregating news from 30+ RSS feeds and web sources</p>
                <div class="source-list">
                    <strong>Sources Include:</strong> Google News, The Telegraph, The Hindu, Times of India, Indian Express,
                    NDTV, India Today, Hindustan Times, Economic Times, Business Standard, Mint, Ei Samay,
                    Sangbad Pratidin, ABP Ananda, Zee 24 Ghanta, News18 Bengali, Bengal Live, Bartaman Patrika and more
                </div>
                <div class="footer-links">
                    <a href="#" class="footer-link">About</a>
                    <a href="#" class="footer-link">Privacy</a>
                    <a href="#" class="footer-link">Sources</a>
                </div>
                <p style="margin-top: 15px; font-size: 0.85em; opacity: 0.8;">
                    All news content belongs to respective publishers
                </p>
            </div>
        </div>

        <script>
            function filterSource(button) {
                const source = button.dataset.source;
                const sections = document.querySelectorAll('.source-section');
                const buttons = document.querySelectorAll('.filter-btn');

                buttons.forEach(btn => btn.classList.remove('active'));
                button.classList.add('active');

                // Clear search when filtering
                document.getElementById('searchBox').value = '';

                if (source === 'all') {
                    sections.forEach(section => section.style.display = 'block');
                } else {
                    sections.forEach(section => {
                        if (section.dataset.source === source) {
                            section.style.display = 'block';
                        } else {
                            section.style.display = 'none';
                        }
                    });
                }
            }

            function searchHeadlines() {
                const searchTerm = document.getElementById('searchBox').value.toLowerCase();
                const sections = document.querySelectorAll('.source-section');

                // Reset filter buttons
                const buttons = document.querySelectorAll('.filter-btn');
                buttons.forEach(btn => btn.classList.remove('active'));

                if (searchTerm === '') {
                    // Show all if search is empty
                    sections.forEach(section => {
                        section.style.display = 'block';
                        const cards = section.querySelectorAll('.headline-card');
                        cards.forEach(card => card.style.display = 'block');
                    });
                    document.querySelector('.filter-btn').classList.add('active');
                    return;
                }

                sections.forEach(section => {
                    const cards = section.querySelectorAll('.headline-card');
                    let hasVisibleCard = false;

                    cards.forEach(card => {
                        const title = card.querySelector('.headline-title').textContent.toLowerCase();
                        if (title.includes(searchTerm)) {
                            card.style.display = 'block';
                            hasVisibleCard = true;
                        } else {
                            card.style.display = 'none';
                        }
                    });

                    // Show/hide section based on whether it has visible cards
                    section.style.display = hasVisibleCard ? 'block' : 'none';
                });
            }

            // Add smooth scroll to top button
            window.onscroll = function() {
                if (document.body.scrollTop > 300 || document.documentElement.scrollTop > 300) {
                    if (!document.getElementById('scrollTopBtn')) {
                        const btn = document.createElement('button');
                        btn.id = 'scrollTopBtn';
                        btn.innerHTML = '↑';
                        btn.style.cssText = `
                            position: fixed;
                            bottom: 30px;
                            right: 30px;
                            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                            color: white;
                            border: none;
                            border-radius: 50%;
                            width: 50px;
                            height: 50px;
                            font-size: 24px;
                            cursor: pointer;
                            box-shadow: 0 5px 15px rgba(0,0,0,0.3);
                            z-index: 1000;
                            transition: all 0.3s ease;
                        `;
                        btn.onclick = function() {
                            window.scrollTo({ top: 0, behavior: 'smooth' });
                        };
                        btn.onmouseover = function() {
                            this.style.transform = 'scale(1.1)';
                        };
                        btn.onmouseout = function() {
                            this.style.transform = 'scale(1)';
                        };
                        document.body.appendChild(btn);
                    }
                } else {
                    const btn = document.getElementById('scrollTopBtn');
                    if (btn) btn.remove();
                }
            };
        </script>
    </body>
    </html>
    """)

    return "".join(parts)

def main():
    """Run the news aggregator end to end and report the outcome.

    Fetches headlines through ``BengaliNewsAggregator.fetch_all_news()``,
    renders them with ``generate_html()``, writes the page to
    ``multi_source_news_aggregator.html`` in the current working directory,
    and prints a per-source headline count. When the fetch returns nothing
    (any falsy value), no file is written and a troubleshooting hint is
    printed instead.
    """
    # Local import keeps this function self-contained.
    from collections import Counter

    print("\n" + "="*70)
    print(" " * 10 + "MULTI-SOURCE NEWS AGGREGATOR")
    print(" " * 15 + "Powered by 30+ Sources")
    print("="*70 + "\n")

    # Create aggregator instance
    aggregator = BengaliNewsAggregator()

    # Fetch all news
    headlines = aggregator.fetch_all_news()

    # Guard clause: nothing to render, so explain and stop.
    if not headlines:
        print("\n✗ No headlines were collected. Please check your internet connection")
        print("  or the availability of news sources.\n")
        return

    # Generate HTML
    print("Generating HTML page...")
    html_content = generate_html(headlines)

    # Save to file (explicit UTF-8 so non-ASCII headlines are written correctly)
    output_file = 'multi_source_news_aggregator.html'
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"\n✓ Success! HTML file generated: {output_file}")
    print(f"✓ Total unique headlines: {len(headlines)}")

    # Show source breakdown. most_common() sorts by count, descending, and
    # keeps first-seen order for ties -- the same order the previous
    # sorted(..., reverse=True) produced.
    sources = Counter(headline['source'] for headline in headlines)

    print("\n📊 Headlines by Source:")
    print("-" * 70)
    for source, count in sources.most_common():
        print(f"  {source}: {count} headlines")

    print("\n" + "="*70)
    print(" " * 10 + "Open the HTML file in your browser!")
    print("="*70 + "\n")

# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Collecting feedparser
  Downloading feedparser-6.0.12-py3-none-any.whl.metadata (2.7 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... done
Downloading feedparser-6.0.12-py3-none-any.whl (81 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 81.5/81.5 kB 3.2 MB/s eta 0:00:00
Building wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... done
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6046 sha256=7c4e07b589190004600602246ec0c88839aa55bd29b9f33cd0109e8ce09ddbb0
  Stored in directory: /root/.cache/pip/wheels/03/f5/1a/23761066dac1d0e8e683e5fdb27e12de53209d05a4a37e6246
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser
Successfully installed feedparser-6.0.12 sgmllib3k-1.0.0
