# In [None]:
import feedparser
import re
import logging
from typing import List, Dict, Set
from datetime import datetime

logger = logging.getLogger(__name__)

class RSSService:
    """Collect news items from a fixed set of RSS feeds and tag them with tickers.

    Every feed listed in ``FEED_URLS`` is parsed with ``feedparser``. Each entry
    is converted into a plain dict and its text is scanned for upper-case Latin
    words that are members of ``known_tickers``.
    """

    # Free news sources, keyed by the short name stored in each item's 'source'.
    FEED_URLS = {
        'bcs_express': 'https://bcs-express.ru/feed',
        'finam': 'https://www.finam.ru/analysis/conews/rsspoint/',
        # More feeds (e.g. RBC or Investing) can be added once their RSS URLs are found.
    }

    # Ticker candidates: standalone runs of 3-5 upper-case ASCII letters.
    # Compiled once so the pattern is not looked up again for every entry.
    _TICKER_RE = re.compile(r'\b[A-Z]{3,5}\b')

    # Maximum number of characters of an entry's summary kept in a news item.
    _SUMMARY_MAX_LEN = 500

    def __init__(self, known_tickers: Set[str]):
        """Store the tickers that should be recognised in news text.

        Args:
            known_tickers: Collection of ticker symbols (e.g. ``'GAZP'``,
                ``'SBER'``) used as the whitelist for ``_extract_tickers``.
                Only membership tests (``in``) are performed on it.
        """
        self.known_tickers = known_tickers

    def _extract_tickers(self, text: str) -> List[str]:
        """Return the known tickers mentioned in *text*.

        Simple heuristic: find upper-case Latin words of 3-5 letters and keep
        those present in ``self.known_tickers``.

        Args:
            text: Text to scan; may be empty or ``None``.

        Returns:
            Unique matching tickers in alphabetical order (sorted so the
            result is deterministic between runs); ``[]`` for empty input.
        """
        if not text:
            return []

        candidates = self._TICKER_RE.findall(text)
        # Keep only candidates from the whitelist; the set removes duplicates.
        return sorted({word for word in candidates if word in self.known_tickers})

    def _build_news_item(self, entry, source_name: str) -> Dict:
        """Convert one feed entry into a news-item dict.

        Args:
            entry: Mapping-like feed entry (supports ``[]`` and ``.get``).
            source_name: Key of the feed in ``FEED_URLS`` the entry came from.

        Returns:
            Dict with the keys ``title``, ``link``, ``published``, ``source``,
            ``tickers`` and ``summary``.

        Raises:
            KeyError: If the entry has no ``title`` or no ``link``.
        """
        title = entry['title']
        link = entry['link']
        # `or ''` also covers a field that is present but None.
        summary = entry.get('summary') or ''
        description = entry.get('description') or ''

        # Full text used for ticker detection.
        full_text = f"{title} {summary} {description}"

        return {
            'title': title,
            'link': link,
            # Falls back to the local fetch time when the feed gives no date.
            'published': entry.get('published', str(datetime.now())),
            'source': source_name,
            # An empty list means no known ticker was mentioned; such
            # "general market" items are kept as well.
            'tickers': self._extract_tickers(full_text),
            # Cut overly long summaries.
            'summary': summary[:self._SUMMARY_MAX_LEN],
        }

    def fetch_all_news(self) -> List[Dict]:
        """Walk through every RSS feed in ``FEED_URLS`` and collect its news.

        A failure in one feed or one entry is logged and skipped, so the
        remaining feeds and entries are still processed.

        Returns:
            List of news-item dicts (see ``_build_news_item``) from all feeds,
            in feed order and then entry order.
        """
        all_news = []

        for source_name, url in self.FEED_URLS.items():
            logger.info(f"üì° –°–∫–∞—á–∏–≤–∞–µ–º –ª–µ–Ω—Ç—É: {source_name}...")
            try:
                feed = feedparser.parse(url)

                # feedparser reports fetch/parse problems through the bozo
                # flag instead of raising, so surface them explicitly.
                if getattr(feed, 'bozo', False):
                    logger.warning(
                        "Feed %s is malformed or could not be fetched: %s",
                        source_name,
                        getattr(feed, 'bozo_exception', None),
                    )

                for entry in feed.entries:
                    # Handle errors per entry so one malformed entry does not
                    # discard the rest of the feed.
                    try:
                        news_item = self._build_news_item(entry, source_name)
                    except Exception as e:
                        logger.warning(
                            "Skipping malformed entry in feed %s: %r",
                            source_name,
                            e,
                        )
                        continue
                    all_news.append(news_item)

            except Exception as e:
                logger.error(f"‚ö†Ô∏è –û—à–∏–±–∫–∞ —Å –ª–µ–Ω—Ç–æ–π {source_name}: {e}")

        logger.info(f"‚úÖ –í—Å–µ–≥–æ —Å–æ–±—Ä–∞–Ω–æ –Ω–æ–≤–æ—Å—Ç–µ–π: {len(all_news)}")
        return all_news
