In [1]:
# Cell 1: Setup and Configuration
import os
import logging
import json
from datetime import datetime
import numpy as np
import pandas as pd
from typing import List, Dict, Any
from IPython.display import Markdown, display
import requests
import bs4
from bs4 import BeautifulSoup
import time
from tqdm import tqdm
import yaml

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load configuration
CONFIG_PATH = 'config.yaml'  # Create this file with your configurations
try:
    with open(CONFIG_PATH) as f:
        config = yaml.safe_load(f)
except FileNotFoundError:
    logger.warning("Config file not found. Using default configuration.")
    config = {
        'max_articles_per_source': 5,
        'request_timeout': 10,
        'embedding_dimension': 768,
        'cache_dir': './cache',
        'database_path': './vectordb'
    }

# Install and import required packages
def setup_environment():
    try:
        !pip install -q "google-generativeai>=0.3.1" "chromadb>=0.6.3" "requests" \
            "beautifulsoup4" "lxml[html_clean]" "newspaper3k" "pyyaml" "tqdm"
        logger.info("Packages installed successfully")
    except Exception as e:
        logger.error(f"Error installing packages: {e}")
        raise

# Initialize API and models
def initialize_models():
    try:
        import google.generativeai as genai
        from kaggle_secrets import UserSecretsClient
        
        # Get API key securely
        GOOGLE_API_KEY = UserSecretsClient().get_secret("GOOGLE_API_KEY")
        genai.configure(api_key=GOOGLE_API_KEY)
        
        # Find available models
        models = genai.list_models()
        text_model = next((m.name for m in models if "gemini-pro" in m.name.lower()), "gemini-1.0-pro")
        embedding_model = next((m.name for m in models if "embedding" in m.name.lower()), "embedding-001")
        
        # Model configurations
        generation_config = {
            "temperature": 0.7,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 1024,
        }
        
        safety_settings = [
            {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
            {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        ]
        
        generation_model = genai.GenerativeModel(
            model_name=text_model,
            generation_config=generation_config,
            safety_settings=safety_settings
        )
        
        return generation_model, embedding_model
        
    except Exception as e:
        logger.error(f"Error initializing models: {e}")
        raise

# Setup cache directory
os.makedirs(config['cache_dir'], exist_ok=True)

# Run setup
setup_environment()
generation_model, embedding_model = initialize_models()

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m80.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m69.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m57.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m


In [2]:
# Cell 2: Enhanced News Fetching with Caching

class NewsFetcher:
    def __init__(self, cache_dir: str, timeout: int = 10):
        self.cache_dir = cache_dir
        self.timeout = timeout
        self.cache_file = os.path.join(cache_dir, 'news_cache.json')
        self.rss_feeds = {
            'technology': [
                'https://feeds.feedburner.com/TechCrunch/',
                'https://www.wired.com/feed/rss',
            ],
            'business': [
                'https://feeds.marketwatch.com/marketwatch/topstories/',
                'https://www.forbes.com/business/feed/',
            ],
            'science': [
                'https://rss.nytimes.com/services/xml/rss/nyt/Science.xml',
                'https://www.sciencedaily.com/rss/all.xml',
            ],
            'health': [
                'https://rss.nytimes.com/services/xml/rss/nyt/Health.xml',
                'https://www.who.int/rss-feeds/news-english.xml',
            ]
        }
        
    def load_cache(self) -> Dict:
        try:
            if os.path.exists(self.cache_file):
                with open(self.cache_file, 'r') as f:
                    return json.load(f)
            return {}
        except Exception as e:
            logger.error(f"Error loading cache: {e}")
            return {}

    def save_cache(self, cache_data: Dict):
        try:
            with open(self.cache_file, 'w') as f:
                json.dump(cache_data, f)
        except Exception as e:
            logger.error(f"Error saving cache: {e}")

    def fetch_article_content(self, url: str) -> str:
        try:
            from newspaper import Article
            article = Article(url)
            article.download()
            article.parse()
            return article.text
        except Exception as e:
            logger.warning(f"Error using newspaper3k: {e}")
            try:
                response = requests.get(url, timeout=self.timeout)
                soup = BeautifulSoup(response.content, 'html.parser')
                paragraphs = soup.find_all('p')
                return ' '.join([p.text for p in paragraphs])
            except Exception as e:
                logger.error(f"Error fetching article content: {e}")
                return ""

    def fetch_news(self, max_articles_per_category: int = 5) -> List[Dict]:
        cache = self.load_cache()
        current_date = datetime.now().strftime("%Y-%m-%d")
        
        if current_date in cache:
            logger.info("Using cached news data")
            return cache[current_date]
        
        all_articles = []
        
        for category, feeds in tqdm(self.rss_feeds.items(), desc="Fetching news"):
            articles_count = 0
            for feed_url in feeds:
                if articles_count >= max_articles_per_category:
                    break
                    
                try:
                    response = requests.get(feed_url, timeout=self.timeout)
                    soup = BeautifulSoup(response.content, features="xml")
                    items = soup.findAll('item')
                    
                    for item in items:
                        if articles_count >= max_articles_per_category:
                            break
                            
                        title = item.find('title').text
                        link = item.find('link').text
                        content = self.fetch_article_content(link)
                        
                        if content:
                            all_articles.append({
                                'title': title,
                                'link': link,
                                'content': content,
                                'category': category,
                                'date': current_date
                            })
                            articles_count += 1
                            
                except Exception as e:
                    logger.error(f"Error fetching from {feed_url}: {e}")
                    continue
                    
                time.sleep(1)  # Rate limiting
        
        # Update cache
        cache[current_date] = all_articles
        self.save_cache(cache)
        
        return all_articles

# Initialize fetcher and get news
news_fetcher = NewsFetcher(config['cache_dir'])
all_articles = news_fetcher.fetch_news(max_articles_per_category=config['max_articles_per_source'])

Fetching news: 100%|██████████| 4/4 [00:12<00:00,  3.14s/it]


In [3]:
# Cell 3: Enhanced Summarization with Few-shot Learning

class NewsSummarizer:
    def __init__(self, model, cache_dir: str):
        self.model = model
        self.cache_dir = cache_dir
        self.cache_file = os.path.join(cache_dir, 'summary_cache.json')
        
        # Load few-shot examples
        self.few_shot_examples = [
            {
                "article": "The European Union has approved a new directive aimed at reducing single-use plastics...",
                "summary": "EU bans single-use plastics by 2021 and mandates 90% recycling of plastic bottles by 2029."
            },
            {
                "article": "Researchers at Stanford University have developed a new artificial intelligence system...",
                "summary": "Stanford AI system diagnoses pneumonia from X-rays with 95% accuracy, outperforming human radiologists."
            }
        ]
        
    def load_cache(self) -> Dict:
        try:
            if os.path.exists(self.cache_file):
                with open(self.cache_file, 'r') as f:
                    return json.load(f)
            return {}
        except Exception as e:
            logger.error(f"Error loading summary cache: {e}")
            return {}

    def save_cache(self, cache_data: Dict):
        try:
            with open(self.cache_file, 'w') as f:
                json.dump(cache_data, f)
        except Exception as e:
            logger.error(f"Error saving summary cache: {e}")

    def create_few_shot_prompt(self, article_content: str) -> str:
        prompt = "Generate a concise, informative summary of the following news article:\n\n"
        
        # Add few-shot examples
        for example in self.few_shot_examples:
            prompt += f"Article: {example['article']}\n"
            prompt += f"Summary: {example['summary']}\n\n"
        
        # Add target article
        prompt += f"Article: {article_content}\nSummary:"
        
        return prompt

    def summarize_article(self, article_content: str, max_length: int = 150) -> str:
        # Check cache first
        cache = self.load_cache()
        content_hash = hash(article_content)
        
        if str(content_hash) in cache:
            return cache[str(content_hash)]
        
        try:
            prompt = self.create_few_shot_prompt(article_content[:5000])  # Truncate if too long
            response = self.model.generate_content(prompt)
            summary = response.text.strip()
            
            # Ensure summary length
            if len(summary) > max_length:
                summary = summary[:max_length].rsplit(' ', 1)[0] + '...'
            
            # Cache the result
            cache[str(content_hash)] = summary
            self.save_cache(cache)
            
            return summary
            
        except Exception as e:
            logger.error(f"Error generating summary: {e}")
            # Fallback to extractive summary
            sentences = article_content.split('.')[:3]
            return '. '.join(sentences) + '...'

# Initialize summarizer and process articles
summarizer = NewsSummarizer(generation_model, config['cache_dir'])

for article in tqdm(all_articles, desc="Generating summaries"):
    article['summary'] = summarizer.summarize_article(article['content'])

# Display sample results
print("\nSample Summaries:")
for article in all_articles[:3]:
    print(f"\nTitle: {article['title']}")
    print(f"Summary: {article['summary']}")

Generating summaries: 100%|██████████| 15/15 [00:03<00:00,  4.58it/s]


Sample Summaries:

Title: Top 10 AI Tools That Will Transform Your Content Creation in 2025
Summary: Top 10 AI Tools That Will Transform Your Content Creation in 2025





Looking to level up your content creation game in 2025? You're in the right place! The digital landscape has evolved dramatically, and AI tools have become essential for creators who want to stay ahead of the curve.  In this guide, I'll show you the top 10 AI tools that are revolutionizing content creation and making creators' lives easier. 

Why You Need These AI Tools in 2025

Content creation has become more demanding than ever...

Title: LimeWire AI Studio Review 2023: Details, Pricing & Features
Summary: In the rapidly advancing landscape of AI technology and innovation, LimeWire emerges as a unique platform in the realm of generative AI tools.  This platform not only stands out from the multitude of existing AI tools but also brings a fresh approach to content generation.  LimeWire not only empowers users to c




In [4]:
# Cell 4: Enhanced Embeddings System

class ArticleEmbedder:
    def __init__(self, model_name: str, cache_dir: str, dimension: int = 768):
        self.model_name = model_name
        self.cache_dir = cache_dir
        self.dimension = dimension
        self.cache_file = os.path.join(cache_dir, 'embedding_cache.json')
        
    def load_cache(self) -> Dict:
        try:
            if os.path.exists(self.cache_file):
                with open(self.cache_file, 'r') as f:
                    cache = json.load(f)
                    # Convert string lists back to numpy arrays
                    return {k: np.array(v) for k, v in cache.items()}
            return {}
        except Exception as e:
            logger.error(f"Error loading embedding cache: {e}")
            return {}

    def save_cache(self, cache_data: Dict):
        try:
            # Convert numpy arrays to lists for JSON serialization
            serializable_cache = {k: v.tolist() for k, v in cache_data.items()}
            with open(self.cache_file, 'w') as f:
                json.dump(serializable_cache, f)
        except Exception as e:
            logger.error(f"Error saving embedding cache: {e}")

    def generate_embedding(self, text: str) -> np.ndarray:
        try:
            import google.generativeai as genai
            embedding_model = genai.GenerativeModel(self.model_name)
            result = embedding_model.embed_content(text=text)
            return np.array(result.embedding)
        except Exception as e:
            logger.error(f"Error generating embedding: {e}")
            return np.random.randn(self.dimension)

    def get_embedding(self, text: str) -> np.ndarray:
        cache = self.load_cache()
        text_hash = hash(text)
        
        if str(text_hash) in cache:
            return cache[str(text_hash)]
        
        embedding = self.generate_embedding(text)
        cache[str(text_hash)] = embedding
        self.save_cache(cache)
        
        return embedding

    def find_similar_articles(self, target_embedding: np.ndarray, embeddings: List[np.ndarray], 
                            top_n: int = 3) -> List[tuple]:
        similarities = []
        
        for i, embedding in enumerate(embeddings):
            similarity = np.dot(target_embedding, embedding) / (
                np.linalg.norm(target_embedding) * np.linalg.norm(embedding)
            )
            similarities.append((i, similarity))
        
        return sorted(similarities, key=lambda x: x[1], reverse=True)[:top_n]

# Initialize embedder and process articles
embedder = ArticleEmbedder(embedding_model, config['cache_dir'])

# Generate embeddings for all articles
print("Generating embeddings...")
for article in tqdm(all_articles):
    combined_text = f"{article['title']} {article['summary']}"
    article['embedding'] = embedder.get_embedding(combined_text)

# Demonstrate similarity search
print("\nSimilarity Analysis:")
for i, article in enumerate(all_articles[:3]):
    print(f"\nFinding similar articles for: {article['title']}")
    embeddings = [a['embedding'] for a in all_articles]
    similar_indices = embedder.find_similar_articles(article['embedding'], embeddings)
    
    for idx, score in similar_indices:
        if idx != i:  # Skip the article itself
            print(f"- {all_articles[idx]['title']} (Similarity: {score:.4f})")

Generating embeddings...


100%|██████████| 15/15 [00:00<00:00, 123.08it/s]


Similarity Analysis:

Finding similar articles for: Top 10 AI Tools That Will Transform Your Content Creation in 2025
- Top 10 AI Content Generator & Writer Tools in 2022 (Similarity: 0.0837)
- The Very Territorial Caterpillar (Similarity: 0.0733)

Finding similar articles for: LimeWire AI Studio Review 2023: Details, Pricing & Features
- Top 10 AI Tools That Will Transform Your Content Creation in 2025 (Similarity: 0.0548)
- Why Cameras Are Popping Up in Eldercare Facilities (Similarity: 0.0395)

Finding similar articles for: Top 10 AI Tools in 2023 That Will Make Your Life Easier
- As RFK Jr. Champions Chronic Disease Prevention, Key Research Is Cut (Similarity: 0.0355)
- An Endangered Galápagos Tortoise Is a First-Time Mother at 100 (Similarity: 0.0302)





In [5]:
# Cell 5: Enhanced RAG System with ChromaDB

import chromadb
from chromadb.config import Settings
from chromadb.utils import embedding_functions
import shutil

class NewsRAGSystem:
    def __init__(self, database_path: str):
        self.database_path = database_path
        
        # Clean up existing database if it exists
        if os.path.exists(database_path):
            try:
                shutil.rmtree(database_path)
                logger.info(f"Cleaned up existing database at {database_path}")
            except Exception as e:
                logger.warning(f"Could not clean up existing database: {e}")

        # Create fresh database directory
        os.makedirs(database_path, exist_ok=True)

        try:
            self.client = chromadb.Client(Settings(
                chroma_db_impl="duckdb+parquet",
                persist_directory=database_path,
                anonymized_telemetry=False  # Disable telemetry
            ))
            logger.info("Successfully initialized ChromaDB client")
        except Exception as e:
            logger.error(f"Error initializing ChromaDB client: {e}")
            # Fallback to in-memory client
            self.client = chromadb.Client()
            logger.info("Falling back to in-memory ChromaDB client")

        self.collection_name = "news_articles"
        self.setup_collection()
        
    def setup_collection(self):
        try:
            # Delete existing collection if it exists
            try:
                self.client.delete_collection(self.collection_name)
                logger.info(f"Deleted existing collection: {self.collection_name}")
            except:
                pass

            # Create new collection
            self.collection = self.client.create_collection(
                name=self.collection_name,
                metadata={"description": "News articles collection"}
            )
            logger.info(f"Created new collection: {self.collection_name}")
        except Exception as e:
            logger.error(f"Error setting up ChromaDB collection: {e}")
            raise

    def add_articles(self, articles: List[Dict]):
        try:
            # Prepare data for insertion
            ids = [str(i) for i in range(len(articles))]
            embeddings = [article['embedding'].tolist() for article in articles]
            metadatas = [{
                "title": article['title'],
                "category": article['category'],
                "date": article['date']
            } for article in articles]
            documents = [article['content'] for article in articles]
            
            # Add to collection in batches
            batch_size = 100
            for i in range(0, len(articles), batch_size):
                batch_end = min(i + batch_size, len(articles))
                self.collection.add(
                    ids=ids[i:batch_end],
                    embeddings=embeddings[i:batch_end],
                    metadatas=metadatas[i:batch_end],
                    documents=documents[i:batch_end]
                )
            logger.info(f"Added {len(articles)} articles to the collection")
            
        except Exception as e:
            logger.error(f"Error adding articles to ChromaDB: {e}")
            raise

    def get_personalized_recommendations(self, 
                                       user_preferences: List[str], 
                                       num_results: int = 5) -> List[Dict]:
        try:
            # Generate embedding for user preferences
            query = " ".join(user_preferences)
            query_embedding = embedder.get_embedding(query)
            
            # Query the collection
            results = self.collection.query(
                query_embeddings=[query_embedding.tolist()],
                n_results=min(num_results, self.collection.count()),
                include=["metadatas", "documents", "distances"]
            )
            
            # Format results
            recommendations = []
            for i in range(len(results['ids'][0])):
                recommendations.append({
                    "title": results['metadatas'][0][i]['title'],
                    "category": results['metadatas'][0][i]['category'],
                    "content": results['documents'][0][i],
                    "relevance_score": 1 - results['distances'][0][i]  # Convert distance to similarity
                })
            
            return recommendations
            
        except Exception as e:
            logger.error(f"Error getting recommendations: {e}")
            return []

# Ensure database directory exists in config
if 'database_path' not in config:
    config['database_path'] = os.path.join(config['cache_dir'], 'chromadb')

# Initialize RAG system
try:
    rag_system = NewsRAGSystem(config['database_path'])
    
    # Add articles to the system
    rag_system.add_articles(all_articles)
    
    # Demonstrate personalized recommendations
    user_profiles = {
        "tech_enthusiast": ["artificial intelligence", "software development", "tech startups"],
        "health_conscious": ["medical research", "healthcare innovation", "wellness"],
        "business_analyst": ["market trends", "economic policy", "business strategy"]
    }

    print("\nPersonalized Recommendations Demo:")
    for profile, interests in user_profiles.items():
        print(f"\nRecommendations for {profile}:")
        recommendations = rag_system.get_personalized_recommendations(interests)
        for rec in recommendations:
            print(f"- {rec['title']} ({rec['category']}) - Relevance: {rec['relevance_score']:.4f}")

except Exception as e:
    logger.error(f"Error in RAG system setup: {e}")
    print("Failed to initialize RAG system. Please check the logs for details.")


Personalized Recommendations Demo:

Recommendations for tech_enthusiast:
- Top 10 AI Content Generator & Writer Tools in 2022 (technology) - Relevance: -1443.1567
- Kennedy Attends Funeral of Texas Girl Who Died of Measles (health) - Relevance: -1473.2455
- Top 10 AI Tools That Will Transform Your Content Creation in 2025 (technology) - Relevance: -1479.2073
- Giant Sloths’ Hairy Truth Revealed by Scientists (science) - Relevance: -1484.0428
- Scientists Revive the Dire Wolf, or Something Close (science) - Relevance: -1495.2771

Recommendations for health_conscious:
- Kennedy Kicks Off Tour on Fighting Chronic Disease (health) - Relevance: -1409.4835
- Why Cameras Are Popping Up in Eldercare Facilities (health) - Relevance: -1436.9148
- Top 10 AI Tools in 2023 That Will Make Your Life Easier (technology) - Relevance: -1454.4076
- Giant Sloths’ Hairy Truth Revealed by Scientists (science) - Relevance: -1461.7936
- Top 10 AI Tools That Will Transform Your Content Creation in 2025 (techn

In [6]:
# Cell 6: Enhanced Interactive Dashboard

class NewsDashboard:
    def __init__(self, rag_system: NewsRAGSystem):
        self.rag_system = rag_system
        
    def create_article_card(self, article: Dict) -> str:
        relevance_score = article.get('relevance_score', None)
        relevance_html = f"<br>Relevance Score: {relevance_score:.4f}" if relevance_score else ""
        
        return f"""
        <div style="border: 1px solid #ddd; padding: 15px; margin: 10px 0; border-radius: 5px;">
            <h3>{article['title']}</h3>
            <p><strong>Category:</strong> {article['category']}{relevance_html}</p>
            <p>{article.get('summary', '')}</p>
            <a href="{article.get('link', '#')}" target="_blank">Read more</a>
        </div>
        """

    def display_personalized_feed(self, user_preferences: List[str]):
        recommendations = self.rag_system.get_personalized_recommendations(user_preferences)
        
        html = f"""
        <h2>Your Personalized News Feed</h2>
        <p>Based on interests: {', '.join(user_preferences)}</p>
        """
        
        for article in recommendations:
            html += self.create_article_card(article)
            
        display(HTML(html))

    def display_trending_topics(self):
        # Analyze frequent terms in recent articles
        from collections import Counter
        import re
        
        all_text = ' '.join([a['title'] + ' ' + a.get('summary', '') for a in all_articles])
        words = re.findall(r'\w+', all_text.lower())
        common_words = Counter(words).most_common(10)
        
        html = """
        <div style="margin: 20px 0;">
            <h3>Trending Topics</h3>
            <div style="display: flex; flex-wrap: wrap;">
        """
        
        for word, count in common_words:
            if len(word) > 3:  # Filter out short words
                html += f"""
                <span style="background: #f0f0f0; padding: 5px 10px; margin: 5px; 
                border-radius: 15px;">{word} ({count})</span>
                """
                
        html += "</div></div>"
        display(HTML(html))

# Initialize and display dashboard
from IPython.display import HTML
dashboard = NewsDashboard(rag_system)

# Display for different user profiles
for profile, interests in user_profiles.items():
    print(f"\nDashboard for {profile}")
    dashboard.display_trending_topics()
    dashboard.display_personalized_feed(interests)


Dashboard for tech_enthusiast



Dashboard for health_conscious



Dashboard for business_analyst


In [7]:
# Cell 7: Enhanced System Evaluation and Analytics

class NewsSystemEvaluator:
    def __init__(self, generation_model):
        self.model = generation_model
        self.metrics = {
            'summary_quality': [],
            'recommendation_relevance': [],
            'system_performance': {}
        }
        
    def evaluate_summary(self, article: Dict) -> Dict:
        try:
            prompt = f"""
            Evaluate this news summary on the following criteria (score 1-10):
            1. Accuracy: Does it capture the main points?
            2. Conciseness: Is it appropriately brief?
            3. Clarity: Is it easy to understand?

            Original Article (excerpt): {article['content'][:500]}...
            Summary: {article.get('summary', 'No summary available')}

            Format your response as:
            Accuracy Score: [1-10]
            Conciseness Score: [1-10]
            Clarity Score: [1-10]
            Overall Score: [average]
            Feedback: [brief feedback]
            """
            
            response = self.model.generate_content(prompt)
            
            return {
                'article_title': article['title'],
                'evaluation_text': response.text,
                'timestamp': datetime.now().isoformat(),
                'success': True
            }
        except Exception as e:
            logger.error(f"Error evaluating summary: {e}")
            return {
                'article_title': article.get('title', 'Unknown'),
                'evaluation_text': f"Evaluation failed: {str(e)}",
                'timestamp': datetime.now().isoformat(),
                'success': False
            }

    def evaluate_recommendations(self, user_preferences: List[str], 
                               recommendations: List[Dict]) -> Dict:
        try:
            relevance_scores = [rec.get('relevance_score', 0) for rec in recommendations]
            return {
                'average_relevance': np.mean(relevance_scores) if relevance_scores else 0,
                'preference_coverage': len(set(user_preferences)),
                'recommendation_count': len(recommendations),
                'success': True
            }
        except Exception as e:
            logger.error(f"Error evaluating recommendations: {e}")
            return {
                'error': str(e),
                'success': False
            }

    def generate_report(self):
        try:
            # Evaluate summaries
            print("Evaluating summaries...")
            for article in tqdm(all_articles[:5]):  # Evaluate a sample
                evaluation = self.evaluate_summary(article)
                self.metrics['summary_quality'].append(evaluation)

            # Evaluate recommendations
            print("\nEvaluating recommendations...")
            for profile, interests in user_profiles.items():
                recommendations = rag_system.get_personalized_recommendations(interests)
                self.metrics['recommendation_relevance'].append({
                    'profile': profile,
                    'metrics': self.evaluate_recommendations(interests, recommendations)
                })

            # Generate report
            report = """
            # News System Evaluation Report

            ## Summary Quality Analysis
            """
            
            # Add summary evaluations to report
            for i, eval_data in enumerate(self.metrics['summary_quality']):
                report += f"\n### Summary {i+1}: {eval_data['article_title']}\n"
                report += f"Evaluation Results:\n{eval_data['evaluation_text']}\n"
                report += f"Timestamp: {eval_data['timestamp']}\n"
                report += "-" * 50 + "\n"

            # Add recommendation performance to report
            report += "\n## Recommendation Performance Analysis\n"
            for rec_eval in self.metrics['recommendation_relevance']:
                report += f"\n### Profile: {rec_eval['profile']}\n"
                metrics = rec_eval['metrics']
                if metrics.get('success', False):
                    report += f"- Average Relevance: {metrics['average_relevance']:.2f}\n"
                    report += f"- Preference Coverage: {metrics['preference_coverage']}\n"
                    report += f"- Recommendations: {metrics['recommendation_count']}\n"
                else:
                    report += f"- Evaluation failed: {metrics.get('error', 'Unknown error')}\n"

            # Add system performance metrics
            self.metrics['system_performance'] = {
                'total_articles': len(all_articles),
                'evaluation_timestamp': datetime.now().isoformat(),
                'successful_summaries': sum(1 for e in self.metrics['summary_quality'] if e['success']),
                'successful_recommendations': sum(1 for e in self.metrics['recommendation_relevance'] 
                                               if e['metrics'].get('success', False))
            }

            report += "\n## System Performance Metrics\n"
            for key, value in self.metrics['system_performance'].items():
                report += f"- {key}: {value}\n"

            # Display the report
            display(Markdown(report))

            # Save detailed metrics
            self.save_metrics()

        except Exception as e:
            logger.error(f"Error generating report: {e}")
            display(Markdown(f"# Error Generating Report\nAn error occurred: {str(e)}"))

    def save_metrics(self):
        """Save metrics to file with proper error handling"""
        try:
            metrics_file = os.path.join(config['cache_dir'], 'evaluation_metrics.json')
            with open(metrics_file, 'w') as f:
                # Convert numpy values to native Python types
                metrics_dict = json.loads(json.dumps(self.metrics, default=str))
                json.dump(metrics_dict, f, indent=2)
            logger.info(f"Metrics saved to {metrics_file}")
        except Exception as e:
            logger.error(f"Error saving metrics: {e}")

# Run evaluation with proper error handling
try:
    print("Starting system evaluation...")
    evaluator = NewsSystemEvaluator(generation_model)
    evaluator.generate_report()
    print("Evaluation completed successfully!")
except Exception as e:
    logger.error(f"Fatal error in evaluation: {e}")
    print(f"Evaluation failed: {str(e)}")

Starting system evaluation...
Evaluating summaries...


100%|██████████| 5/5 [00:00<00:00,  5.61it/s]



Evaluating recommendations...



            # News System Evaluation Report

            ## Summary Quality Analysis
            
### Summary 1: Top 10 AI Tools That Will Transform Your Content Creation in 2025
Evaluation Results:
Evaluation failed: 404 Gemini 1.0 Pro Vision has been deprecated on July 12, 2024. Consider switching to different model, for example gemini-1.5-flash.
Timestamp: 2025-04-07T22:42:51.479424
--------------------------------------------------

### Summary 2: LimeWire AI Studio Review 2023: Details, Pricing & Features
Evaluation Results:
Evaluation failed: 404 Gemini 1.0 Pro Vision has been deprecated on July 12, 2024. Consider switching to different model, for example gemini-1.5-flash.
Timestamp: 2025-04-07T22:42:51.663624
--------------------------------------------------

### Summary 3: Top 10 AI Tools in 2023 That Will Make Your Life Easier
Evaluation Results:
Evaluation failed: 404 Gemini 1.0 Pro Vision has been deprecated on July 12, 2024. Consider switching to different model, for example gemini-1.5-flash.
Timestamp: 2025-04-07T22:42:51.844140
--------------------------------------------------

### Summary 4: Top 10 AI Content Generator & Writer Tools in 2022
Evaluation Results:
Evaluation failed: 404 Gemini 1.0 Pro Vision has been deprecated on July 12, 2024. Consider switching to different model, for example gemini-1.5-flash.
Timestamp: 2025-04-07T22:42:52.019527
--------------------------------------------------

### Summary 5: Beginner Guide to CJ Affiliate (Commission Junction) in 2022
Evaluation Results:
Evaluation failed: 404 Gemini 1.0 Pro Vision has been deprecated on July 12, 2024. Consider switching to different model, for example gemini-1.5-flash.
Timestamp: 2025-04-07T22:42:52.194431
--------------------------------------------------

## Recommendation Performance Analysis

### Profile: tech_enthusiast
- Average Relevance: -1474.99
- Preference Coverage: 3
- Recommendations: 5

### Profile: health_conscious
- Average Relevance: -1445.66
- Preference Coverage: 3
- Recommendations: 5

### Profile: business_analyst
- Average Relevance: -1481.16
- Preference Coverage: 3
- Recommendations: 5

## System Performance Metrics
- total_articles: 15
- evaluation_timestamp: 2025-04-07T22:42:52.221162
- successful_summaries: 0
- successful_recommendations: 3


Evaluation completed successfully!
