In [1]:
import os
import re
from collections import defaultdict

import feedparser
import torch
from newspaper import Article
from transformers import BartForConditionalGeneration, BartTokenizer

In [2]:
# Summarization checkpoint shared by every summary generated in this notebook.
# Both objects are loaded once here so later cells can reuse them.
model_name = "sshleifer/distilbart-cnn-12-6"
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

In [3]:
# Reader profiles keyed by display name. Each profile carries:
#   "interests"       - topic keywords matched against article titles
#   "sources"         - general RSS feed URLs
#   "country_sources" - RSS feed URLs specific to the reader's country
users = {
    "Alex Parker": {
        "interests": [
            "AI",
            "cybersecurity",
            "blockchain",
            "startups",
            "programming",
        ],
        "sources": [
            "https://feeds.arstechnica.com/arstechnica/technology-lab.xml",
        ],
        # USA-based tech news
        "country_sources": [
            "https://rss.nytimes.com/services/xml/rss/nyt/Technology.xml",
        ],
    },
    "Priya Sharma": {
        "interests": [
            "Global markets",
            "startups",
            "fintech",
            "cryptocurrency",
            "economics",
        ],
        "sources": [
            "https://search.cnbc.com/rs/search/combinedcms/view.xml?partnerId=wrss01&id=10000664",
        ],
        "country_sources": [
            "https://economictimes.indiatimes.com/rssfeedsdefault.cms",
        ],
    },
    "Marco Rossi": {
        "interests": [
            "Football",
            "F1",
            "NBA",
            "Olympic sports",
            "esports",
        ],
        "sources": [
            "https://www.espn.com/espn/rss/news.xml",
        ],
        # Italy-based sports news
        "country_sources": [
            "https://www.gazzetta.it/rss/home.xml",
        ],
    },
    "Lisa Thompson": {
        "interests": [
            "Movies",
            "celebrity news",
            "TV shows",
            "music",
            "books",
        ],
        "sources": [
            "https://www.tmz.com/rss.xml",
        ],
        # UK-based entertainment news
        "country_sources": [
            "https://www.theguardian.com/uk/culture/rss",
        ],
    },
    "David Martinez": {
        "interests": [
            "Space exploration",
            "AI",
            "biotech",
            "physics",
            "renewable energy",
        ],
        "sources": [
            "http://rss.sciam.com/ScientificAmerican-Global.xml",
        ],
        # Spain-based science news
        "country_sources": [
            "https://www.elmundo.es/rss/ciencia-y-salud.xml",
        ],
    },
}

In [4]:
def fetch_articles(rss_urls, max_per_feed=5):
    """Collect the newest entries from each RSS feed.

    Args:
        rss_urls: Iterable of RSS/Atom feed URLs to read.
        max_per_feed: Maximum number of leading entries taken from each feed.
            The original code sliced six entries while documenting five; the
            default follows the documented intent. Pass 6 for the old count.

    Returns:
        A list of ``{"title": str, "link": str}`` dicts, in feed order. Entries
        without a link are skipped, a missing title becomes "Untitled", and a
        link that has already been collected from an earlier entry is not
        repeated.
    """
    articles = []
    seen_links = set()
    for url in rss_urls:
        feed = feedparser.parse(url)
        for entry in feed.entries[:max_per_feed]:
            # Feed entries do not always carry every field, so read them with
            # .get() instead of attribute access, which raises when absent.
            link = entry.get("link")
            if not link or link in seen_links:
                continue
            seen_links.add(link)
            articles.append({"title": entry.get("title") or "Untitled", "link": link})
    return articles

In [5]:
def summarize_article(url, max_chars=2000, max_length=200, min_length=100):
    """Download a news article and return a model-generated summary of it.

    Args:
        url: Address of the article to download and parse.
        max_chars: Number of leading characters of the article text passed to
            the tokenizer.
        max_length: Upper bound, in tokens, on the generated summary.
        min_length: Lower bound, in tokens, on the generated summary. It is
            lowered for short inputs so the model is not forced to produce
            more tokens than the article itself supports.

    Returns:
        The decoded summary string. Returns "No content available." when the
        parsed article has no text, and a string starting with
        "Error fetching article: " when any step raises.
    """
    try:
        article = Article(url)
        article.download()
        article.parse()

        text = (article.text or "").strip()
        if not text:
            return "No content available."

        # The article text is passed as-is: a "summarize: " task prefix is a
        # T5 convention, and a BART checkpoint would read it as article text.
        encoded = tokenizer(text[:max_chars], return_tensors="pt", truncation=True)
        # Keep the inputs on whatever device the model currently lives on.
        encoded = encoded.to(model.device)

        # Never demand a summary longer than roughly half of the input.
        n_input_tokens = encoded["input_ids"].shape[1]
        effective_min = min(min_length, max(1, n_input_tokens // 2))

        with torch.no_grad():
            summary_ids = model.generate(
                encoded["input_ids"],
                attention_mask=encoded["attention_mask"],
                max_length=max_length,
                min_length=effective_min,
                num_beams=4,
            )

        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberate best-effort: one failing article must not abort a whole
        # newsletter, so the failure is reported in place of the summary.
        return f"Error fetching article: {e}"

In [6]:
def generate_newsletter(user, articles):
    """Write a personalized Markdown newsletter for one user.

    Articles are grouped under each of the user's interests whose text appears
    as a whole word or phrase (case-insensitive) in the article title; articles
    matching no interest go under "General". Each article is summarized once,
    even when it is listed under several categories.

    Args:
        user: Key into the module-level ``users`` mapping; also used to build
            the output filename.
        articles: List of dicts with "title" and "link" keys.

    Side effects:
        Writes ``<user>_newsletter.md`` (spaces replaced by underscores,
        lowercased) in the current working directory and prints a
        confirmation line.
    """
    filename = f"{user.replace(' ', '_').lower()}_newsletter.md"
    interests = users[user]["interests"]

    # Short headline digest built from the first three titles.
    top_titles = [item["title"] for item in articles[:3]]
    if top_titles:
        top_summary = (
            f"Here are the top trending news updates for {user}: "
            + "; ".join(top_titles)
            + "."
        )
    else:
        top_summary = f"No news updates were found for {user}."

    # Match interests as whole words so that e.g. "AI" does not match "said".
    interest_patterns = {
        interest: re.compile(rf"(?<!\w){re.escape(interest)}(?!\w)", re.IGNORECASE)
        for interest in interests
    }

    categorized_articles = defaultdict(list)
    for item in articles:
        matched = False
        for interest, pattern in interest_patterns.items():
            if pattern.search(item["title"]):
                categorized_articles[interest].append(item)
                matched = True
        if not matched:
            categorized_articles["General"].append(item)  # Unclassified articles

    # An article can sit in several categories; summarize each link only once.
    summaries = {}

    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"# Personalized Newsletter for {user}\n\n")
        f.write(f"### Interests: {', '.join(interests)}\n\n")
        f.write("## Summary\n\n")
        f.write(f"{top_summary}\n\n")
        f.write("---\n\n")

        f.write("## News by Category\n\n")

        for category, category_articles in categorized_articles.items():
            # Upper-case only the first letter; str.capitalize() would also
            # lower-case the rest and turn "AI" into "Ai".
            heading = category[:1].upper() + category[1:]
            f.write(f"### {heading}\n\n")
            for item in category_articles:
                link = item["link"]
                if link not in summaries:
                    summaries[link] = summarize_article(link)
                f.write(f"**{item['title']}**\n\n")
                f.write(f"{summaries[link]}\n\n")
                f.write(f"[Read full article]({link})\n\n")
            f.write("---\n\n")

    print(f"Newsletter for {user} saved as {filename}")

In [7]:
# Build one newsletter per configured reader, pulling from both the general
# feeds and any country-specific feeds listed in the profile.
for user, data in users.items():
    print(f"Generating newsletter for {user}...")
    feed_urls = [*data["sources"], *data.get("country_sources", [])]
    user_articles = fetch_articles(feed_urls)
    generate_newsletter(user, user_articles)

Generating newsletter for Alex Parker...
Newsletter for Alex Parker saved as alex_parker_newsletter.md
Generating newsletter for Priya Sharma...
Newsletter for Priya Sharma saved as priya_sharma_newsletter.md
Generating newsletter for Marco Rossi...


KeyboardInterrupt: 