In [1]:
import feedparser
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from newspaper import Article
import json
from collections import defaultdict

In [2]:
def load_users(file_path="users.json"):
    """Read the user-profile JSON file and return its decoded content.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        Whatever object the JSON document decodes to.
    """
    with open(file_path, encoding="utf-8") as handle:
        raw_json = handle.read()
    return json.loads(raw_json)
# Module-level user table loaded from the default "users.json"; generate_newsletter()
# and the driver loop below read this global directly.
users = load_users()


In [3]:
# DistilBART summarization checkpoint from Hugging Face.
# `tokenizer` and `model` are module-level objects used by summarize_article().
# NOTE(review): from_pretrained presumably downloads the weights on first use and
# reads a local cache afterwards — confirm before running offline.
model_name = "sshleifer/distilbart-cnn-12-6"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

In [4]:
def fetch_articles(rss_urls, max_per_feed=6):
    """Collect the leading entries of each feed as title/link pairs.

    Args:
        rss_urls: Iterable of feed URLs, each passed to ``feedparser.parse``.
        max_per_feed: Maximum number of entries taken from the top of each
            feed. Defaults to 6, which is what the previous hard-coded slice
            took (its comment claimed 5).

    Returns:
        A list of ``{"title": ..., "link": ...}`` dicts, in feed order.
        Entries without a title or a link are skipped, because downstream
        code needs the title for categorization and the link for download.
    """
    articles = []
    for url in rss_urls:
        feed = feedparser.parse(url)
        for entry in feed.entries[:max_per_feed]:
            # Use .get() so an incomplete entry is dropped instead of raising
            # on attribute access and aborting the whole run.
            title = entry.get("title")
            link = entry.get("link")
            if not title or not link:
                continue
            articles.append({"title": title, "link": link})
    return articles

In [5]:
def summarize_article(url, max_chars=2000, max_length=200, min_length=100, num_beams=4):
    """Download an article and return a model-generated summary of its text.

    This is best-effort: any failure is reported as a string rather than
    raised, so one bad article does not abort the caller.

    Args:
        url: Address of the article, passed to ``Article``.
        max_chars: Number of leading characters of the article text that are
            fed to the tokenizer.
        max_length: ``max_length`` passed to ``model.generate``.
        min_length: ``min_length`` passed to ``model.generate``.
        num_beams: ``num_beams`` passed to ``model.generate``.

    Returns:
        The decoded summary; ``"No content available."`` when the parsed text
        is empty or whitespace-only; or ``"Error fetching article: ..."`` when
        any step raises.
    """
    try:
        article = Article(url)
        article.download()
        article.parse()

        text = (article.text or "").strip()
        if not text:
            return "No content available."

        # Feed the raw article text. A "summarize: " task prefix is a T5
        # convention; a BART checkpoint would treat it as part of the article.
        inputs = tokenizer.encode(text[:max_chars], return_tensors="pt", truncation=True)
        with torch.no_grad():
            summary_ids = model.generate(
                inputs,
                max_length=max_length,
                min_length=min_length,
                num_beams=num_beams,
            )

        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberate catch-all: the caller writes this string in place of a summary.
        return f"Error fetching article: {e}"

In [6]:
def _categorize_articles(articles, interest_keywords):
    """Group articles under the first interest whose keyword occurs in the title.

    Matching is a case-insensitive substring test. Articles that match no
    keyword are placed under ``"General"``.
    """
    categorized = defaultdict(list)
    for article in articles:
        title = article["title"].lower()
        category = next(
            (
                interest
                for interest, keywords in interest_keywords.items()
                if any(keyword.lower() in title for keyword in keywords)
            ),
            "General",
        )
        categorized[category].append(article)
    return categorized


def generate_newsletter(user, articles):
    """Write a Markdown newsletter for ``user`` to ``<user>_newsletter.md``.

    The file name is the user name lower-cased with spaces replaced by
    underscores, in the current working directory. The document contains a
    header, the user's interests, a one-line summary built from the first
    three titles, and the articles grouped by interest category. Each article
    is listed with the text returned by ``summarize_article`` and a link.

    Args:
        user: Key into the module-level ``users`` mapping. The profile must
            provide ``"interests"`` and ``"interest_keywords"``.
        articles: List of dicts with ``"title"`` and ``"link"`` keys.

    Raises:
        KeyError: If ``user`` or one of the required profile keys is missing.
    """
    profile = users[user]
    filename = f"{user.replace(' ', '_').lower()}_newsletter.md"

    top_titles = [article["title"] for article in articles[:3]]
    if top_titles:
        top_summary = f"Top trending news updates for {user}: " + "; ".join(top_titles) + "."
    else:
        # With no articles the joined form would read "...: ." — say so plainly.
        top_summary = f"No news updates were found for {user}."

    categorized_articles = _categorize_articles(articles, profile["interest_keywords"])

    # Build the whole document in memory first. Summarization is slow, and the
    # output file should not sit truncated or half-written while it runs.
    parts = [
        f"# Personalized Newsletter for {user}\n\n",
        f"### Interests: {', '.join(profile['interests'])}\n\n",
        "## Summary\n\n",
        f"{top_summary}\n\n",
        "---\n\n",
        "## News by Category\n\n",
    ]
    # A distinct loop name avoids rebinding the ``articles`` parameter.
    for category, category_articles in categorized_articles.items():
        parts.append(f"### {category.capitalize()}\n\n")
        for article in category_articles:
            summary = summarize_article(article["link"])
            parts.append(f"**{article['title']}**\n\n")
            parts.append(f"{summary}\n\n")
            parts.append(f"[Read full article]({article['link']})\n\n")
        parts.append("---\n\n")

    with open(filename, "w", encoding="utf-8") as f:
        f.writelines(parts)

    print(f"Newsletter for {user} saved as {filename}")

In [7]:
# Build one newsletter per configured user, drawing on the user's own feeds
# plus any optional country-specific feeds.
for user, data in users.items():
    print(f"Generating newsletter for {user}...")
    rss_urls = data["sources"] + data.get("country_sources", [])
    user_articles = fetch_articles(rss_urls)
    generate_newsletter(user, user_articles)

Generating newsletter for Alex Parker...
Newsletter for Alex Parker saved as alex_parker_newsletter.md
Generating newsletter for Priya Sharma...
Newsletter for Priya Sharma saved as priya_sharma_newsletter.md
Generating newsletter for Marco Rossi...
Newsletter for Marco Rossi saved as marco_rossi_newsletter.md
Generating newsletter for Lisa Thompson...
Newsletter for Lisa Thompson saved as lisa_thompson_newsletter.md
Generating newsletter for David Martinez...
Newsletter for David Martinez saved as david_martinez_newsletter.md
