In [None]:
import os
import feedparser
import pytz

from datetime import datetime
from dataclasses import dataclass
from time import mktime

# Timezone that defines what "today" means. Taken from the TIMEZONE
# environment variable (a name pytz.timezone() accepts); UTC when unset.
tz = pytz.timezone(os.getenv('TIMEZONE', 'UTC'))
now = datetime.now(tz)
# Build local midnight from the calendar date and let pytz pick the offset
# valid at that instant. now.replace(hour=0, ...) would keep the offset that
# applies *now*, which is off by the DST shift on a day when the clocks change.
start_of_the_day = tz.localize(datetime(now.year, now.month, now.day))

# Lower-cased tag terms; an entry carrying any of these is skipped when the
# feeds are read.
banned_tags = {
    'government & policy',
    'smartglasses',
}

@dataclass
class Feed:
    """A syndication feed that the notebook polls for articles."""
    # Human-readable label; used to cap how many articles one feed may
    # contribute and shown in the printed list.
    name: str
    # Address passed to feedparser.parse().
    url: str
    # Copied onto every Article from this feed and used as the leading
    # ascending sort key, so feeds with a smaller number are considered first.
    priority: int

@dataclass
class Article:
    """One feed entry, flattened into plain fields for sorting and display."""
    # Feed the entry was read from.
    feed: Feed
    # Entry "title", or "" when absent.
    title: str
    # Entry "summary", or "" when absent.
    description: str
    # Value of the entry's first "content" item, falling back to the summary.
    full_text: str
    # Entry "link", or "" when absent.
    link: str
    # Timezone-aware publication time; the notebook's `now` value is used
    # when the entry has no parsed publication date.
    published: datetime
    # Author names taken from the entry's "authors" list.
    authors: list
    # Lower-cased tag terms of the entry.
    tags: list
    # Copy of feed.priority, stored on the article so it can be sorted directly.
    priority: int



In [None]:
# Sources to poll, written as (name, url, priority) rows. Articles are later
# sorted by ascending priority, so priority 1 feeds are considered before
# priority 2 feeds.
feeds = [
    Feed(name=name, url=url, priority=priority)
    for name, url, priority in (
        ("TechCrunch", "https://techcrunch.com/feed/", 2),
        ("BigThinking", "https://bigthinking.io/feed", 1),
        ("Product Hunt", "https://www.producthunt.com/feed", 2),
        ("Hacker News Launches", "https://news.ycombinator.com/launches", 1),
        ("Andrew Chen", "https://andrewchen.substack.com/feed", 1),
        ("Benedict Evans", "http://ben-evans.com/benedictevans?format=rss", 1),
        ("Irrational Exuberance", "https://irrationalexuberance.libsyn.com/rss", 1),
        ("Pragmatic Engineer", "https://pragmaticengineer.com/feed/", 1),
    )
]

In [None]:
# Collect today's entries from every configured feed into `articles`.
# The list is rebuilt from scratch on each run, so the cell is safe to re-run.
articles = []

for feed in feeds:
    news = feedparser.parse(feed.url)

    for entry in news.entries:
        published = entry.get("published_parsed", None)

        if published:
            # feedparser normalizes *_parsed dates to UTC time tuples. Build an
            # aware UTC datetime from the tuple and convert it to the configured
            # zone. (time.mktime() would read the tuple as machine-local time,
            # and tz.localize() would then relabel it without converting.)
            published = datetime(*published[:6], tzinfo=pytz.utc).astimezone(tz)
            # Only keep entries published since local midnight.
            if published < start_of_the_day:
                continue
        else:
            # No usable date: treat the entry as published right now so it is
            # not filtered out.
            published = now

        # A tag's "term" may be present but None; fall back to "" before
        # lower-casing so such a tag cannot raise.
        tags = {(tag.get("term") or "").lower() for tag in entry.get("tags", [])}
        if banned_tags.intersection(tags):
            continue

        title = entry.get("title", "")
        description = entry.get("summary", "")
        # "content" can be missing or an empty list; in both cases fall back
        # to the summary instead of indexing into nothing.
        content = entry.get("content") or [{}]
        full_text = content[0].get("value", description)
        link = entry.get("link", "")
        authors = [author.get("name", "") for author in entry.get("authors", [])]

        article = Article(
            feed=feed,
            title=title,
            description=description,
            full_text=full_text,
            link=link,
            published=published,
            authors=authors,
            tags=list(tags),
            priority=feed.priority
        )

        articles.append(article)

# Lowest priority number first; within one priority, oldest publication first.
# NOTE(review): oldest-first means the freshest articles of the day are picked
# last — confirm this is the intended order.
articles.sort(key=lambda x: (x.priority, x.published))

# Walk the sorted articles and keep at most 5 in total, taking no more than
# 2 from any single feed.
selected_articles = []
feed_count = {}

for article in articles:
    feed_name = article.feed.name
    taken = feed_count.get(feed_name, 0)

    # This feed has already contributed its two articles.
    if taken >= 2:
        continue

    selected_articles.append(article)
    feed_count[feed_name] = taken + 1

    # Stop as soon as the overall quota is filled.
    if len(selected_articles) == 5:
        break

# Report the picks as a numbered list: headline, indented link, blank line.
print(f"Selected {len(selected_articles)} articles to read:\n")
for i, article in enumerate(selected_articles, start=1):
    headline = f"{i}. [{article.feed.name}] {article.title}"
    url_line = f"   {article.link}"
    # The trailing empty string produces the blank separator line.
    print(headline, url_line, "", sep="\n")
