In [5]:
import random

def reservoir_sampling(stream, k, rng=None):
    """Draw a uniform random sample of at most ``k`` items from ``stream`` in one pass.

    Classic reservoir sampling: the first ``k`` items fill the reservoir; the
    item at zero-based position ``i >= k`` then overwrites a randomly chosen
    slot with probability ``k / (i + 1)``. Only ``k`` items are held at any
    time, so the stream may be a generator of unknown length.

    Args:
        stream: Any iterable. It is consumed exactly once, front to back.
        k: Reservoir size. When the stream yields fewer than ``k`` items, all
            of them are returned in their original order. ``k <= 0`` yields
            an empty list.
        rng: Optional object exposing ``randint(a, b)`` with inclusive bounds
            (e.g. a ``random.Random`` instance). Defaults to the module-level
            ``random`` generator, so ``random.seed(...)`` keeps working.
            Passing a dedicated instance makes a draw reproducible without
            touching global random state.

    Returns:
        A list containing at most ``k`` sampled items.
    """
    source = random if rng is None else rng
    reservoir = []
    for i, item in enumerate(stream):
        if i < k:
            # Still filling: the first k items are always kept.
            reservoir.append(item)
            continue
        # randint is inclusive on both ends, so j is uniform over the i + 1
        # items seen so far; landing inside the reservoir evicts that slot.
        j = source.randint(0, i)
        if j < k:
            reservoir[j] = item
    return reservoir


if __name__ == "__main__":
    # Demo: sample 10 values from a lazily generated one-million-element
    # stream; only the 10-slot reservoir is ever materialised.
    stream = range(1_000_000)
    sample = reservoir_sampling(stream, k=10)

    # Reservoir order first, then sorted so the spread is easier to eyeball.
    print("Sample:", sample)
    print(f"Sorted sample(k={len(sample)}):", sorted(sample))

Sample: [9615, 850756, 581427, 475614, 957344, 999078, 272805, 245023, 921712, 156270]
Sorted sample(k=10): [9615, 156270, 245023, 272805, 475614, 581427, 850756, 921712, 957344, 999078]


In [20]:
import random
from datetime import datetime, timedelta
from dataclasses import dataclass

@dataclass
class NewsArticle:
    """A news item identified by ``id``, with a title and a publication time."""

    id: int  # identifier shown after '#' in the string form
    title: str  # headline text
    publication_date: datetime  # full timestamp; only the date part is printed

    def __str__(self) -> str:
        """Return ``Article(#<id>, '<title>') - YYYY-MM-DD``."""
        # format() on a datetime with a non-empty spec delegates to strftime.
        formatted_date = format(self.publication_date, "%Y-%m-%d")
        return f"Article(#{self.id}, '{self.title}') - {formatted_date}"

def news_stream(days=7, n_articles=1_000):
    """Lazily yield synthetic ``NewsArticle`` objects with random recent dates.

    Args:
        days: Maximum article age in days. Each article is dated a whole
            number of days back, drawn from ``0..days`` with BOTH ends
            inclusive, so ``days=7`` can produce 8 distinct calendar dates.
        n_articles: How many articles to generate. Defaults to 1_000, the
            volume that was previously hard-coded.

    Yields:
        ``NewsArticle(i, "News i", date)`` for ``i`` in ``0..n_articles - 1``.
    """
    # This is a generator, so "now" is captured when iteration starts (not at
    # call time) and then shared by every article as the reference moment.
    current_date = datetime.now()
    for i in range(n_articles):
        random_date = current_date - timedelta(days=random.randint(0, days))
        yield NewsArticle(i, f"News {i}", random_date)

if __name__ == "__main__":
    # Feature 5 articles picked at random from the generated news stream.
    featured_news = reservoir_sampling(news_stream(), k=5)

    print("Recommended news:")
    for article in featured_news:
        print(article)


Recommended news:
Article(#757, 'News 757') - 2025-08-04
Article(#10, 'News 10') - 2025-08-03
Article(#642, 'News 642') - 2025-07-28
Article(#950, 'News 950') - 2025-07-30
Article(#200, 'News 200') - 2025-08-03
