In [1]:
import logging
from datetime import datetime

from app.scraper import DynamicScraper, PostFieldSelector, PostModel, ScraperRuleset

# Configure root logging at DEBUG level so the scraper's DEBUG/INFO records are shown in the cell output.
logging.basicConfig(level=logging.DEBUG)


class RedditPostData(PostModel):
    """
    Structured record for a single Reddit post.

    Attributes
    ----------
    permalink : str
        Relative URL path of the post; also returned by ``get_item_id``
    title : str
        Title of the post
    body_snippet : str, default=""
        Content/body text of the post; empty when none is available
    post_time : datetime | None, default=None
        Timestamp at which the post was created
    rating : int | None, default=None
        Vote score/rating of the post

    Notes
    -----
    ``field_selectors`` holds one ``PostFieldSelector`` per field above, keyed
    by the field name.
    """

    permalink: str
    title: str
    body_snippet: str = ""
    post_time: datetime | None = None
    rating: int | None = None

    # Keys mirror the field names declared above.
    # NOTE(review): the PostFieldSelector arguments look like
    # (CSS selector, attribute to read), with "" presumably meaning the post
    # element itself -- confirm against PostFieldSelector in app.scraper.
    field_selectors = {
        "permalink": PostFieldSelector("", "permalink"),
        "title": PostFieldSelector("a[slot='title']"),
        "body_snippet": PostFieldSelector("a[slot='text-body']"),
        "post_time": PostFieldSelector("faceplate-timeago > time", "datetime"),
        "rating": PostFieldSelector(
            ".shreddit-post-container button+span>faceplate-number",
            "number",
        ),
    }

    def get_item_id(self) -> str:
        """Return the post's permalink, which serves as its item id."""
        return self.permalink


class RedditRuleset(ScraperRuleset[RedditPostData]):
    """
    Scraper ruleset for Reddit pages that yields ``RedditPostData`` items.

    Attributes
    ----------
    post_selector : str
        Selector matching ``shreddit-post`` elements nested in an ``article``
        directly under ``shreddit-feed`` or ``faceplate-batch``
    scroll_element_selector : str
        Selector for the scroll element; same value as ``post_selector``
    post_model : type[RedditPostData]
        Model class used for extracted posts
    """

    post_selector: str = "shreddit-feed > article shreddit-post, faceplate-batch > article shreddit-post"
    # The scroll element uses exactly the same selector as the posts themselves.
    scroll_element_selector: str = post_selector
    post_model: type[RedditPostData] = RedditPostData


async def scrape_reddit(url: str, max_posts: int = 50, debug: bool = False) -> list[RedditPostData]:
    """
    Scrape posts from a Reddit page using the Reddit-specific ruleset.

    A ``DynamicScraper`` configured with a fresh ``RedditRuleset`` is opened as
    an async context manager for the duration of the extraction.

    Parameters
    ----------
    url : str
        URL of the Reddit page to scrape
    max_posts : int, default=50
        Maximum number of posts to extract
    debug : bool, default=False
        Forwarded to ``DynamicScraper``; enables debug mode (visible browser)

    Returns
    -------
    list[RedditPostData]
        Posts returned by ``DynamicScraper.extract_content``
    """
    async with DynamicScraper(ruleset=RedditRuleset(), debug=debug) as reddit_scraper:
        # Return from inside the context so the scraper is still open while extracting.
        return await reddit_scraper.extract_content(url, max_posts)

In [2]:
posts = await scrape_reddit("https://www.reddit.com/r/learnpython/new", max_posts=50, debug=False)

for i, post in enumerate(posts, 1):
    print(f"{i}. {post.title} (Score: {post.rating})")
    print(f"   Posted: {post.post_time}")
    print(f"   Snippet: {post.body_snippet[:100]}..." if post.body_snippet else "   No snippet")
    print()

DEBUG:app.scraper:Using random user agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0
DEBUG:app.scraper:Launching browser...
DEBUG:app.scraper:Browser initialized successfully
INFO:app.scraper:Extracting up to 50 items from https://www.reddit.com/r/learnpython/new
INFO:app.scraper:Navigating to https://www.reddit.com/r/learnpython/new
DEBUG:app.scraper:Extracting content batch...
INFO:app.scraper:Found 28 new items, total 28/50 items.
DEBUG:app.scraper:Scrolling to load more content...
DEBUG:app.scraper:Extracting content batch...
INFO:app.scraper:Found 0 new items, total 28/50 items.
DEBUG:app.scraper:Retrying item collection after attempt #1
DEBUG:app.scraper:Extracting content batch...
INFO:app.scraper:Found 25 new items, total 53/50 items.
DEBUG:app.scraper:Scrolling to load more content...
INFO:app.scraper:Content extraction complete. Extracted 50 items.
DEBUG:app.scraper:Cleaning up browser resourc

1. How to add a copilot to an online Python code editor? (Score: 0)
   Posted: 2025-03-14 15:02:55.335000+00:00
   Snippet: I'm building an online Python code editor, and I want to integrate a Copilot-like AI assistant that ...

2. Using exercise platforms (Score: 2)
   Posted: 2025-03-14 14:55:01.547000+00:00
   Snippet: I'm using platforms like codewars and hackerrank to solve problems and coding. 

Often I have many d...

3. Can someone help me with basic string operations? (Score: 0)
   Posted: 2025-03-14 14:28:45.026000+00:00
   Snippet: https://www.learnpython.org/en/Basic_String_Operations 

I do not understand how "Strings are awesom...

4. how to drag the value until the next value shows (Score: 2)
   Posted: 2025-03-14 14:16:34.036000+00:00
   Snippet: I am reading this csv into pandas and I need to drag a column value until the next value in the colu...

5. Reinforcement Learning Project Ideas (Score: 2)
   Posted: 2025-03-14 13:47:46.093000+00:00
   Snippet: Hi, 

I have a 