In [None]:
import asyncio
from playwright.async_api import async_playwright
import getpass
from datetime import datetime

TARGET_URL = "https://discourse.onlinedegree.iitm.ac.in/c/courses/tds-kb/34"

# Extracts the "Created" or "Latest" date from the post's title attribute
def extract_valid_date(title: str) -> datetime | None:
    lines = title.splitlines()
    date_str = None

    for line in lines:
        if "Latest:" in line:
            date_str = line.replace("Latest:", "").strip()
            break
        elif "Created:" in line:
            date_str = line.replace("Created:", "").strip()

    try:
        return datetime.strptime(date_str, "%b %d, %Y %I:%M %p")
    except Exception:
        return None

async def main(email: str, password: str) -> None:
    """Log in, scroll the topic list, and save links to posts in the date range.

    Opens a visible Chromium window, navigates to ``TARGET_URL``, fills in the
    login form, and waits to be redirected back to ``TARGET_URL``. It then
    scrolls the page 40 times, reads every ``td.activity.num.topic-list-data``
    cell, parses the date from the cell's ``title`` attribute with
    ``extract_valid_date``, and collects the ``a.post-activity`` link of each
    cell whose date falls between Jan 1 and Apr 14, 2025 (both days
    inclusive). The unique links are printed and written, one per line, to
    ``filtered_post_links.txt``.

    The coroutine does not return on its own: after saving it waits forever
    so the browser window stays open until the task is cancelled (CTRL+C).

    Args:
        email: Value typed into the ``#login-account-name`` field.
        password: Value typed into the ``#login-account-password`` field.
    """
    start_date = datetime(2025, 1, 1)
    end_date = datetime(2025, 4, 14)

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context()
        page = await context.new_page()

        await page.goto(TARGET_URL)

        # Login
        await page.fill('#login-account-name', email)
        await page.fill('#login-account-password', password)
        await page.click('#login-button')

        await page.wait_for_url(TARGET_URL, timeout=20000)
        print("✅ Logged in and redirected!")

        await asyncio.sleep(5)

        print("🔄 Scrolling 40 times to load more posts...")
        for _ in range(40):
            await page.mouse.wheel(0, 2000)
            # Pause after each scroll before scrolling again.
            await asyncio.sleep(2)

        print("🔍 Extracting matching posts between Jan 1 and Apr 14, 2025...")

        td_elements = await page.query_selector_all('td.activity.num.topic-list-data')
        matching_links: list[str] = []
        # Companion set gives O(1) duplicate checks; the list keeps page order.
        seen_links: set[str] = set()

        for td in td_elements:
            title = await td.get_attribute("title")
            if not title:
                continue

            post_date = extract_valid_date(title)
            if not post_date:
                continue

            # Compare calendar dates so the whole of the end day is included;
            # comparing against midnight of end_date would drop almost every
            # post made on Apr 14.
            if not (start_date.date() <= post_date.date() <= end_date.date()):
                continue

            a_tag = await td.query_selector('a.post-activity')
            if not a_tag:
                continue

            href = await a_tag.get_attribute("href")
            if not href:
                continue

            full_link = f"https://discourse.onlinedegree.iitm.ac.in{href}"
            if full_link not in seen_links:
                seen_links.add(full_link)
                matching_links.append(full_link)

        print(f"✅ Found {len(matching_links)} posts between Jan 1 and Apr 14, 2025:")
        for link in matching_links:
            print(link)

        # Save to file
        with open("filtered_post_links.txt", "w", encoding="utf-8") as f:
            f.writelines(link + "\n" for link in matching_links)

        print("📁 Saved to filtered_post_links.txt")
        print("🕹️ Done. Browser will stay open. Press CTRL+C to exit.")
        # Block forever on an event that is never set, keeping the browser open.
        await asyncio.Event().wait()

# Entry point for a Jupyter/IPython cell: prompt for credentials, then run the
# scraper. The bare top-level `await` below only works where top-level await
# is enabled (Jupyter/IPython); in a plain Python script it is a syntax error,
# so there use `asyncio.run(main(email, password))` instead.
email = input("Enter your IITM email: ")
# getpass reads the password without echoing it.
password = getpass.getpass("Enter your IITM password: ")
await main(email, password)


Enter your IITM email:  sapta
Enter your IITM password:  ········


✅ Logged in and redirected!
🔄 Scrolling 40 times to load more posts...
🔍 Extracting matching posts between Jan 1 and Apr 14, 2025...
✅ Found 112 posts between Jan 1 and Apr 14, 2025:
https://discourse.onlinedegree.iitm.ac.in/t/about-the-tools-in-data-science-category/23335/42
https://discourse.onlinedegree.iitm.ac.in/t/end-term-mock-tds-jan-25/172333/11
https://discourse.onlinedegree.iitm.ac.in/t/pyq-haversine/172546/1
https://discourse.onlinedegree.iitm.ac.in/t/what-to-do-if-peer-has-not-allowed-access-and-the-deadline-is-over-for-peer-review-in-project-2/172471/4
https://discourse.onlinedegree.iitm.ac.in/t/project-1-not-submitted-issue/172497/2
https://discourse.onlinedegree.iitm.ac.in/t/graded-assignment-6/169283/46
https://discourse.onlinedegree.iitm.ac.in/t/ga7-data-visualisation-discussion-thread-tds-jan-2025/169888/31
https://discourse.onlinedegree.iitm.ac.in/t/email-in-ga-7/172021/2
https://discourse.onlinedegree.iitm.ac.in/t/pyq-doubt/172373/2
https://discourse.onlinedegree.ii