In [18]:
import os
import re
import requests
from urllib.parse import urljoin, urlparse

def scrape_month(month, timeout=30):
    """Download every Markdown page linked from one month's sidebar.

    Fetches ``https://tds.s-anand.net/{month}/_sidebar.md``, extracts the
    target of every Markdown link in it, and saves each target whose URL path
    ends in ``.md`` into a local folder named after ``month``. Progress and
    failures are reported with ``print``; HTTP and network errors never
    propagate to the caller.

    Args:
        month: Path segment appended to the site root, e.g. ``"2025-01"``.
            Also used as the output folder name, relative to the current
            working directory.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        None. The result is the set of files written under ``month``.
    """
    base_url = f"https://tds.s-anand.net/{month}/"
    sidebar_url = urljoin(base_url, "_sidebar.md")

    print(f"\n📥 Fetching sidebar from: {sidebar_url}")
    try:
        resp = requests.get(sidebar_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"❌ Sidebar request failed for {month}: {exc}")
        return
    if resp.status_code != 200:
        print(f"❌ Sidebar not found for {month}")
        return

    # Create the output folder only once there is something to put in it, so
    # months that do not exist on the site leave no empty folders behind.
    os.makedirs(month, exist_ok=True)

    # Find all markdown links (e.g., ../fastapi.md, README.md)
    links = re.findall(r'\]\((.*?)\)', resp.text)
    print(f"🔗 Found {len(links)} links in sidebar")

    fetched_urls = set()
    for link in links:
        # Test the path component so "page.md#section" or "page.md?x=1"
        # still count as Markdown pages.
        link_path = urlparse(link).path
        if not link_path.endswith(".md"):
            continue

        # urljoin resolves relative targets such as "../page.md" against
        # base_url; the fragment is dropped because it does not change which
        # document is fetched.
        full_url = urlparse(urljoin(base_url, link))._replace(fragment="").geturl()
        if full_url in fetched_urls:
            continue
        fetched_urls.add(full_url)

        # NOTE: files are stored flat by basename, so two links with the same
        # file name in different directories overwrite each other.
        filename = os.path.basename(link_path)

        print(f"⬇️ Downloading {filename} from {full_url}")
        try:
            md_resp = requests.get(full_url, timeout=timeout)
        except requests.RequestException as exc:
            # One unreachable page must not abort the remaining downloads.
            print(f"⚠️ Failed to fetch {full_url} ({exc})")
            continue

        if md_resp.status_code == 200:
            with open(os.path.join(month, filename), "w", encoding="utf-8") as f:
                f.write(md_resp.text)
        else:
            print(f"⚠️ Failed to fetch {full_url} (status {md_resp.status_code})")

# Month identifiers to scrape, in the order they are processed.
# Edit this list to add or drop months.
months = [
    # 2025
    "2025-01",
    "2025-02",
    "2025-03",
    "2025-04",
    # 2024
    "2024-12",
    "2024-11",
    "2024-10",
]

# Run the scraper once per listed month.
for m in months:
    scrape_month(m)





📥 Fetching sidebar from: https://tds.s-anand.net/2025-01/_sidebar.md
🔗 Found 111 links in sidebar
⬇️ Downloading README.md from https://tds.s-anand.net/2025-01/README.md
⬇️ Downloading development-tools.md from https://tds.s-anand.net/development-tools.md
⬇️ Downloading vscode.md from https://tds.s-anand.net/vscode.md
⬇️ Downloading uv.md from https://tds.s-anand.net/uv.md
⬇️ Downloading npx.md from https://tds.s-anand.net/npx.md
⬇️ Downloading unicode.md from https://tds.s-anand.net/unicode.md
⬇️ Downloading devtools.md from https://tds.s-anand.net/devtools.md
⬇️ Downloading css-selectors.md from https://tds.s-anand.net/css-selectors.md
⬇️ Downloading json.md from https://tds.s-anand.net/json.md
⬇️ Downloading bash.md from https://tds.s-anand.net/bash.md
⬇️ Downloading spreadsheets.md from https://tds.s-anand.net/spreadsheets.md
⬇️ Downloading sqlite.md from https://tds.s-anand.net/sqlite.md
⬇️ Downloading git.md from https://tds.s-anand.net/git.md
⬇️ Downloading deployment-tools.md 

In [19]:
import os
import re
import requests
from urllib.parse import urljoin, urlparse

# Mirror every Markdown page linked from the site's root sidebar into
# OUTPUT_DIR. Files are stored flat by basename, so two links that share a
# file name overwrite each other.
BASE_URL = "https://tds.s-anand.net/"
SIDEBAR_URL = urljoin(BASE_URL, "_sidebar.md")
OUTPUT_DIR = "scraped_tds"
REQUEST_TIMEOUT = 30  # seconds to wait on each HTTP request before giving up

# Make output directory
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Get _sidebar.md
print(f"📥 Fetching sidebar from: {SIDEBAR_URL}")
try:
    resp = requests.get(SIDEBAR_URL, timeout=REQUEST_TIMEOUT)
except requests.RequestException as exc:
    print(f"❌ Could not fetch _sidebar.md ({exc})")
    raise SystemExit(1)
if resp.status_code != 200:
    print("❌ Could not fetch _sidebar.md")
    # Raise SystemExit directly instead of calling exit(): inside a notebook
    # kernel, exit() asks the kernel to shut down, whereas this only stops the
    # cell. As a plain script the exit status is still 1.
    raise SystemExit(1)

sidebar_md = resp.text
# Extract all links whose target ends in .md
links = re.findall(r'\]\(([^)]+\.md)\)', sidebar_md)
print(f"🔗 Found {len(links)} markdown links.")

# Download each markdown file, fetching each distinct URL only once
fetched_urls = set()
for link in links:
    full_url = urljoin(BASE_URL, link)
    if full_url in fetched_urls:
        continue
    fetched_urls.add(full_url)

    filename = os.path.basename(urlparse(link).path)
    local_path = os.path.join(OUTPUT_DIR, filename)

    print(f"⬇️ Downloading {filename} from {full_url}")
    try:
        md_resp = requests.get(full_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page must not abort the remaining downloads.
        print(f"⚠️ Failed to fetch {full_url} ({exc})")
        continue

    if md_resp.status_code == 200:
        with open(local_path, "w", encoding="utf-8") as f:
            f.write(md_resp.text)
    else:
        print(f"⚠️ Failed to fetch {full_url} (status {md_resp.status_code})")


📥 Fetching sidebar from: https://tds.s-anand.net/_sidebar.md
🔗 Found 109 markdown links.
⬇️ Downloading README.md from https://tds.s-anand.net/README.md
⬇️ Downloading development-tools.md from https://tds.s-anand.net/development-tools.md
⬇️ Downloading vscode.md from https://tds.s-anand.net/vscode.md
⬇️ Downloading github-copilot.md from https://tds.s-anand.net/github-copilot.md
⬇️ Downloading uv.md from https://tds.s-anand.net/uv.md
⬇️ Downloading npx.md from https://tds.s-anand.net/npx.md
⬇️ Downloading unicode.md from https://tds.s-anand.net/unicode.md
⬇️ Downloading devtools.md from https://tds.s-anand.net/devtools.md
⬇️ Downloading css-selectors.md from https://tds.s-anand.net/css-selectors.md
⬇️ Downloading json.md from https://tds.s-anand.net/json.md
⬇️ Downloading bash.md from https://tds.s-anand.net/bash.md
⬇️ Downloading llm.md from https://tds.s-anand.net/llm.md
⬇️ Downloading spreadsheets.md from https://tds.s-anand.net/spreadsheets.md
⬇️ Downloading sqlite.md from https:/