In [5]:
import requests
import json
import csv

In [6]:
def extract_comments(comment_tree, comments_list):
    """Recursively extract comment bodies from Reddit JSON."""
    if isinstance(comment_tree, list):
        for child in comment_tree:
            extract_comments(child, comments_list)
    elif isinstance(comment_tree, dict):
        data = comment_tree.get("data", {})

        body = data.get("body")
        if body and body != "[deleted]" and body != "[removed]":
            comments_list.append(body)

        # Recurse into replies
        replies = data.get("replies")
        if isinstance(replies, dict):
            extract_comments(replies.get("data", {}).get("children", []), comments_list)

In [7]:
def scrape_reddit_post(url, output_csv="reddit_comments.csv"):
    print(f"Scraping: {url}")

    if not url.endswith(".json"):
        url = url.rstrip("/") + "/.json"

    headers = {"User-Agent": "Mozilla/5.0"}

    r = requests.get(url, headers=headers)
    data = r.json()

    print("Parsing comments...")

    comments_list = []
    comment_tree = data[1]["data"]["children"]  # comment section

    extract_comments(comment_tree, comments_list)

    print(f"Collected {len(comments_list)} comments.")

    # Save CSV
    with open(output_csv, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["comment"])
        for c in comments_list:
            writer.writerow([c])

    print(f"Saved to {output_csv}")

    return comments_list

In [8]:
# --------------------------
# RUN
# --------------------------

scrape_reddit_post(
    "https://www.reddit.com/r/TrueOffMyChest/comments/t9m7fl/i_might_die_in_7_hours_and_im_not_afraid/",
    "TrueOffMyChest_comments.csv"
)

Scraping: https://www.reddit.com/r/TrueOffMyChest/comments/t9m7fl/i_might_die_in_7_hours_and_im_not_afraid/


ConnectTimeout: HTTPSConnectionPool(host='www.reddit.com', port=443): Max retries exceeded with url: /r/TrueOffMyChest/comments/t9m7fl/i_might_die_in_7_hours_and_im_not_afraid/.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x00000216CB491E10>, 'Connection to www.reddit.com timed out. (connect timeout=None)'))