In [1]:
import requests
from requests.auth import HTTPBasicAuth
from collections import Counter

# ── 1. Load your app credentials from the environment ─────────────────────────
# Secrets must not live in the notebook: notebooks get shared and committed,
# and anything pasted here leaks with them. Export these before starting
# Jupyter:
#   REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET and (optionally) REDDIT_USER_AGENT
# NOTE: any secret that was ever hardcoded in this cell should be treated as
# compromised and regenerated in the Reddit app settings.
import os
import warnings

CLIENT_ID     = os.environ.get("REDDIT_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET", "")
USER_AGENT    = os.environ.get("REDDIT_USER_AGENT", "MyApp/0.1 by YOUR_REDDIT_USERNAME")

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn here so the cause is obvious; the token request that follows will
    # be rejected until both variables are set.
    warnings.warn(
        "REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET are not set; "
        "the token request will fail until they are exported."
    )

# ── 2. Get a bearer token via client-credentials grant ────────────────────────
# Without a timeout, requests waits indefinitely on a stalled connection.
TOKEN_TIMEOUT_S = 30  # seconds

auth = HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET)
data = {"grant_type": "client_credentials"}
headers = {"User-Agent": USER_AGENT}
token_http = requests.post(
    "https://www.reddit.com/api/v1/access_token",
    auth=auth, data=data, headers=headers, timeout=TOKEN_TIMEOUT_S,
)
# Fail here with the HTTP status (e.g. 401 for bad credentials) instead of a
# confusing KeyError on "access_token" further down.
token_http.raise_for_status()

# token_res is the parsed JSON body of the token response.
token_res = token_http.json()
if "access_token" not in token_res:
    # Guard against a successful status whose body carries no token.
    raise RuntimeError(f"Reddit did not return an access token: {token_res}")
bearer = token_res["access_token"]

# ── 3. Page through r/all/hot, 100 posts at a time ───────────────────────────
hot_url = "https://oauth.reddit.com/r/all/hot"
headers["Authorization"] = f"bearer {bearer}"

after = None         # pagination cursor returned by the previous page
counts = Counter()   # subreddit name -> number of hot posts seen
MAX_PAGES = 20       # 20 pages × 100 posts = 2 000 posts total
PAGE_SIZE = 100      # posts requested per page
LISTING_TIMEOUT_S = 30  # seconds; without it a stalled request blocks forever

# A single Session reuses the HTTPS connection across all pages instead of
# opening a new one per request.
with requests.Session() as session:
    session.headers.update(headers)

    for page in range(MAX_PAGES):
        params = {"limit": PAGE_SIZE}
        if after:
            params["after"] = after

        resp = session.get(hot_url, params=params, timeout=LISTING_TIMEOUT_S)
        resp.raise_for_status()
        listing = resp.json()["data"]

        posts = listing["children"]
        if not posts:
            break

        # tally each post's subreddit
        counts.update(post["data"]["subreddit"] for post in posts)

        # a missing/empty cursor means there are no further pages
        after = listing.get("after")
        if not after:
            break

# ── 4. Extract the top 1 000 subreddits by post-count ─────────────────────────
TOP_N = 1000    # maximum number of subreddits to keep
SHOW_N = 20     # how many of them to print
top_1000 = [sub for sub, _ in counts.most_common(TOP_N)]

# ── 5. Done! Print or save as you like ───────────────────────────────────────
# len(top_1000) is capped at TOP_N, so it is not the number of distinct
# subreddits seen; len(counts) is. Report both so the summary is accurate.
shown = top_1000[:SHOW_N]
print(
    f"Found {len(counts)} unique subreddits; kept the top {len(top_1000)}. "
    f"Here are the top {len(shown)}:"
)
for i, sub in enumerate(shown, 1):
    print(f"{i:>3}. {sub} ({counts[sub]} posts)")


Found 1000 unique subreddits. Here are the top 20:
  1. interestingasfuck (5 posts)
  2. facepalm (5 posts)
  3. MadeMeSmile (4 posts)
  4. politics (4 posts)
  5. meirl (4 posts)
  6. MurderedByWords (4 posts)
  7. SipsTea (4 posts)
  8. pics (4 posts)
  9. clevercomebacks (4 posts)
 10. Fauxmoi (4 posts)
 11. WhitePeopleTwitter (4 posts)
 12. worldnews (4 posts)
 13. AITAH (4 posts)
 14. europe (4 posts)
 15. Unexpected (4 posts)
 16. shitposting (4 posts)
 17. whenthe (4 posts)
 18. CuratedTumblr (4 posts)
 19. anime_irl (4 posts)
 20. cats (4 posts)


In [1]:
import requests
import json
from requests.auth import HTTPBasicAuth
import pandas as pd

# ── 1. Your Reddit app credentials (read from the environment) ───────────────
# Secrets must not live in the notebook: notebooks get shared and committed,
# and anything pasted here leaks with them. Export these before starting
# Jupyter:
#   REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET and (optionally) REDDIT_USER_AGENT
# NOTE: any secret that was ever hardcoded in this cell should be treated as
# compromised and regenerated in the Reddit app settings.
import os
import warnings

CLIENT_ID     = os.environ.get("REDDIT_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET", "")
USER_AGENT    = os.environ.get("REDDIT_USER_AGENT", "MyApp/0.1 by YOUR_REDDIT_USERNAME")

if not (CLIENT_ID and CLIENT_SECRET):
    # Warn here so the cause is obvious; the token request that follows will
    # be rejected until both variables are set.
    warnings.warn(
        "REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET are not set; "
        "the token request will fail until they are exported."
    )

# ── 2. Get an OAuth bearer token ─────────────────────────────────────────────
# Without a timeout, requests waits indefinitely on a stalled connection.
TOKEN_TIMEOUT_S = 30  # seconds

auth = HTTPBasicAuth(CLIENT_ID, CLIENT_SECRET)
data = {"grant_type": "client_credentials"}
headers = {"User-Agent": USER_AGENT}

token_res = requests.post(
    "https://www.reddit.com/api/v1/access_token",
    auth=auth, data=data, headers=headers, timeout=TOKEN_TIMEOUT_S,
)
# Stop on HTTP-level failures (e.g. 401 for bad credentials).
token_res.raise_for_status()

token_payload = token_res.json()
if "access_token" not in token_payload:
    # Guard against a successful status whose body carries no token.
    raise RuntimeError(f"Reddit did not return an access token: {token_payload}")
bearer = token_payload["access_token"]

# ── 3. Page through r/all/hot until you have 1 000 posts ────────────────────
hot_url = "https://oauth.reddit.com/r/all/hot"
headers["Authorization"] = f"bearer {bearer}"

TARGET_POSTS = 1000     # stop once this many posts have been collected
PAGE_SIZE = 100         # posts requested per page
LISTING_TIMEOUT_S = 30  # seconds; without it a stalled request blocks forever

all_posts = []   # raw listing children, in the order the API returned them
after = None     # pagination cursor returned by the previous page

# A single Session reuses the HTTPS connection across all pages instead of
# opening a new one per request.
with requests.Session() as session:
    session.headers.update(headers)

    while len(all_posts) < TARGET_POSTS:
        # never ask for more than is still needed to reach the target
        to_fetch = min(PAGE_SIZE, TARGET_POSTS - len(all_posts))
        params = {"limit": to_fetch}
        if after:
            params["after"] = after

        res = session.get(hot_url, params=params, timeout=LISTING_TIMEOUT_S)
        res.raise_for_status()
        # named `listing` rather than `data` so the token-request payload
        # dict defined earlier in this cell is not silently rebound
        listing = res.json()["data"]
        children = listing.get("children", [])
        if not children:
            break

        all_posts.extend(children)
        # a missing/empty cursor means there are no further pages
        after = listing.get("after")
        if not after:
            break

# trim just in case
all_posts = all_posts[:TARGET_POSTS]

# ── 4. Normalize and save to CSV ─────────────────────────────────────────────
output_path = "reddit_hot_posts.csv"

# Each collected item wraps its payload under the "data" key; keep only that.
posts_data = list(map(lambda child: child["data"], all_posts))

# json_normalize flattens nested dicts into columns, one row per post.
df = pd.json_normalize(posts_data)

# index=False: the positional index carries no information worth persisting.
df.to_csv(output_path, index=False)

print(f"Saved {len(df)} posts to {output_path}")


Saved 1000 posts to reddit_hot_posts.csv
