In [23]:
import requests
import csv
import time

In [24]:
def save_csv(filename, rows):
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["comment"])
        for r in rows:
            # Ganti semua newline/tab dengan spasi
            clean_r = r.replace("\n", " ").replace("\r", " ").replace("\t", " ").strip()
            writer.writerow([clean_r])
    print(f"Saved {len(rows)} comments to {filename}")

In [25]:
def scrape_all_comments_pullpush(post_id, max_batches, output_csv, batch_size=1000, verbose=True):
    base_url = "https://api.pullpush.io/reddit/comment/search"
    all_comments = []
    last_after = None

    for batch in range(max_batches):
        if verbose:
            print(f"\n--- Batch {batch+1}/{max_batches} ---")

        params = {"link_id": post_id, "size": batch_size}
        if last_after is not None:
            params["after"] = last_after

        resp = requests.get(base_url, params=params)
        if resp.status_code != 200:
            print("Error pada request:", resp.status_code, resp.text)
            break

        js = resp.json()
        data = js.get("data")
        if not data:
            print("Tidak ada data lagi ‚Üí stop pagination.")
            break

        # Proses komentar batch ini
        for i, c in enumerate(data):
            body = c.get("body")
            if body and body not in ("[deleted]", "[removed]"):
                # Hapus newline supaya CSV satu baris per comment
                clean = body.replace("\n", " ").replace("\r", " ").strip()
                all_comments.append(clean)

        if verbose:
            print(f" ‚Üí Diperoleh {len(data)} komentar di batch ini; total sekarang: {len(all_comments)}")

        # Pagination: gunakan `created_utc` dari comment terakhir
        try:
            last_after = data[-1]["created_utc"]
        except KeyError:
            print("Tidak bisa ambil created_utc dari data untuk pagination. Berhenti.")
            break

        time.sleep(1)  # jeda agar tidak spam API

    save_csv(output_csv, all_comments)
    # Simpan ke CSV
    # with open(output_csv, "w", encoding="utf-8", newline="") as f:
    #     writer = csv.writer(f)
    #     writer.writerow(["comment"])
    #     for c in all_comments:
    #         writer.writerow([c])

    print(f"Selesai! Total komentar: {len(all_comments)}. Disimpan ke `{output_csv}`")
    return all_comments

In [26]:
# post_id = "406d65"  # bagian dari URL /comments/406d65/‚Ä¶
# scrape_all_comments_pullpush(post_id, 250, "what_is_something_someone_said_that_changed_your_perspective.csv")

# https://www.reddit.com/r/AskReddit/comments/406d65/what_is_something_someone_said_that_changed_your_perspective/

In [27]:
post_id = "dkjimg"  # bagian dari URL /comments/406d65/‚Ä¶
scrape_all_comments_pullpush(post_id, 400, "what_screams_im_very_insecure.csv")

# https://www.reddit.com/r/AskReddit/comments/dkjimg/what_screams_im_very_insecure/


--- Batch 1/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 92

--- Batch 2/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 184

--- Batch 3/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 276

--- Batch 4/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 368

--- Batch 5/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 460

--- Batch 6/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 552

--- Batch 7/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 644

--- Batch 8/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 736

--- Batch 9/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 828

--- Batch 10/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 920

--- Batch 11/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 1012

--- Batch 12/400 ---
 ‚Üí Diperoleh 100 komentar di batch ini; total sekarang: 1104


['‚ÄúAww, gonna cry?‚Äù',
 "Posting 5+ pictures on social media every day, bonus points if they're selfies",
 '"Oh wow! Your drawing looks so cool! That must have took forever to do!"  "Oh, haha, it\'s not THAT good. The face isn\'t really good and the eyes are off and-"',
 'Blew on u? What does that mean?',
 '6th graders at my school',
 "Grandstanding. If you are good, you're good. Everybody will acknowledge that without you calling attention to it.",
 '"What screams" questions.',
 "I underestimate myself a lot because I feel if I try to get cocky or be a bit too overconfident of myself I'll crash and burn. It's not the best mindset because it keeps me in this shell of being too scared to try things.",
 'My history',
 "That one r/AmItheAsshole post where the OP didn't want his girlfriend to change in front of a gay guy but was fine with her changing in front of her lesbian friend because she was straight",
 'Apparently, innocently asking on here why some girls only date tall guys',
 '