# Reddit Comment Scraper

## Installation and Imports

In [None]:
pip install praw

In [None]:
import json
import os
import re

import praw

## API Authentication and Post List

In [None]:
# Reddit API credentials
# The credentials are read from environment variables so that secrets are
# never stored in the notebook itself. Set REDDIT_CLIENT_ID and
# REDDIT_CLIENT_SECRET (and optionally REDDIT_USER_AGENT) before starting
# the kernel; a missing required variable raises KeyError right here rather
# than failing later inside the scraping loop.
reddit = praw.Reddit(
    client_id=os.environ["REDDIT_CLIENT_ID"],
    client_secret=os.environ["REDDIT_CLIENT_SECRET"],
    user_agent=os.environ.get("REDDIT_USER_AGENT", "reddit-comment-scraper/0.1"),
)

# List of post IDs
post_ids = [
    "j1ap5g", "14xw68c", "1feyfmx", "15g1thv", "yt1gwc", "16thg4o",
    "1cgtigf", "bbi3cb", "16x13cy", "pj00hn", "utiezl", "pzibsc",
    "1d3ersq", "ix5pbo", "1bloq77", "e8gvgk", "169eb31", "jssp35"
]

## Preprocessing

In [None]:
maltese_comments = []

# Leading list numbering such as "1. Text". The marker must be followed by
# whitespace (or be alone on the line) so decimals like "3.14" are kept.
_NUMBERING_RE = re.compile(r'^\d+\.(?:\s+|$)')

# Leading bullet markers. The glyphs "•" and "→" are always treated as bullets;
# "-" and "*" only count when followed by whitespace (or alone on the line), so
# negative numbers ("-5") and markdown emphasis ("**bold**") are left intact.
_BULLET_RE = re.compile(r'^(?:[•→]+\s*|[-*]+(?:\s+|$))')

# A line is considered terminated when it ends with one of . ! ?
_TERMINAL_PUNCT_RE = re.compile(r'[.!?]$')


def preprocess_text(text: str) -> str:
    """Flatten a multi-line comment into a single paragraph of sentences.

    Each line is handled independently:

    * surrounding whitespace is removed, so indented list items are
      recognised as well;
    * a leading list number ("1. ") and/or bullet marker ("- ", "* ", "•",
      "→") is removed;
    * lines that are empty after cleaning are dropped, so blank lines between
      paragraphs do not produce doubled spaces in the result;
    * a period is appended when the line does not already end in ".", "!"
      or "?".

    Args:
        text: Raw comment text; lines are separated by "\\n".

    Returns:
        The cleaned lines joined by single spaces. An input with no content
        yields an empty string.
    """
    cleaned_lines = []

    for raw_line in text.split("\n"):
        line = raw_line.strip()
        # Numbering is removed first so "1. - item" loses both markers.
        line = _NUMBERING_RE.sub('', line, count=1)
        line = _BULLET_RE.sub('', line, count=1).strip()

        if not line:
            # Blank lines and marker-only lines carry no text.
            continue

        if not _TERMINAL_PUNCT_RE.search(line):
            line += '.'
        cleaned_lines.append(line)

    # Rejoin the surviving lines into one paragraph
    return " ".join(cleaned_lines)

## Scraping Loop

In [None]:
# Start from an empty list so that re-running this cell does not append a
# second copy of comments collected by an earlier run.
maltese_comments = []

# Placeholder bodies Reddit uses for comments whose text is no longer available.
PLACEHOLDER_BODIES = {"[deleted]", "[removed]"}

# Looping through the provided post IDs
for post_id in post_ids:
    post = reddit.submission(id=post_id)  # Get the post by ID
    # limit=0 discards the "load more comments" stubs without fetching them,
    # so only the comments returned with the initial request are collected.
    # Use limit=None instead to fetch every comment (many more API requests).
    post.comments.replace_more(limit=0)

    for comment in post.comments.list():
        try:
            text = comment.body.strip()
            if not text or text in PLACEHOLDER_BODIES:
                # Nothing to keep: empty, deleted or moderator-removed comment.
                continue

            clean_text = preprocess_text(text)
            if not clean_text:
                continue

            # Append the cleaned comment
            maltese_comments.append({"content": clean_text})

        except Exception as e:
            # Best effort: report the problem and move on to the next comment.
            print(f"Error processing comment: {e}")  # Debugging info

## Saving

In [None]:
# Serialise the collected comments (non-ASCII characters are written as-is,
# 4-space indentation) and store them in a UTF-8 encoded JSON file.
serialized_comments = json.dumps(maltese_comments, ensure_ascii=False, indent=4)
with open("reddit_comments.json", "w", encoding="utf-8") as output_file:
    output_file.write(serialized_comments)

# Report how many comments ended up in the file.
print(f"Collected {len(maltese_comments)} Maltese comments!")

Collected 654 Maltese comments!
