<a href="https://colab.research.google.com/github/AMElashal26/AMElashal26/blob/main/Python_Reddit_Scraper_S.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# reddit_scraper.py
#
# Description:
# This script uses the Python Reddit API Wrapper (PRAW) to scrape posts and
# comments from a specified subreddit and saves them to a text file.
#
# Prerequisites:
# 1. Python 3.6+
# 2. PRAW library: Install it using pip -> pip install praw
# 3. A Reddit account.
#
# How to get Reddit API Credentials:
# 1. Go to https://www.reddit.com/prefs/apps
# 2. Log in with your Reddit account.
# 3. Scroll to the bottom and click "are you a developer? create an app..."
# 4. Fill out the form:
#    - name: Give your script a unique name (e.g., my_python_scraper_v1)
#    - type: Select "script"
#    - description: (Optional) A brief description.
#    - about url: (Optional)
#    - redirect uri: http://localhost:8080 (This is required for script apps)
# 5. Click "create app".
# 6. You will now see your app's credentials.
#    - The string of characters under your app name is the "client_id".
#    - The string labeled "secret" is the "client_secret".
# 7. Copy these values into the script below.

import praw
import argparse
import sys
from datetime import datetime

def _author_label(author):
    """Return a printable author tag; "[deleted]" when the author is None."""
    return f"u/{author}" if author is not None else "[deleted]"


def _write_comments(out, post, comment_limit):
    """Write up to ``comment_limit`` non-stickied comments of ``post`` to ``out``."""
    out.write(f"TOP {comment_limit} COMMENTS:\n\n")

    # replace_more(limit=0) removes the "load more comments" placeholders so
    # the flattened list below contains only real comments.
    post.comments.replace_more(limit=0)
    written = 0
    for comment in post.comments.list():
        if written >= comment_limit:
            break
        if comment.stickied:
            # Ignore stickied/moderator comments; they do not count toward the limit.
            continue
        written += 1
        out.write(f"  Comment #{written} | Score: {comment.score} | Author: {_author_label(comment.author)}\n")
        # Indent continuation lines so multi-line comments stay aligned under the quote marker.
        body = comment.body.replace('\n', '\n  ')
        out.write(f"  > {body}\n\n")


def _write_post(out, post, number, comment_limit):
    """Write one post (header, optional self-text, then its comments) to ``out``."""
    out.write("="*80 + "\n")
    out.write(f"POST #{number}: {post.title}\n")
    out.write(f"URL: https://www.reddit.com{post.permalink}\n")
    out.write(f"SCORE: {post.score}\n")
    out.write(f"AUTHOR: {_author_label(post.author)}\n")
    out.write("-"*80 + "\n")

    # Link posts have an empty selftext; only text posts get a content section.
    if post.selftext:
        out.write("POST CONTENT:\n")
        out.write(post.selftext + "\n")
        out.write("-"*80 + "\n")

    _write_comments(out, post, comment_limit)
    out.write("\n\n")  # Add space before the next post


def scrape_reddit(subreddit_name, post_limit, comment_limit):
    """
    Scrapes the 'hot' posts of a subreddit, with comments, into a text file.

    The output file is named ``<subreddit_name>_posts_<YYYYMMDD_HHMMSS>.txt``
    and is created in the current working directory. Progress is printed to
    stdout. Nothing is returned.

    Args:
        subreddit_name (str): The name of the subreddit to scrape.
        post_limit (int): The maximum number of posts to fetch.
        comment_limit (int): The maximum number of comments to fetch per post.

    Raises:
        SystemExit: With exit code 1 if the Reddit client cannot be created or
            the subreddit cannot be accessed.
    """
    print("Starting the Reddit scraper...")

    # --- IMPORTANT: PASTE YOUR REDDIT API CREDENTIALS HERE ---
    try:
        reddit = praw.Reddit(
            client_id="YOUR_CLIENT_ID", # Replace with your client ID
            client_secret="YOUR_CLIENT_SECRET", # Replace with your client secret
            user_agent="MyScraper/1.0 by u/YourUsername", # Replace with your app name and Reddit username
        )
        # Report whether the client is in read-only mode.
        # NOTE(review): this line is not expected to contact Reddit, so bad
        # credentials most likely surface on the first request below - confirm
        # against the PRAW documentation.
        print(f"API Connection Read-Only: {reddit.read_only}")
    except Exception as e:
        print(f"Error connecting to Reddit API: {e}")
        print("Please ensure your client_id, client_secret, and user_agent are correct.")
        sys.exit(1)

    try:
        # Get the subreddit instance
        subreddit = reddit.subreddit(subreddit_name)
        # Listings are lazy: calling hot() alone sends no request. Pull one
        # item so a missing/private subreddit (or bad credentials) fails here,
        # before any output file is created. The default of None keeps an
        # empty-but-valid subreddit from raising StopIteration.
        next(iter(subreddit.hot(limit=1)), None)
        print(f"Successfully connected to r/{subreddit_name}")
    except Exception as e:
        print(f"Error: Could not access subreddit 'r/{subreddit_name}'.")
        print(f"Please check if the subreddit name is correct and public. Details: {e}")
        sys.exit(1)

    # Use a single timestamp so the filename and the file header always agree.
    started = datetime.now()
    filename = f"{subreddit_name}_posts_{started.strftime('%Y%m%d_%H%M%S')}.txt"

    print(f"Fetching {post_limit} hot posts from r/{subreddit_name}...")

    post_count = 0
    # Open the file to write the scraped data
    with open(filename, "w", encoding="utf-8") as f:
        f.write(f"--- Scraped Data from r/{subreddit_name} on {started.strftime('%Y-%m-%d %H:%M:%S')} ---\n\n")

        # Fetch the 'hot' posts from the subreddit
        for post_count, post in enumerate(subreddit.hot(limit=post_limit), start=1):
            _write_post(f, post, post_count, comment_limit)
            print(f"  > Processed Post #{post_count}: '{post.title[:50]}...'")

    print("="*80)
    print("Scraping complete!")
    print(f"Data for {post_count} posts saved to '{filename}'")
    print("="*80)


if __name__ == "__main__":
    # Command-line entry point: parse the subreddit name and the two limits,
    # validate them, then run the scraper.
    parser = argparse.ArgumentParser(description="Scrape posts and comments from a subreddit.")
    parser.add_argument("subreddit", help="The name of the subreddit to scrape (e.g., 'python').")
    parser.add_argument("-p", "--posts", type=int, default=10, help="The number of posts to scrape (default: 10).")
    parser.add_argument("-c", "--comments", type=int, default=5, help="The number of comments to scrape per post (default: 5).")

    args = parser.parse_args()

    # A negative limit is never meaningful (a negative comment limit would
    # write a "TOP -N COMMENTS" header followed by no comments), so fail fast
    # with a usage error instead of producing a misleading output file.
    if args.posts < 0:
        parser.error("--posts must be zero or greater.")
    if args.comments < 0:
        parser.error("--comments must be zero or greater.")

    # Run the scraper function with the provided arguments
    scrape_reddit(args.subreddit, args.posts, args.comments)

# --- How to Run the Script ---
# 1. Save this file as "reddit_scraper.py".
# 2. Open your terminal or command prompt.
# 3. Navigate to the directory where you saved the file.
# 4. Run the script using a command like this:
#
#    python reddit_scraper.py python -p 25 -c 10
#
#    - "python" is the subreddit name.
#    - "-p 25" sets the post limit to 25.
#    - "-c 10" sets the comment limit to 10 per post.
#
# 5. A timestamped file named <subreddit>_posts_<YYYYMMDD_HHMMSS>.txt will be created in that directory.