In [1]:
import praw
import csv
from dotenv import load_dotenv
import os

In [2]:
# Load variables from a local .env file into the process environment so the
# REDDIT_* credentials read with os.getenv() in the final cell are available.
load_dotenv()

True

In [4]:

class RedditScraper:
    """Collect posts from one subreddit's "hot" listing and dump them to CSV.

    Attributes:
        reddit: The ``praw.Reddit`` client built from the supplied credentials.
        subreddit: The subreddit handle obtained from ``reddit.subreddit(...)``.
        batch_size: Maximum number of submissions requested per batch.
        num_batches: Maximum number of batches requested by ``fetch_posts``.
    """

    def __init__(self, client_id, client_secret, user_agent, subreddit_name):
        """Create the Reddit client and bind it to ``subreddit_name``.

        Args:
            client_id: Reddit API client id.
            client_secret: Reddit API client secret.
            user_agent: User-agent string sent with every request.
            subreddit_name: Name of the subreddit to scrape (without ``r/``).
        """
        self.reddit = praw.Reddit(client_id=client_id,
                                  client_secret=client_secret,
                                  user_agent=user_agent)
        self.subreddit = self.reddit.subreddit(subreddit_name)
        self.batch_size = 900
        self.num_batches = 15

    def fetch_posts(self):
        """Fetch up to ``num_batches`` batches of hot posts, without duplicates.

        Each batch after the first resumes after the last post collected so
        far. Fetching stops early when a batch is empty or contributes no post
        that has not already been collected.

        Returns:
            A list of ``[title, selftext, id]`` rows, one per unique submission,
            in the order they were received.
        """
        post_data = []
        seen_ids = set()

        for _ in range(self.num_batches):
            # Reddit's `after` cursor must be a fullname ("t3_<id>" for a
            # submission), not the bare base-36 id stored in each row.
            last_id = self.get_last_post_id(post_data)
            if last_id is None:
                params = {}
            elif str(last_id).startswith('t3_'):
                params = {'after': last_id}
            else:
                params = {'after': f't3_{last_id}'}

            # Request the next batch of at most `batch_size` submissions.
            submissions = list(self.subreddit.hot(limit=self.batch_size, params=params))

            if not submissions:
                break

            new_rows = []
            for submission in submissions:
                if submission.id in seen_ids:
                    continue
                seen_ids.add(submission.id)
                new_rows.append([submission.title, submission.selftext, submission.id])

            # A batch made only of already-seen posts means the listing has
            # stopped advancing; further requests would just repeat it.
            # NOTE(review): Reddit listings are reportedly capped at roughly
            # 1000 items, so this is the expected way for the loop to end on a
            # large subreddit - confirm against the API docs.
            if not new_rows:
                break

            post_data.extend(new_rows)

        return post_data

    def get_last_post_id(self, post_data):
        """Return the id (third column) of the last row, or ``None`` if empty.

        Args:
            post_data: List of ``[title, selftext, id]`` rows.
        """
        if post_data:
            return post_data[-1][2]
        return None

    def save_to_csv(self, post_data, file_name):
        """Write a header row followed by ``post_data`` to ``file_name``.

        The file is opened in write mode as UTF-8, so an existing file with the
        same name is overwritten.

        Args:
            post_data: Iterable of ``[title, selftext, id]`` rows.
            file_name: Path of the CSV file to create.
        """
        # newline='' lets the csv module control line endings itself.
        with open(file_name, mode='w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['Title', 'Selftext', 'Post ID'])  # Write header row
            writer.writerows(post_data)  # Write post data


In [5]:

if __name__ == "__main__":
    # The subreddit name drives the output filename, so the file written and
    # the file reported in the message below cannot drift apart.
    subreddit_name = 'AnorexiaNervosa'
    output_file = f'r_{subreddit_name}_posts.csv'

    # Credentials come from the environment (populated from .env earlier in
    # the notebook) rather than being hard-coded here.
    scraper = RedditScraper(
        client_id=os.getenv('REDDIT_CLIENT_ID'),
        client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
        user_agent=os.getenv('REDDIT_USER_AGENT'),
        subreddit_name=subreddit_name
    )

    post_data = scraper.fetch_posts()

    # Only create the CSV when at least one post was fetched.
    if post_data:
        scraper.save_to_csv(post_data, output_file)
        print(f'Data saved to {output_file}')
    else:
        print('No more posts to fetch.')


Data saved to r_AnorexiaNervosa_posts_2.csv
