In [1]:
# Import dependencies, then load credentials from a local .env file into the environment
import praw
import time
from requests.exceptions import RequestException
import pandas as pd
from dotenv import load_dotenv
import os
load_dotenv()

True

In [2]:
# setup reddit credentials
# Every credential is read from an environment variable, so no secret is
# written in the notebook itself.
user_agent = os.environ.get('USER_AGENT')
reddit = praw.Reddit(
    user_agent=user_agent,
    client_secret=os.environ.get('CLIENT_SECRET'),
    client_id=os.environ.get('CLIENT_ID'),
)

In [3]:
# Function to get posts from a subreddit
def get_subreddit_posts(subreddit_name, limit=2000, retries=5):
    """Fetch the all-time top posts of a subreddit into a DataFrame.

    Parameters
    ----------
    subreddit_name : str
        Name handed to ``reddit.subreddit``. This notebook passes several
        subreddit names joined with ``+``.
    limit : int, default 2000
        Forwarded as ``limit`` to the ``top`` listing.
        NOTE(review): Reddit listings are commonly capped well below 2000
        items -- confirm how many rows actually come back.
    retries : int, default 5
        Maximum number of attempts before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per post, with the columns listed in ``columns`` below. The
        columns are present even when no post could be fetched, so callers
        can always select ``'post_id'``.
    """
    columns = [
        'post_id', 'subreddit', 'created_utc', 'selftext', 'post_url',
        'post_title', 'link_flair_text', 'score', 'num_comments',
        'upvote_ratio', 'post_author',
    ]
    rows = []

    for _ in range(retries):
        # Start every attempt from scratch: a failure in the middle of the
        # listing would otherwise leave partial rows behind that the retry
        # fetches again, producing duplicated posts.
        rows = []
        try:
            listing = reddit.subreddit(subreddit_name).top(time_filter="all", limit=limit)
            for post in listing:
                rows.append({
                    'post_id': post.id,
                    'subreddit': str(post.subreddit),
                    'created_utc': post.created_utc,
                    'selftext': post.selftext,
                    'post_url': post.url,
                    'post_title': post.title,
                    'link_flair_text': post.link_flair_text,
                    'score': post.score,
                    'num_comments': post.num_comments,
                    'upvote_ratio': post.upvote_ratio,
                    'post_author': str(post.author)
                })
            break
        # NOTE(review): PRAW appears to report transport failures through
        # prawcore's own exception classes -- confirm that requests'
        # RequestException is really the type raised here, otherwise this
        # retry path never triggers.
        except RequestException as e:
            print(f"Request failed: {e}. Retrying in 5 seconds...")
            time.sleep(5)
    else:
        # Every attempt failed (or retries was 0): make the gap visible
        # instead of silently handing back an incomplete frame.
        print(
            f"Giving up on '{subreddit_name}' after {retries} failed attempts; "
            f"returning {len(rows)} post(s) from the last attempt."
        )

    return pd.DataFrame(rows, columns=columns)

In [4]:
# Function to get comments for a list of posts
def get_comments_for_posts(post_ids, retries=5):
    """Collect every comment of the given posts into a DataFrame.

    Parameters
    ----------
    post_ids : iterable
        Submission ids, each passed to ``reddit.submission``.
    retries : int, default 5
        Maximum number of attempts per post before that post is skipped.

    Returns
    -------
    pandas.DataFrame
        One row per comment with the columns ``post_id``, ``comment`` and
        ``author``. The columns are present even when ``post_ids`` is empty
        or no comment could be fetched.
    """
    columns = ['post_id', 'comment', 'author']
    comment_list = []

    for post_id in post_ids:
        for _ in range(retries):
            try:
                submission = reddit.submission(post_id)
                # limit=None replaces every "load more comments" placeholder,
                # so the flattened list below holds only real comments.
                submission.comments.replace_more(limit=None)
                # Build this post's rows separately and merge them only on
                # success, so a failed attempt can never leave partial rows
                # that the retry would duplicate.
                post_comments = [
                    {
                        'post_id': post_id,
                        'comment': comment.body,
                        'author': str(comment.author)
                    }
                    for comment in submission.comments.list()
                ]
            # NOTE(review): PRAW appears to report transport failures through
            # prawcore's own exception classes -- confirm that requests'
            # RequestException is really the type raised here.
            except RequestException as e:
                print(f"Request failed: {e}. Retrying in 5 seconds...")
                time.sleep(5)
            else:
                comment_list.extend(post_comments)
                break
        else:
            # All attempts for this post failed (or retries was 0).
            print(
                f"Giving up on post {post_id} after {retries} failed attempts; "
                "its comments are missing from the result."
            )

    return pd.DataFrame(comment_list, columns=columns)

In [5]:
# Get posts and comments from the specified subreddits
# The names are joined with '+' into a single string, which is passed
# unchanged to get_subreddit_posts.
subreddits = '+'.join([
    'Periods',
    'WomensHealth',
    'Healthyhooha',
    'TwoXSupport',
    'WomenHealthTreatments',
])
posts_df = get_subreddit_posts(subreddit_name=subreddits)

In [6]:
# Preview the first five fetched posts.
posts_df.head(n=5)

Unnamed: 0,post_id,subreddit,created_utc,selftext,post_url,post_title,link_flair_text,score,num_comments,upvote_ratio,post_author
0,kf0d1g,WomensHealth,1608222000.0,"Hey! I spoke with them on the phone, and they ...",https://www.reddit.com/r/WomensHealth/comments...,Please sign petition to get nerves in the clit...,,2466,183,1.0,jessica_pin
1,dwup3z,Healthyhooha,1573842000.0,,https://i.redd.it/ua4z8e477wy31.jpg,I know this is apart of the standardized proce...,,2407,49,0.99,
2,hamqgj,Periods,1592380000.0,,https://i.redd.it/h83zzjacdf551.jpg,I'm sure every person here relates.,Fluff,2357,16,1.0,noorhaider97
3,j8mxtl,Periods,1602346000.0,,https://i.redd.it/2fo1u3ioias51.jpg,🩸,Fluff,2176,16,1.0,
4,mqc0v5,Periods,1618349000.0,,https://i.redd.it/83x11rfxd0t61.jpg,why is this so true?,Fluff,2113,72,1.0,KitKat1287


In [7]:
# Save posts DataFrame to CSV
# The header row is written by default; the positional index is left out.
posts_df.to_csv('reddit_posts.csv', index=False)

In [8]:
# Get comments DataFrame
# Fetch the comments of every post collected in posts_df, then preview them.
post_ids = posts_df.loc[:, 'post_id']
comments_df = get_comments_for_posts(post_ids=post_ids)

comments_df.head(n=5)


Unnamed: 0,post_id,comment,author
0,kf0d1g,Oh no. Oh no NO. I’m so sorry. Signed sealed d...,ohhh_RaMoannn
1,kf0d1g,Why are vaginas still taboo??? I don’t get it.,Dracalia
2,kf0d1g,*Has this been cross posted to r/twoxchromosom...,bex505
3,kf0d1g,"How does their excuse that it ""doesn't fit the...",shanaenae91
4,kf0d1g,Signed and donated 5000 shares. You are a saint!,BooBooCanoe


In [9]:
# Save comments DataFrame to CSV
# The header row is written by default; the positional index is left out.
comments_df.to_csv('reddit_post_comments.csv', index=False)