Hot: emphasizes recent activity and engagement, showcasing posts that are currently popular.

Top: emphasizes posts with the highest overall scores within a defined time frame, allowing users to see the most popular posts over different periods.

In [6]:
import json
from datetime import datetime, timezone

import praw
from praw.models import MoreComments

class RedditReply:
    """Plain-data snapshot of a single reply to a comment.

    Copies the id, author name, body and creation time out of the given
    reply object so that the result can be serialized via ``to_dict``.
    """

    def __init__(self, reply_object):
        """Extract the fields of interest from ``reply_object``.

        Args:
            reply_object: Object exposing ``id``, ``author`` (falsy, or an
                object with a ``name`` attribute), ``body`` and
                ``created_utc`` (a POSIX timestamp in seconds).
        """
        self.id = reply_object.id
        # A falsy author is stored as None instead of raising on ``.name``.
        self.author = reply_object.author.name if reply_object.author else None
        self.body = reply_object.body
        # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
        # timezone-aware call below yields the same UTC wall-clock string.
        created = datetime.fromtimestamp(reply_object.created_utc, tz=timezone.utc)
        self.timestamp = created.strftime('%Y-%m-%d %H:%M:%S')

    def to_dict(self):
        """Return the reply as a dict of plain, JSON-serializable values."""
        return {'reply_id': self.id,
                'author': self.author,
                'timestamp': self.timestamp,
                'body': self.body}

    
class RedditComment:
    """Plain-data snapshot of a comment together with its direct replies.

    Only the replies found directly on ``comment_object.replies`` are
    collected; each one is stored as the dict produced by
    ``RedditReply.to_dict``.
    """

    def __init__(self, comment_object, MAX_REPLIES=100):
        """Extract the fields of interest from ``comment_object``.

        Args:
            comment_object: Object exposing ``id``, ``author`` (falsy, or an
                object with a ``name`` attribute), ``created_utc`` (a POSIX
                timestamp in seconds), ``body`` and ``replies`` (a sliceable
                collection — presumably a PRAW comment forest; confirm).
            MAX_REPLIES: Upper bound on how many entries of ``replies`` are
                examined.
        """
        self.MAX_REPLIES = MAX_REPLIES

        self.id = comment_object.id
        # A falsy author is stored as None instead of raising on ``.name``.
        self.author = comment_object.author.name if comment_object.author else None

        # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
        # timezone-aware call below yields the same UTC wall-clock string.
        created = datetime.fromtimestamp(comment_object.created_utc, tz=timezone.utc)
        self.timestamp = created.strftime('%Y-%m-%d %H:%M:%S')
        self.body = comment_object.body
        self.replies = []
        self.add_replies(comment_object.replies)

    def add_replies(self, comment_replies):
        """Append the serialized form of each reply to ``self.replies``.

        The ``MAX_REPLIES`` cut-off is applied to the raw collection before
        ``MoreComments`` placeholders are skipped, so fewer than
        ``MAX_REPLIES`` replies may end up stored.
        """
        for reply in comment_replies[:self.MAX_REPLIES]:
            if isinstance(reply, MoreComments):
                # Placeholder entry, not an actual reply.
                continue
            self.replies.append(RedditReply(reply).to_dict())

    def to_dict(self):
        """Return the comment as a dict of plain, JSON-serializable values."""
        return {'comment_id': self.id,
                'author': self.author,
                'timestamp': self.timestamp,
                'body': self.body,
                "replies": self.replies}


class RedditPost:
    """Plain-data snapshot of a post together with its top-level comments.

    Each comment is stored as the dict produced by
    ``RedditComment.to_dict``.
    """

    def __init__(self, post_object, MAX_COMMENTS=100, MAX_REPLIES=100):
        """Extract the fields of interest from ``post_object``.

        Args:
            post_object: Object exposing ``id``, ``title``, ``author``
                (falsy, or an object with a ``name`` attribute),
                ``created_utc`` (a POSIX timestamp in seconds),
                ``selftext``, ``ups``, ``downs``, ``num_comments`` and
                ``comments`` (a sliceable collection).
            MAX_COMMENTS: Upper bound on how many entries of ``comments``
                are examined.
            MAX_REPLIES: Forwarded to each ``RedditComment``.
        """
        self.MAX_COMMENTS = MAX_COMMENTS
        self.MAX_REPLIES = MAX_REPLIES

        self.id = post_object.id
        self.title = post_object.title

        # A falsy author is stored as None instead of raising on ``.name``.
        self.author = post_object.author.name if post_object.author else None

        # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
        # timezone-aware call below yields the same UTC wall-clock string.
        created = datetime.fromtimestamp(post_object.created_utc, tz=timezone.utc)
        self.timestamp = created.strftime('%Y-%m-%d %H:%M:%S')
        self.body = post_object.selftext
        # NOTE(review): 'score' is filled from ``ups``; confirm this is
        # intended rather than a dedicated score attribute.
        self.score = post_object.ups
        self.downs = post_object.downs
        self.total_comments = post_object.num_comments

        self.comments = []
        self.add_comments(post_object.comments)

    def add_comments(self, comments):
        """Append the serialized form of each comment to ``self.comments``.

        The ``MAX_COMMENTS`` cut-off is applied to the raw collection before
        ``MoreComments`` placeholders are skipped, so fewer than
        ``MAX_COMMENTS`` comments may end up stored.
        """
        for comment in comments[:self.MAX_COMMENTS]:
            if isinstance(comment, MoreComments):
                # Placeholder entry, not an actual comment.
                continue
            self.comments.append(RedditComment(comment, self.MAX_REPLIES).to_dict())

    def to_dict(self):
        """Return the post as a dict of plain, JSON-serializable values."""
        return {'post_id': self.id,
                'title': self.title,
                'author': self.author,
                'timestamp': self.timestamp,
                'body': self.body,
                'score': self.score,
                'downs': self.downs,
                'total_comments': self.total_comments,
                'comments': self.comments}

# NOTE(review): the API credentials below are hard-coded in the source;
# consider loading them from the environment or a config file kept out of
# version control.
reddit = praw.Reddit(
    client_id='dcqH40fBNkFA',
    client_secret='490LJjcq753VMPfEMl1R4tuwQ',
    user_agent="Scraper 1 by u/Prashat",
)

# Scrape settings: which subreddit to read, how many submissions to request
# (passed straight through to ``subreddit.hot``), and where the result goes.
subreddit_name = 'Anxiety'
limit = None
json_filename = "reddit_data.json"

# Serialized posts accumulate here, one dict per submission.
all_posts_data = []

# Walk the subreddit's "hot" listing and snapshot every submission.
subreddit = reddit.subreddit(subreddit_name)
for submission in subreddit.hot(limit=limit):
    post_data = RedditPost(submission)
    all_posts_data.append(post_data.to_dict())

# Dump everything collected above as pretty-printed JSON.
with open(json_filename, "w") as json_file:
    json.dump(all_posts_data, json_file, indent=4)

print(f"Data saved to {json_filename}")


Data saved to reddit_data.json
