In [20]:
import json
import logging
import praw

logger = logging.getLogger(__name__)

class SubRedditInfoScraper:
    """Fetch the top submissions of a subreddit through PRAW as plain dicts."""

    # Values accepted for the ``time_filter`` argument of get_subreddit_info.
    _VALID_TIME_FILTERS = ("all", "day", "hour", "month", "week", "year")

    def __init__(self, client_id, client_secret, password, user_agent, username):
        """Create the ``praw.Reddit`` client from the given credentials.

        All five arguments are forwarded unchanged to ``praw.Reddit`` under
        the keyword of the same name; the client is stored as ``self.reddit``.
        """
        self.reddit = praw.Reddit(
            client_id=client_id,
            client_secret=client_secret,
            password=password,
            user_agent=user_agent,
            username=username,
        )

    @staticmethod
    def _passes_filters(
        submission,
        *,
        filter_locked,
        filter_mod,
        filter_stickied,
        filter_original_content,
        filter_nsfw,
        min_upvotes,
        min_num_comments,
        min_upvote_ratio,
    ):
        """Return True when ``submission`` survives every enabled filter."""
        if filter_locked and submission.locked:
            return False
        if filter_mod and submission.distinguished is not None:
            return False
        if filter_stickied and submission.stickied:
            return False
        # Unlike the other flags, this one *keeps* only matching submissions.
        if filter_original_content and not submission.is_original_content:
            return False
        if filter_nsfw and submission.over_18:
            return False
        if min_upvotes is not None and not submission.score >= min_upvotes:
            return False
        if min_num_comments is not None and not submission.num_comments >= min_num_comments:
            return False
        if min_upvote_ratio is not None and not submission.upvote_ratio >= min_upvote_ratio:
            return False
        return True

    @staticmethod
    def _submission_to_dict(submission):
        """Copy the reported attributes of ``submission`` into a new dict."""
        return {
            "created_utc": submission.created_utc,
            "distinguished": submission.distinguished,
            "id": submission.id,
            "is_original_content": submission.is_original_content,
            "link_flair_text": submission.link_flair_text,
            "locked": submission.locked,
            "name": submission.name,
            "num_comments": submission.num_comments,
            # The ``over_18`` attribute is exposed under the key "nsfw".
            "nsfw": submission.over_18,
            "permalink": submission.permalink,
            "score": submission.score,
            "selftext": submission.selftext,
            "spoiler": submission.spoiler,
            "stickied": submission.stickied,
            "title": submission.title,
            "upvote_ratio": submission.upvote_ratio,
            "url": submission.url,
        }

    def get_subreddit_info(
        self,
        subreddit_name,
        limit=10,
        time_filter="all",
        filter_locked=True,
        filter_mod=False,
        filter_stickied=True,
        filter_original_content=False,
        filter_nsfw=False,
        min_upvotes=None,
        min_num_comments=None,
        min_upvote_ratio=None,
    ):
        """Return info dicts for the top submissions of a subreddit.

        ``limit`` and ``time_filter`` are passed to ``subreddit.top``; the
        filters are applied afterwards to what that call yields, so the
        result may contain fewer than ``limit`` entries.

        Args:
            subreddit_name: Name handed to ``self.reddit.subreddit``.
            limit: Forwarded to ``subreddit.top`` as ``limit``.
            time_filter: One of 'all', 'day', 'hour', 'month', 'week', 'year'.
            filter_locked: If true, drop submissions whose ``locked`` is true.
            filter_mod: If true, drop submissions whose ``distinguished`` is
                not None.
            filter_stickied: If true, drop submissions whose ``stickied`` is
                true.
            filter_original_content: If true, keep ONLY submissions whose
                ``is_original_content`` is true.
            filter_nsfw: If true, drop submissions whose ``over_18`` is true.
            min_upvotes: If not None, minimum ``score`` (inclusive).
            min_num_comments: If not None, minimum ``num_comments``
                (inclusive).
            min_upvote_ratio: If not None, minimum ``upvote_ratio``
                (inclusive).

        Returns:
            A list of dicts, one per surviving submission, in the order
            ``subreddit.top`` yielded them.

        Raises:
            ValueError: If ``time_filter`` is not one of the accepted values.
        """
        if time_filter not in self._VALID_TIME_FILTERS:
            raise ValueError("time_filter must be one of 'all', 'day', 'hour', 'month', 'week', 'year'")
        subreddit = self.reddit.subreddit(subreddit_name)
        # Lazy %-style arguments: the message is only built if INFO is enabled.
        logger.info("Getting top %s posts from %s in %s time filter", limit, subreddit_name, time_filter)
        info_list = [
            self._submission_to_dict(submission)
            for submission in subreddit.top(limit=limit, time_filter=time_filter)
            if self._passes_filters(
                submission,
                filter_locked=filter_locked,
                filter_mod=filter_mod,
                filter_stickied=filter_stickied,
                filter_original_content=filter_original_content,
                filter_nsfw=filter_nsfw,
                min_upvotes=min_upvotes,
                min_num_comments=min_num_comments,
                min_upvote_ratio=min_upvote_ratio,
            )
        ]
        logger.info("Found %s posts in %s with the given filters", len(info_list), subreddit_name)
        return info_list


# Load the Reddit API credentials; the context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open("viddit/reddit_credentials.json", encoding="utf-8") as creds_file:
    reddit_creds = json.load(creds_file)

# Build the scraper from the five credential fields read from the JSON file.
subreddit_scraper = SubRedditInfoScraper(
    reddit_creds["client_id"],
    reddit_creds["client_secret"],
    reddit_creds["password"],
    reddit_creds["user_agent"],
    reddit_creds["username"],
)

# Request the all-time top 5 posts of r/conspiracy; locked and stickied posts
# are dropped, and posts must meet the score/comment/ratio minimums below.
posts = subreddit_scraper.get_subreddit_info(
    "conspiracy",
    limit=5,
    time_filter="all",
    filter_locked=True,
    filter_mod=False,
    filter_stickied=True,
    filter_original_content=False,
    filter_nsfw=False,
    min_upvotes=50,
    min_num_comments=10,
    min_upvote_ratio=0.85,
)
print(posts)

[{'created_utc': 1566399585.0, 'distinguished': None, 'id': 'cti1q4', 'is_original_content': False, 'link_flair_text': None, 'locked': False, 'name': 't3_cti1q4', 'num_comments': 2779, 'nsfw': True, 'permalink': '/r/conspiracy/comments/cti1q4/this_rare_image_featuring_the_victims_of_the_mass/', 'score': 92759, 'selftext': '', 'spoiler': False, 'stickied': False, 'title': 'This rare image, featuring the victims of the mass slaughter of peaceful protesters by the Chinese army at Tiananmen Square, was just censored from the front page of reddit with 134,000 net upvotes.', 'upvote_ratio': 0.95, 'url': 'https://i.redd.it/43kffo8bgth31.jpg'}, {'created_utc': 1522543474.0, 'distinguished': None, 'id': '88n2td', 'is_original_content': False, 'link_flair_text': None, 'locked': False, 'name': 't3_88n2td', 'num_comments': 2147, 'nsfw': False, 'permalink': '/r/conspiracy/comments/88n2td/this_was_deleted_twice_from_reddits_front_page/', 'score': 73663, 'selftext': '', 'spoiler': False, 'stickied': 