# Get The Top Subreddits By Subscriber Count

The list of top subreddits does not include any that are private or quarantined. It does include NSFW subreddits.

Data:
* From: 2021-11-19
* Source: https://frontpagemetrics.com/list-all-subreddits

New Data Stored In: top_subreddits.json

In [1]:
import gzip
import praw
from config import *
from csv import reader
from collections import defaultdict
from prawcore.exceptions import Forbidden

In [2]:
# How many subreddits the final top list should contain
n = 500

# Subreddit name -> popularity; names that were never stored default to 0
subreddit_popularity = defaultdict(int)

In [3]:
# Load the subreddit popularity snapshot from the gzip-compressed CSV.
# Undecodable bytes are dropped (errors="ignore") instead of aborting the load.
with gzip.open("../data/2021-11-19.csv.gz", "rt", errors="ignore") as read_obj:
    # Wrap the text stream in a CSV reader that yields one list per row
    csv_reader = reader(read_obj)

    # Skip the header row; the None default keeps an empty file from
    # raising StopIteration
    next(csv_reader, None)

    for row in csv_reader:
        # csv.reader yields [] for blank lines; indexing such a row would
        # raise IndexError, so skip it
        if not row:
            continue

        # The first column is used as the subreddit name and the last
        # column as its subscriber count
        subreddit = row[0]
        subscribers = row[-1]

        # The count is kept as the raw CSV string; it is converted with
        # int() where it is needed later
        subreddit_popularity[subreddit] = subscribers

In [4]:
# Turn the mapping into a list of (subreddit, subscribers) pairs ordered from
# the most to the least subscribed; counts are compared numerically
subreddit_popularity = sorted(
    subreddit_popularity.items(),
    key=lambda entry: int(entry[1]),
    reverse=True,
)

In [5]:
# Create the PRAW client used for all Reddit API requests below.
# Every credential is a constant pulled in by `from config import *`.
reddit = praw.Reddit(
    # Account credentials
    username=REDDIT_USERNAME,
    password=REDDIT_PASSWORD,
    # Application credentials
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
    user_agent=USER_AGENT,
)

In [6]:
# Positions (indices into subreddit_popularity) of entries that must be dropped
remove_list = []

# Count of subreddits confirmed accessible so far; probing stops once it hits n
m = 0

for i, entry in enumerate(subreddit_popularity):
    # Enough accessible subreddits have been confirmed, so stop probing
    if m == n:
        break

    subreddit = entry[0]

    # Special case: r/announcements is excluded due to barely 5 posts this year
    if subreddit == "announcements":
        remove_list.append(i)
        continue

    # Probe the subreddit by pulling a handful of its top posts of the year;
    # a Forbidden error marks it as locked/quarantined
    try:
        # The listing is lazy, so it has to be consumed to trigger the request
        list(reddit.subreddit(subreddit).top('year', limit=5))
    except Forbidden:
        print("Locked/Quarantined Subreddit: {}".format(subreddit))
        remove_list.append(i)
    else:
        m += 1

Locked/Quarantined Subreddit: BlackPeopleTwitter
Locked/Quarantined Subreddit: ImGoingToHellForThis


In [None]:
# Drop every flagged entry from the ranking.
# Pop from the highest index down: removing an element shifts all later
# elements one position to the left, so popping in ascending order would make
# each remaining index point at the wrong subreddit. set() guards against an
# index being listed twice.
for i in sorted(set(remove_list), reverse=True):
    subreddit_popularity.pop(i)

In [None]:
# Write the top n subreddits as a JSON object mapping name -> subscriber count,
# one tab-indented entry per line.
# int() normalises the count so the emitted value is always a valid JSON number.
entries = [
    "\t\"{}\":{}".format(subreddit[0], int(subreddit[1]))
    for subreddit in subreddit_popularity[:n]
]

# The context manager closes the file even if a write fails
with open("../data/top_subreddits.json", "w") as file:
    file.write("{\n")

    # Joining with ",\n" puts a comma after every entry except the last,
    # which keeps the JSON valid even when fewer than n entries are available
    if entries:
        file.write(",\n".join(entries))
        file.write("\n")

    file.write("}\n")