In [1]:
import datetime
import os
from dataclasses import asdict, dataclass
from typing import Optional, Union

import pandas as pd
import praw
from decouple import config
from tqdm import tqdm


# Shared PRAW client used by the helpers and cells below.
# Credentials are read through `config` so they never appear in the notebook.
# The user agent can now be set with REDDIT_USER_AGENT; the previous
# placeholder "NA" stays as the default so existing setups keep working.
reddit = praw.Reddit(
    client_id=config("REDDIT_KEY"),
    client_secret=config("REDDIT_SECRET"),
    user_agent=config("REDDIT_USER_AGENT", default="NA"),
)



## Functions

In [2]:

def extract_attributes_from_subreddit(subreddit):
    '''
    Copy a fixed set of attributes from a subreddit object into a plain dict.

    The dict keys match the attribute names, except that ``allow_videos`` and
    ``allow_images`` are stored as ``videos_allowed`` and ``images_allowed``.
    '''
    # (output key, attribute name on the subreddit object), in output order
    key_to_attribute = (
        ("active_user_count", "active_user_count"),
        ("url", "url"),
        ("title", "title"),
        ("subscribers", "subscribers"),
        ("subreddit_type", "subreddit_type"),
        ("spoilers_enabled", "spoilers_enabled"),
        ("public_description", "public_description"),
        ("over18", "over18"),
        ("created", "created"),
        ("created_utc", "created_utc"),
        ("lang", "lang"),
        ("videos_allowed", "allow_videos"),
        ("images_allowed", "allow_images"),
    )
    return {key: getattr(subreddit, attribute) for key, attribute in key_to_attribute}


def convert_timestamp(ts):
    '''
    Format an epoch timestamp (seconds) as an "MM-DD-YYYY" string.

    The timestamp is interpreted in the machine's local timezone, because
    ``fromtimestamp`` is called without a ``tz`` argument.
    '''
    return datetime.datetime.fromtimestamp(ts).strftime("%m-%d-%Y")



def get_submissions(subreddit_name: str, category: str, limit: int) -> list:
    '''
    Fetch submissions from one listing of a subreddit.

    Parameters
    ----------
    subreddit_name : subreddit name as passed to ``reddit.subreddit``, e.g. 'MrRobot'
    category : listing to read, one of 'hot', 'new' or 'top'
    limit : passed through as the listing's ``limit`` argument

    Returns
    -------
    list with every object yielded by the chosen listing

    Raises
    ------
    ValueError if ``category`` is not one of the supported listings.
    (Previously any other value silently returned the 'top' listing.)

    >>> get_submissions('MrRobot', 'hot', 10)
    '''
    supported_categories = ('hot', 'new', 'top')
    # Validate first so a typo such as 'Hot' fails loudly and without an API call
    if category not in supported_categories:
        raise ValueError(
            f"Unknown category {category!r}; expected one of {supported_categories}"
        )

    subreddit = reddit.subreddit(subreddit_name)
    # The listing methods are named exactly like the supported categories
    listing = getattr(subreddit, category)
    return list(listing(limit=limit))



def get_comments(submission: "praw.models.reddit.submission.Submission", body: bool=False) -> list:
    '''
    Return the entries of ``submission.comments``.

    Parameters
    ----------
    submission : object exposing an iterable ``comments`` attribute
    body : when True return each comment's ``body`` text instead of the objects

    Returns
    -------
    list of comment objects (``body=False``) or list of body strings (``body=True``)

    With ``body=True`` entries that have no ``body`` attribute are skipped.
    PRAW comment forests can contain "load more comments" placeholders, which
    previously made this branch raise AttributeError.
    '''
    if body:
        return [
            comment.body
            for comment in submission.comments
            if hasattr(comment, "body")
        ]
    return list(submission.comments)



In [15]:
# Sample data for the cells below: 'hot' listing of MrRobot, limit 10
mr10h = get_submissions('MrRobot', 'hot', 10)

In [14]:

@dataclass
class Submission:
    '''
    Flat record of one Reddit submission.

    Fields are filled positionally by ``submission_factory`` and the record is
    turned into a dict with ``asdict`` before being tabulated.
    '''
    subreddit_url: str
    subreddit_name: str
    title: str
    selftext: str
    author: str
    # Epoch seconds on input; replaced by an "MM-DD-YYYY" string in __post_init__
    created: Union[int, float, str]

    over_18: bool
    # The sample output shows False for most rows and a numeric timestamp for others
    edited: Union[bool, float]
    is_original_content: bool
    locked: bool
    spoiler: bool

    num_comments: int
    num_crossposts: int
    num_duplicates: int
    # The sample output shows None here
    num_reports: Optional[int]
    num_upvotes: int
    num_downvotes: int

    def __post_init__(self):
        # Commas in the free-text fields are replaced by spaces
        self.title = self.title.replace(',', ' ')
        self.selftext = self.selftext.replace(',', ' ')
        # Convert raw timestamps only. An already formatted date passes through,
        # so Submission(**asdict(record)) no longer fails on the second conversion.
        if not isinstance(self.created, str):
            self.created = convert_timestamp(self.created)
        
        
@dataclass
class Comment:
    '''Placeholder record for a comment; no fields are defined yet.'''
    
    
    
def submission_factory(subreddit):
    '''
    Build a ``Submission`` record from a submission object.

    Parameters
    ----------
    subreddit : despite its name, this is the *submission* object (it is what
        ``get_submissions`` returns); the name is kept so existing callers work.

    Returns
    -------
    Submission populated from the object's attributes.

    If the object's ``author`` is None (PRAW reports deleted accounts this way)
    the author field is set to '[deleted]' instead of raising AttributeError.
    '''
    submission = subreddit
    # Read the parent subreddit URL once; it feeds two fields
    subreddit_url = submission.subreddit.url
    author = submission.author

    return Submission(
        subreddit_url=subreddit_url,
        # '/r/<name>/' -> '<name>'
        subreddit_name=subreddit_url.split('/')[-2],
        title=submission.title,
        selftext=submission.selftext,
        author=author.name if author is not None else '[deleted]',
        created=submission.created,
        over_18=submission.over_18,
        edited=submission.edited,
        is_original_content=submission.is_original_content,
        locked=submission.locked,
        spoiler=submission.spoiler,
        num_comments=submission.num_comments,
        num_crossposts=submission.num_crossposts,
        num_duplicates=submission.num_duplicates,
        num_reports=submission.num_reports,
        num_upvotes=submission.ups,
        num_downvotes=submission.downs,
    )


In [16]:
# testsub = Submission(
#     mr10h[0].subreddit.url,
#     mr10h[0].subreddit.url.split('/')[-2],
#     mr10h[0].title,
#     mr10h[0].selftext,
#     mr10h[0].author.name,
#     mr10h[0].created,
#     mr10h[0].over_18,
#     mr10h[0].edited,
#     mr10h[0].is_original_content,
#     mr10h[0].locked,
#     mr10h[0].spoiler,
#     mr10h[0].num_comments,
#     mr10h[0].num_crossposts,
#     mr10h[0].num_duplicates,
#     mr10h[0].num_reports,
#     mr10h[0].ups,
#     mr10h[0].downs    
# )


Submission(subreddit_url='/r/MrRobot/', subreddit_name='MrRobot', title='Free Talk Monthly (Month of September 04  2020)', selftext='Comments are sorted by new by default. Feel free to talk about anything. Spoiler tags are not needed for any Mr. Robot talk.', author='AutoModerator', created='09-05-2020', over_18=False, edited=False, is_original_content=False, locked=False, spoiler=False, num_comments=5, num_crossposts=0, num_duplicates=0, num_reports=None, num_upvotes=3, num_downvotes=0)

In [152]:
# Comment objects of the first sampled submission
testcoms = get_comments(mr10h[0])

# Body text of the first reply under the second returned comment
testcoms[1].replies[0].body

'i think its an lsd friendly show. its so immersive.'

# Top 100 Subreddits

In [4]:
# Both modules are already imported in the first cell; the lines are kept so
# this cell still runs on its own.
import pandas as pd
import datetime

# Read the clock once so both strings describe the same day, even if the cell
# happens to run across midnight.
run_time = datetime.datetime.now()
date_asof = run_time.strftime("%m-%d-%Y")       # value written to the 'asof' column
date_file_format = run_time.strftime("%m%d%Y")  # suffix used in output file names

# read_html returns a list of DataFrames, one per table found on the page
top_100 = pd.read_html('https://frontpagemetrics.com/top')

In [20]:
# take the first table from the website and keep, for every entry of its
# "Reddit" column, only the text after the last '/'
top_100_subreddits = [entry.split('/')[-1] for entry in top_100[0].Reddit]

In [7]:
# one subreddit object per name in the top-100 list
subreddits = list(map(reddit.subreddit, top_100_subreddits))

# attribute dict for each subreddit object, with a progress bar while iterating
subreddit_data = [extract_attributes_from_subreddit(handle) for handle in tqdm(subreddits)]


100%|██████████| 100/100 [00:00<00:00, 87967.79it/s]


In [18]:
# tabulate the attribute dicts and append the derived columns in one chain
subreddits_df = pd.DataFrame(subreddit_data).assign(
    created_date=lambda frame: frame.created.apply(convert_timestamp),
    name=top_100_subreddits,
    asof=date_asof,
)

# write the snapshot next to the notebook, dated in the file name
output_path = f'top100subreddits{date_file_format}.csv'
subreddits_df.to_csv(output_path, index=False)

# Submission Data

In [22]:
# Use the first name in the top-100 list as the trial subreddit
test_subreddit = top_100_subreddits[0]

In [25]:
# 'hot' listing of the trial subreddit, limit 10
test_submissions = get_submissions(test_subreddit, 'hot', 10)

In [26]:
# one Submission record per fetched submission
test_result = [submission_factory(fetched) for fetched in test_submissions]
    

In [27]:
# Display the Submission records built in the previous cell
test_result

[Submission(subreddit_url='/r/announcements/', subreddit_name='announcements', title='Now you can make posts with multiple images.', selftext='', author='LanterneRougeOG', created='07-16-2020', over_18=False, edited=False, is_original_content=False, locked=False, spoiler=False, num_comments=3907, num_crossposts=72, num_duplicates=57, num_reports=None, num_upvotes=84383, num_downvotes=0),
 Submission(subreddit_url='/r/announcements/', subreddit_name='announcements', title='Upcoming changes to our content policy  our board  and where we’re going from here', selftext="**TL;DR: We’re working with mods to change our content policy to explicitly address hate.** u/kn0thing **has resigned from our board to fill his seat with a Black candidate  a request we will honor. I want to take responsibility for the history of our policies over the years that got us here  and we still have work to do.**\n\nAfter watching people across the country mourn and demand an end to centuries of murder and violent

In [33]:
# NOTE(review): `test_df` is not assigned in any visible cell of this notebook,
# so this cell fails on a fresh kernel unless it is created first. The
# "Unnamed: 0" column in the output suggests it was read back from a CSV that
# was saved with its index; TODO confirm and restore the defining cell.
test_df

Unnamed: 0,subreddit_url,subreddit_name,title,selftext,author,created,over_18,edited,is_original_content,locked,spoiler,num_comments,num_crossposts,num_duplicates,num_reports,num_upvotes,num_downvotes
0,/r/announcements/,announcements,Now you can make posts with multiple images.,,LanterneRougeOG,07-16-2020,False,False,False,False,False,3907,72,57,,84383,0
1,/r/announcements/,announcements,Update to Our Content Policy,[A few weeks ago](https://www.reddit.com/r/ann...,spez,06-30-2020,False,1.59357e+09,False,False,False,40429,82,79,,20676,0
2,/r/announcements/,announcements,Upcoming changes to our content policy our bo...,**TL;DR: We’re working with mods to change our...,spez,06-06-2020,False,False,False,False,False,42011,84,54,,40540,0
3,/r/announcements/,announcements,Changes to Reddit’s Political Ads Policy,As the 2020 election approaches we are updati...,con_commenter,04-14-2020,False,False,False,False,False,99661,18,18,,21065,0
4,/r/announcements/,announcements,Introducing the Solidarity Award — A 100% cont...,It’s been incredible to witness the ways in wh...,plgrmonedge,04-04-2020,False,False,False,False,False,2830,67,31,,19147,0
5,/r/announcements/,announcements,Imposter,If you’ve participated in Reddit’s April Fools...,powerlanguage,04-02-2020,False,False,False,False,False,1548,24,6,,26843,0
6,/r/announcements/,announcements,Introducing Reddit Polls An All-New Post Type,If you’re looking for an opinion on anything —...,LanterneRougeOG,03-25-2020,False,1.58509e+09,False,False,False,3977,42,21,,67825,0
7,/r/announcements/,announcements,Announcing our partnership and AMA with Crisis...,,jkohhey,03-05-2020,False,False,False,True,False,1,0,25,,15685,0
8,/r/announcements/,announcements,Spring forward… into Reddit’s 2019 transparenc...,**TL;DR: Today we published our 2019** [**Tran...,spez,02-25-2020,False,1.58259e+09,False,False,False,16788,54,18,,36558,0
9,/r/announcements/,announcements,Suspected Campaign from Russia on Reddit,,worstnerd,12-07-2019,False,False,False,True,False,3,0,90,,76387,0


In [35]:
    
def get_hot_submissions(category, limit=10, max_subreddits=10):
    '''
    Collect submission records from the first subreddits of the top-100 list.

    Parameters
    ----------
    category : listing passed to ``get_submissions`` (the function name says
        "hot", but any category that ``get_submissions`` accepts works)
    limit : number of submissions requested per subreddit
    max_subreddits : how many names from the start of the module-level
        ``top_100_subreddits`` list to visit; defaults to the previously
        hard-coded 10

    Returns
    -------
    list of dicts, one per submission, produced by ``asdict`` on the records
    built by ``submission_factory``. Earlier versions only printed this list
    and returned None; the full list is no longer printed.
    '''
    print('Getting list of subreddits...')

    submissions = []

    for subreddit_name in tqdm(top_100_subreddits[:max_subreddits]):
        fetched = get_submissions(subreddit_name, category, limit)
        # Build all records for this subreddit first, then flatten them to dicts
        records = [submission_factory(submission_obj) for submission_obj in fetched]
        submissions.extend(asdict(record) for record in records)

    return submissions


In [36]:
# Trial run: 'hot' category with a limit of 1 submission per subreddit
get_hot_submissions('hot', 1)

Getting list of subreddits...
[Submission(id='hrrh23'), Submission(id='fkoqez'), Submission(id='inh9d5'), Submission(id='in00w0'), Submission(id='hyts0n'), Submission(id='ingamx'), Submission(id='ild5uu'), Submission(id='in93ls'), Submission(id='inc7vo'), Submission(id='in9pi2'), Submission(id='inbhou'), Submission(id='im7k7x'), Submission(id='ine3z5'), Submission(id='fjtbye'), Submission(id='gvcvrc'), Submission(id='gz78ba'), Submission(id='3dasau'), Submission(id='hwnhe2'), Submission(id='inbydt'), Submission(id='i3mmsn'), Submission(id='in9uv3'), Submission(id='ikfj5e'), Submission(id='e3ftbf'), Submission(id='gwpk7v'), Submission(id='ind55t'), Submission(id='i0u9vj'), Submission(id='ije6ob'), Submission(id='gz1ipb'), Submission(id='imzjgx'), Submission(id='ijg653'), Submission(id='i2chtb'), Submission(id='i34zuj'), Submission(id='imiqhp'), Submission(id='ijnnyh'), Submission(id='ijij0d'), Submission(id='ijoipb'), Submission(id='inb83c'), Submission(id='inab5y'), Submission(id='fxky