## Collect all submissions within a subreddit

In [28]:
import csv
import datetime
import os

import praw

### convert a date (day, month, year) to Unix epoch time

In [4]:
def epoch_converter(day, month, year, tzinfo=None):
    """Return the Unix epoch timestamp (whole seconds) for midnight on a date.

    Parameters
    ----------
    day, month, year : int
        Calendar date; the time of day is fixed at 00:00.
    tzinfo : datetime.tzinfo, optional
        Timezone in which midnight is taken. When omitted, the datetime is
        naive and ``timestamp()`` interprets it in the machine's local
        timezone, so the result differs between machines. Pass
        ``datetime.timezone.utc`` for a machine-independent value.

    Returns
    -------
    int
        Seconds since 1970-01-01T00:00:00Z, truncated to an integer.
    """
    midnight = datetime.datetime(year, month, day, 0, 0, tzinfo=tzinfo)
    return int(midnight.timestamp())

### configure the Reddit API client

In [8]:
# Reddit API client used by the single-subreddit cells below.
# The credential strings are placeholders; substitute your own app's values.
reddit = praw.Reddit(
    client_id='your client id',
    client_secret='your client secret',
    user_agent='collect all data in the subreddit',
)

Version 5.2.0 of praw is outdated. Version 5.3.0 was released Sunday December 17, 2017.


### set the beginning and end (1 month for now)

In [31]:
# Collection window: from 1 January 2017 up to 1 February 2017.
start_date, end_date = (
    epoch_converter(day=1, month=window_month, year=2017)
    for window_month in (1, 2)
)
print(start_date, end_date)

1483250400 1485928800


### set subreddit name

In [32]:
# Handle for the subreddit whose submissions are collected in the cells below.
subreddit = reddit.subreddit('Parenting')

### collecting

In [33]:
# Materialise every submission returned for the [start_date, end_date] window.
total_submissions = list(subreddit.submissions(start_date, end_date))

In [34]:
# Sanity check: how many submissions were collected for the window.
print(len(total_submissions))

955


### save to csv with a naming convention: subredditname_yyyymmdd.csv (starting date)

In [45]:
# Submission attributes exported to the CSV; list order is the column order.
# NOTE(review): 'save', 'unhide' and 'unsave' look like method names rather than
# data attributes - confirm they are really wanted as columns.
fields = [
    'approved_at_utc', 'approved_by', 'archived', 'author',
    'author_flair_css_class', 'author_flair_text',
    'banned_at_utc', 'banned_by', 'brand_safe',
    'can_gild', 'can_mod_post', 'clicked', 'comment_limit', 'comment_sort', 'contest_mode',
    'created', 'created_utc',
    'distinguished', 'domain', 'downs',
    'edited', 'fullname', 'gilded',
    'hidden', 'hide_score', 'id',
    'is_crosspostable', 'is_reddit_media_domain', 'is_self', 'is_video',
    'likes', 'link_flair_css_class', 'link_flair_text', 'locked',
    'media', 'mod_note', 'mod_reason_by', 'mod_reason_title', 'mod_reports',
    'name', 'num_comments', 'num_crossposts', 'num_reports',
    'over_18', 'parent_whitelist_status', 'permalink', 'pinned',
    'quarantine', 'removal_reason', 'report_reasons',
    'save', 'score', 'secure_media', 'secure_media_embed', 'selftext', 'shortlink', 'spoiler',
    'stickied', 'subreddit', 'subreddit_id', 'subreddit_name_prefixed', 'subreddit_type',
    'suggested_sort',
    'thumbnail', 'title',
    'unhide', 'unsave', 'ups', 'url', 'user_reports',
    'view_count', 'visited', 'whitelist_status',
]

In [37]:
# Write the collected submissions to a CSV file: one header row, then one row
# per submission with every value converted to its string form.
output_path = 'Parenting_20170101.csv'

content = [fields]  # header
for s in total_submissions:
    # getattr() goes through the normal attribute lookup, including any
    # __getattr__ fallback defined on the object; calling __getattribute__
    # directly would skip that fallback.
    content.append([str(getattr(s, field)) for field in fields])

# newline='' lets the csv module control line endings itself.
with open(output_path, "w", newline='', encoding='utf-8') as f:
    csv.writer(f).writerows(content)

print(output_path + ' is saved in current directory')

Parenting_20170101.csv is saved in current directory


### function: collect one date range of a subreddit into a csv file

In [76]:
def collect_subreddit(subreddit_name, start_date, end_date, filename, client_id, client_secret):
    """Fetch a subreddit's submissions for a time window and save them as CSV.

    A new ``praw.Reddit`` client is created on every call. The CSV contains a
    header row with the attribute names followed by one row per submission,
    each value converted with ``str()``.

    Parameters
    ----------
    subreddit_name : str
        Name passed to ``reddit.subreddit``.
    start_date, end_date : int
        Window bounds passed unchanged to ``subreddit.submissions``; the
        callers in this notebook supply Unix epoch seconds.
    filename : str
        Output CSV path. A missing parent directory is created.
    client_id, client_secret : str
        Reddit API credentials.

    Returns
    -------
    None
        The result is the written file and a confirmation line on stdout.
    """
    # Create the target directory up front, so a missing folder cannot make
    # the write fail after all the fetching has already been done.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    reddit = praw.Reddit(user_agent='collect all data in the subreddit',
                         client_id=client_id, client_secret=client_secret)
    subreddit = reddit.subreddit(subreddit_name)

    # Submission attributes exported as CSV columns, in column order.
    fields = [
        'approved_at_utc', 'approved_by', 'archived', 'author',
        'author_flair_css_class', 'author_flair_text',
        'banned_at_utc', 'banned_by', 'brand_safe',
        'can_gild', 'can_mod_post', 'clicked', 'comment_limit', 'comment_sort', 'contest_mode',
        'created', 'created_utc',
        'distinguished', 'domain', 'downs',
        'edited', 'fullname', 'gilded',
        'hidden', 'hide_score', 'id',
        'is_crosspostable', 'is_reddit_media_domain', 'is_self', 'is_video',
        'likes', 'link_flair_css_class', 'link_flair_text', 'locked',
        'media', 'mod_note', 'mod_reason_by', 'mod_reason_title', 'mod_reports',
        'name', 'num_comments', 'num_crossposts', 'num_reports',
        'over_18', 'parent_whitelist_status', 'permalink', 'pinned',
        'quarantine', 'removal_reason', 'report_reasons',
        'save', 'score', 'secure_media', 'secure_media_embed', 'selftext', 'shortlink', 'spoiler',
        'stickied', 'subreddit', 'subreddit_id', 'subreddit_name_prefixed', 'subreddit_type',
        'suggested_sort',
        'thumbnail', 'title',
        'unhide', 'unsave', 'ups', 'url', 'user_reports',
        'view_count', 'visited', 'whitelist_status',
    ]

    content = [fields]  # header row

    # NOTE(review): Subreddit.submissions() appears to have been removed in
    # later PRAW major versions - confirm against the installed version.
    for submission in subreddit.submissions(start_date, end_date):
        # getattr() goes through the normal attribute lookup, including any
        # __getattr__ fallback on the object; calling __getattribute__
        # directly would skip that fallback.
        content.append([str(getattr(submission, field)) for field in fields])

    # newline='' lets the csv module control line endings itself.
    with open(filename, "w", newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(content)

    print(filename + ' is saved in current directory')

In [79]:
client_id = 'your client id'
client_secret = 'your client secret'
subreddit_name = 'kiddiekitchen'

# One CSV per calendar month, January through November of each year.
# range(1, 12) stops at November because the window end is built as month + 1;
# December is collected by the separate loop further down.
for year in range(2005, 2019):
    for month in range(1, 12):
        filename = '{name}/{name}_{year}-{month}.csv'.format(
            name=subreddit_name, year=year, month=month)

        start_date = epoch_converter(day=1, month=month, year=year)
        end_date = epoch_converter(day=1, month=month + 1, year=year)

        collect_subreddit(
            subreddit_name=subreddit_name,
            start_date=start_date,
            end_date=end_date,
            filename=filename,
            client_id=client_id,
            client_secret=client_secret,
        )

kiddiekitchen/kiddiekitchen_2005-1.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-2.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-3.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-4.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-5.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-6.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-7.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-8.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-9.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-10.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2005-11.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2006-1.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2006-2.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2006-3.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2006

kiddiekitchen/kiddiekitchen_2015-10.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2015-11.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-1.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-2.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-3.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-4.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-5.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-6.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-7.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-8.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-9.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-10.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-11.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2017-1.csv is saved in current directory
kiddiekitchen/kiddiekitchen_20

### for December every year (the monthly loop above stops at November)

In [80]:
# December window: 1 December of `year` up to 1 January of the following year.
# NOTE(review): this starts at 2007 while the monthly loop starts at 2005 -
# confirm whether December 2005 and 2006 should be collected as well.
for year in range(2007, 2019):
    filename = '{name}/{name}_{year}-{month}.csv'.format(
        name=subreddit_name, year=year, month=12)

    start_date = epoch_converter(day=1, month=12, year=year)
    end_date = epoch_converter(day=1, month=1, year=year + 1)

    collect_subreddit(
        subreddit_name=subreddit_name,
        start_date=start_date,
        end_date=end_date,
        filename=filename,
        client_id=client_id,
        client_secret=client_secret,
    )

kiddiekitchen/kiddiekitchen_2007-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2008-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2009-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2010-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2011-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2012-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2013-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2014-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2015-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2016-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2017-12.csv is saved in current directory
kiddiekitchen/kiddiekitchen_2018-12.csv is saved in current directory
