In [2]:
import praw
import pandas as pd
import datetime

In [3]:
# initialize with your own id
reddit = praw.Reddit(client_id = 'your_client_id', 
                     client_secret = 'your_client_secret', user_agent = 'your_user_agent')

In [4]:
# function to created datetime from UNIX timestamp
def get_date(created):
    return datetime.datetime.fromtimestamp(created)

In [27]:
subred_list = ["Singapore", "SGExams", "mentalhealthsg", "SingaporeRaw", "nationalservicesg", 
                "askSingapore", "NUS", "NTU", "SMU_Singpore", "SingaporePoly", "singaporefi"]

# attributes of each submission to obtain
fields = ["author", "id", "title", "created_utc", "subreddit", "subreddit_subscribers", "num_comments", 
          "score", "selftext", "upvote_ratio", "url", 'comments']

#keywords from the google sheets
keywords = ['tighter measures', 'phase 2', 'community cases', 'mask', 'asymptomatic', 'fatality rate', 'clinical trial', 
            'contact tracing', 'contactless', 'national emergency', 'transmission', 'screening', 'testing', 'hospital', 
            'healthcare worker']

In [16]:
# setting the start and end date i.e. covid-19 period
start = datetime.datetime(2019,1,1,0,0)
end = datetime.datetime(2021,6,30,23,59)

In [11]:
# turns the subred_list into a long string
subred_string = '+'.join(subred_list)

In [34]:
def scrape_data(subreds, fields, start, end):
    # initialize empty list to store the data of each post
    data = []
    for word in keywords:
        
        # searching all the subreddits for each word in the keywords list
        posts = reddit.subreddit(subreds).search(word, limit = 9999)
        
        # loop through each submission and get the required attributes 
        for post in posts:
            # store the required attributes into a list to append to the data list afterwards
            post_fields = [post.author, post.id, post.title, post.created_utc, post.subreddit, post.subreddit_subscribers, 
                           post.num_comments,post.score, post.selftext, post.upvote_ratio, post.url]
            comments = []
            
            # flatten the CommentForest in the comments attribute of each submission
            post.comments.replace_more(limit = None)
            
            # after flattening, post.comments.list() turns the flattened CommentForest into an iterable for looping through
            # each individual comment
            for comment in post.comments.list():
                comments.append(comment.body)
                
            # add the list of comments into the list of attributes above
            post_fields.append(comments)
            
            # append the attribute list of each submission into the data list 
            data.append(post_fields)
            
    # convert the data list into a dataframe
    df = pd.DataFrame(data, columns = fields)
    
    # change the UNIX timestamp into datetime
    df['created_utc'] = df['created_utc'].apply(get_date)
    
    # filter the data based on the time period i.e. covid-19 period
    return df[(df['created_utc'] >= start) & (df['created_utc'] <= end)]

In [35]:
df = scrape_data(subred_string, fields, start, end)

In [36]:
df

Unnamed: 0,author,id,title,created_utc,subreddit,subreddit_subscribers,num_comments,score,selftext,upvote_ratio,url,comments
0,Durian881,ki0u86,Singapore to bar UK travellers over new corona...,2020-12-22 15:43:54,singapore,378328,60,269,,0.98,https://www.channelnewsasia.com/news/singapore...,"[Finally., [deleted], But there's no news on i..."
1,Durian881,fo41t7,MOH List of all new tighter measures,2020-03-24 20:08:40,singapore,378328,37,37,,0.97,https://www.moh.gov.sg/news-highlights/details...,[>6. Other public venues such as re...
2,salmonsonice,nc27zz,Do you think there will be CB 2.0 ?,2021-05-14 14:43:12,askSingapore,44564,14,6,https://www.channelnewsasia.com/news/singapore...,0.87,https://www.reddit.com/r/askSingapore/comments...,"[Tbh as a teacher, while we hate lessons being..."
3,hun486,no43k2,"Students to sit PSLE, O-level oral exams in pe...",2021-05-30 13:15:39,singapore,378328,31,91,,0.93,https://www.straitstimes.com/singapore/parenti...,[Guess that's why they are rushing to vaccinat...
4,themoorofvenice,nd3md4,S'pore announces tighter Covid-19 measures for...,2021-05-16 01:11:53,singapore,378328,64,92,,0.93,https://www.straitstimes.com/singapore/health/...,[Am I missing something? We dallied for nearly...
...,...,...,...,...,...,...,...,...,...,...,...,...
2315,dlialala,fx2u6i,MOH Press Release Discrepancies,2020-04-08 16:39:13,singapore,378431,5,7,I've been trying to follow the press releases ...,0.61,https://www.reddit.com/r/singapore/comments/fx...,[Could it simply be an administrative error th...
2316,Locastor,id6uky,Anyone sat through an info session for an SGUn...,2020-08-20 17:05:17,SingaporeRaw,13714,0,1,As per:\n\nhttps://www.straitstimes.com/singap...,0.67,https://www.reddit.com/r/SingaporeRaw/comments...,[]
2317,dragonth20,avh0v3,"The price of hawker food, transport, etc. all ...",2019-02-28 03:02:17,singapore,378431,120,33,EDIT: It seems I haven't really gotten my poin...,0.64,https://www.reddit.com/r/singapore/comments/av...,"[I haven't written a serious reply in a while,..."
2318,theunraveler1,f6rhgg,About the Filipino workers in Singapore,2020-02-20 19:01:31,singapore,378431,10,0,"When I was young, the only jobs that I see Fil...",0.38,https://www.reddit.com/r/singapore/comments/f6...,"[did you time travel here from the 90s, Filipi..."


In [37]:
#export dataframe as csv
df.to_csv('reddit-5.csv',index = None, header = True)