In [5]:
import requests
import json
import time
import pandas as pd

# scrape parameters and module-level result holders
sub = 'mentalhealth'    # subreddit name handed to the fetch functions
after = '1577836800'    # window start as a Unix-epoch string (January 1 2020)
before = '1577923199'   # window end as a Unix-epoch string (86399 s after `after`)
redditPostData = list()     # holder for collected post records
redditCommentData = list()  # holder for collected comment records


def getRedditPostData(after, before, sub):
    """Fetch a single page of submissions from the Pushshift search API.

    Builds the submission-search URL for ``sub`` bounded by ``after`` and
    ``before``, prints it, and performs one GET request.

    Args:
        after: lower time bound; inserted into the ``after`` query parameter via ``str()``.
        before: upper time bound; inserted into the ``before`` query parameter via ``str()``.
        sub: subreddit name; inserted into the ``subreddit`` query parameter via ``str()``.

    Returns:
        list: the value stored under ``'data'`` in the JSON response. An empty
        list is returned when the response has no JSON Content-Type, when the
        body cannot be parsed, or when the payload carries no ``'data'`` entry.

    Note:
        The URL asks for ``size=1000``, but the server decides how many records
        a call actually returns (the original note here says roughly 100).
    """
    url = (
        'https://api.pushshift.io/reddit/search/submission/?size=1000'
        + '&after=' + str(after)
        + '&before=' + str(before)
        + '&subreddit=' + str(sub)
    )
    print(url)
    r = requests.get(url)  # make api call

    # headers.get() yields None when the server sends no Content-Type header;
    # fall back to '' so the membership test cannot raise TypeError.
    if 'json' not in (r.headers.get('Content-Type') or ''):
        return []

    try:
        data = json.loads(r.text)  # convert json text to python objects
    except ValueError:
        # Header claimed JSON but the body is not parseable: same fallback as non-JSON.
        return []

    # Error payloads may be JSON without a 'data' entry; treat them as an empty page.
    if not isinstance(data, dict):
        return []
    return data.get('data') or []

def getFullPostData(after, before, sub):
    """Collect every page of posts for ``sub`` between ``after`` and ``before``.

    Repeatedly calls ``getRedditPostData``; after each non-empty page the
    ``after`` bound is moved to the ``created_utc`` of that page's last record
    and the next page is requested. Stops at the first empty page. The running
    total is printed after each page, and the loop sleeps 5 seconds after
    every fifth follow-up request to throttle API calls.

    NOTE(review): paging by the last ``created_utc`` presumably relies on the
    API returning records oldest-first and treating ``after`` as exclusive —
    confirm against the API documentation.

    Returns:
        list: all records gathered across pages, in the order received.
    """
    collected = []
    requestsMade = 0
    page = getRedditPostData(after, before, sub)  # first page

    while len(page):
        collected.extend(page)
        print(len(collected))
        after = page[-1]['created_utc']  # resume from the last record seen
        page = getRedditPostData(after, before, sub)
        requestsMade += 1
        if requestsMade % 5 == 0:  # throttle amount of api calls
            time.sleep(5)

    return collected



def getRedditCommentData(after, before, sub):
    """Fetch a single page of comments from the Pushshift search API.

    Builds the comment-search URL for ``sub`` bounded by ``after`` and
    ``before``, prints it, and performs one GET request.

    Args:
        after: lower time bound; inserted into the ``after`` query parameter via ``str()``.
        before: upper time bound; inserted into the ``before`` query parameter via ``str()``.
        sub: subreddit name; inserted into the ``subreddit`` query parameter via ``str()``.

    Returns:
        list: the value stored under ``'data'`` in the JSON response. An empty
        list is returned when the response has no JSON Content-Type, when the
        body cannot be parsed, or when the payload carries no ``'data'`` entry.

    Note:
        The URL asks for ``size=1000``, but the server decides how many records
        a call actually returns (the original note here says roughly 100).
    """
    url = (
        'https://api.pushshift.io/reddit/search/comment/?size=1000'
        + '&after=' + str(after)
        + '&before=' + str(before)
        + '&subreddit=' + str(sub)
    )
    print(url)
    r = requests.get(url)  # make api call

    # headers.get() yields None when the server sends no Content-Type header;
    # fall back to '' so the membership test cannot raise TypeError.
    if 'json' not in (r.headers.get('Content-Type') or ''):
        return []

    try:
        data = json.loads(r.text)  # convert json text to python objects
    except ValueError:
        # Header claimed JSON but the body is not parseable: same fallback as non-JSON.
        return []

    # Error payloads may be JSON without a 'data' entry; treat them as an empty page.
    if not isinstance(data, dict):
        return []
    return data.get('data') or []

def getFullCommentData(after, before, sub):
    """Collect every page of comments for ``sub`` between ``after`` and ``before``.

    Repeatedly calls ``getRedditCommentData``; after each non-empty page the
    ``after`` bound is moved to the ``created_utc`` of that page's last record
    and the next page is requested. Stops at the first empty page. The running
    total is printed after each page, and the loop sleeps 5 seconds after
    every fifth follow-up request to throttle API calls.

    NOTE(review): paging by the last ``created_utc`` presumably relies on the
    API returning records oldest-first and treating ``after`` as exclusive —
    confirm against the API documentation.

    Returns:
        list: all records gathered across pages, in the order received.
    """
    collected = []
    requestsMade = 0
    page = getRedditCommentData(after, before, sub)  # first page

    while len(page):
        collected.extend(page)
        print(len(collected))
        after = page[-1]['created_utc']  # resume from the last record seen
        page = getRedditCommentData(after, before, sub)
        requestsMade += 1
        if requestsMade % 5 == 0:  # throttle amount of api calls
            time.sleep(5)

    return collected



def getRedditCommentDataByPost(link_id, sub):
    """Fetch one page of comments belonging to a single post from Pushshift.

    Builds the comment-search URL filtered by ``link_id`` and ``sub``, prints
    it, and performs one GET request.

    Args:
        link_id: post identifier; inserted into the ``link_id`` query parameter via ``str()``.
        sub: subreddit name; inserted into the ``subreddit`` query parameter via ``str()``.

    Returns:
        list: the value stored under ``'data'`` in the JSON response, or an
        empty list when the body is not valid JSON or has no usable ``'data'``
        entry. (An empty list rather than None keeps the result safe to
        concatenate and matches the other fetch helpers in this notebook.)

    Note:
        The URL asks for ``size=1000``, but the server decides how many records
        a call actually returns (the original note here says roughly 100).
    """
    url = (
        'https://api.pushshift.io/reddit/search/comment/?size=1000'
        + '&link_id=' + str(link_id)
        + '&subreddit=' + str(sub)
    )
    print(url)
    r = requests.get(url)  # make api call
    try:
        data = json.loads(r.text)  # convert json text to python objects
        comments = data['data']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON; KeyError/TypeError: payload lacks a 'data' mapping entry.
        # Only these are caught so that KeyboardInterrupt and real bugs still surface.
        return []
    return comments if comments is not None else []

def getFullCommentDataByPost(postData):
    """Gather comments for every post in ``postData``.

    For each post, its ``'id'`` and ``'subreddit'`` entries are passed to
    ``getRedditCommentDataByPost`` (one request per post, so at most one page
    of comments per post) and the results are accumulated. The number of posts
    is printed up front and the running comment total after each post.

    Args:
        postData: iterable of post records (mappings with ``'id'`` and ``'subreddit'``).

    Returns:
        list: all comment records collected, in post order.
    """
    commentData = []
    # The counter starts at 4, so the first 5-second pause happens right after
    # the first request and then after every fifth request (1st, 6th, 11th, ...).
    count = 4
    print(len(postData))
    for post in postData:
        sub = post['subreddit']
        link_id = post['id']
        fetched = getRedditCommentDataByPost(link_id, sub)
        # A failed fetch may yield None instead of a list; treat it as "no
        # comments" so one bad response does not abort the whole run.
        commentData.extend(fetched or [])
        print(len(commentData))
        count += 1
        if count % 5 == 0:  # throttle amount of api calls
            time.sleep(5)

    return commentData




In [6]:
# Scrape driver: walk from `counter` to `today` in fixed windows, fetch the
# posts and comments of each window, and append them to two CSV files.
counter = 1577857724    # epoch seconds at which the first window starts
today = 1609477200      # epoch seconds; the loop ends once a window would start at/after this
windowSeconds = 86399   # window length and step (one second short of 24 hours)

# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 (applymap is
# deprecated since then); use whichever the installed pandas provides.
_applyElementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap


def _escapeText(value):
    """Return str values passed through the 'unicode_escape' codec; other values unchanged."""
    if isinstance(value, str):
        return value.encode('unicode_escape').decode('utf-8')
    return value


while counter < today:
    windowEnd = counter + windowSeconds

    redditPostData = getFullPostData(counter, windowEnd, sub)
    # The element-wise call returns a NEW frame. The result must be kept,
    # otherwise the escaping is computed and thrown away before the CSV is written.
    df_json = _applyElementwise(pd.DataFrame(redditPostData), _escapeText)
    # NOTE(review): with mode='a' and default to_csv options every window
    # appends its own header row and index column, and the columns are whatever
    # keys that window's records had. Readers of the CSV must cope with that.
    df_json.to_csv('reddit_post_data_2020.csv', encoding='utf-8', mode='a')
    print("done reddit post")

    # Comments go into their own holder instead of overwriting the post results.
    redditCommentData = getFullCommentData(counter, windowEnd, sub)
    df_json = _applyElementwise(pd.DataFrame(redditCommentData), _escapeText)
    df_json.to_csv('reddit_comment_data_2020.csv', encoding='utf-8', mode='a')
    print("done reddit comment")

    counter = windowEnd


https://api.pushshift.io/reddit/search/submission/?size=1000&after=1577857724&before=1577944123&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1577918090&before=1577944123&subreddit=mentalhealth
158
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1577943670&before=1577944123&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1577857724&before=1577944123&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1577882818&before=1577944123&subreddit=mentalhealth
200
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1577896092&before=1577944123&subreddit=mentalhealth
300
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1577909493&before=1577944123&subreddit=mentalhealth
400
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1577925727&before=1577944123&subreddit=mentalhealth
500
https://api.pushsh

100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1578660471&before=1578721714&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1578721714&before=1578808113&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1578721714&before=1578808113&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1578808113&before=1578894512&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1578808113&before=1578894512&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1578831525&before=1578894512&subreddit=mentalhealth
200
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1578849654&before=1578894512&subreddit=mentalhealth
300
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1578861687&before=1578894512

100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1579953017&before=1580017699&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1580017699&before=1580104098&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1580017699&before=1580104098&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1580104098&before=1580190497&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1580164732&before=1580190497&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1580104098&before=1580190497&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1580190497&before=1580276896&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000

done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1582091275&before=1582177674&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1582119898&before=1582177674&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1582177674&before=1582264073&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1582177674&before=1582264073&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1582264073&before=1582350472&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1582316149&before=1582350472&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1582264073&before=1582350472&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000

100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1584266691&before=1584337649&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1584337649&before=1584424048&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1584337649&before=1584424048&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1584424048&before=1584510447&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1584424048&before=1584510447&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1584439614&before=1584510447&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1584510447&before=1584596846&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&af

100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1585996984&before=1586065629&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1586065629&before=1586152028&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1586065629&before=1586152028&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1586077689&before=1586152028&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1586152028&before=1586238427&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1586152028&before=1586238427&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1586238427&before=1586324826&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&af

100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1588077179&before=1588139205&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1588139205&before=1588225604&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1588139205&before=1588225604&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1588225604&before=1588312003&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1588273113&before=1588312003&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1588225604&before=1588312003&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1588312003&before=1588398402&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000

200
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1590072397&before=1590126382&subreddit=mentalhealth
300
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1590084690&before=1590126382&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1590126382&before=1590212781&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1590126382&before=1590212781&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1590212781&before=1590299180&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1590212781&before=1590299180&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1590226173&before=1590299180&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=159029918

done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1591854362&before=1591940761&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1591940761&before=1592027160&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1591992696&before=1592027160&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1591940761&before=1592027160&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1592027160&before=1592113559&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1592027160&before=1592113559&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1592113559&before=1592199958&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1

done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1594014337&before=1594100736&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1594100736&before=1594187135&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1594100736&before=1594187135&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1594115431&before=1594187135&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1594187135&before=1594273534&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1594187135&before=1594273534&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1594273534&before=1594359933&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/

done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1596174312&before=1596260711&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1596260711&before=1596347110&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1596260711&before=1596347110&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1596275287&before=1596347110&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1596347110&before=1596433509&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1596347110&before=1596433509&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1596433509&before=1596519908&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/

100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1598177225&before=1598247888&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1598247888&before=1598334287&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1598247888&before=1598334287&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1598334287&before=1598420686&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1598334287&before=1598420686&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1598347273&before=1598420686&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1598420686&before=1598507085&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&af

100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1600368604&before=1600407863&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1600321464&before=1600407863&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1600407863&before=1600494262&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1600407863&before=1600494262&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1600494262&before=1600580661&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1600494262&before=1600580661&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1600508272&before=1600580661&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000

done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1602308641&before=1602395040&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1602395040&before=1602481439&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1602440292&before=1602481439&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1602395040&before=1602481439&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1602481439&before=1602567838&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1602525605&before=1602567838&subreddit=mentalhealth
200
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1602557389&before=1602567838&subreddit=mentalhealth
226
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1602567712&befor

done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1604295818&before=1604382217&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1604295818&before=1604382217&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1604307138&before=1604382217&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1604382217&before=1604468616&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1604382217&before=1604468616&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1604468616&before=1604555015&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1604468616&before=1604555015&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&af

100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1605796631&before=1605851000&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1605764601&before=1605851000&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1605851000&before=1605937399&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1605851000&before=1605937399&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1605937399&before=1606023798&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1605974040&before=1606023798&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1605937399&before=1606023798&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1

done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1607924576&before=1608010975&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1607924576&before=1608010975&subreddit=mentalhealth
100
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1607930844&before=1608010975&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1608010975&before=1608097374&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1608010975&before=1608097374&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submission/?size=1000&after=1608097374&before=1608183773&subreddit=mentalhealth
done reddit post
https://api.pushshift.io/reddit/search/comment/?size=1000&after=1608097374&before=1608183773&subreddit=mentalhealth
done reddit comment
https://api.pushshift.io/reddit/search/submi