# Setting up Reddit's API

In [1]:
import pandas as pd 
import requests 
import time 
import getpass
import praw

In [None]:
# Collect the Reddit app credentials interactively so they are never stored in
# the notebook. getpass hides what is typed, so every prompt is labelled;
# without labels all five prompts look identical and it is easy to enter the
# values in the wrong order.
client_id = getpass.getpass('Reddit app client ID: ')
client_secret = getpass.getpass('Reddit app client secret: ')
user_agent = getpass.getpass('User agent string: ')
username = getpass.getpass('Reddit username: ')
password = getpass.getpass('Reddit password: ')

In [5]:
# Form fields for the token request: a 'password' grant for the given account.
data = dict(
    grant_type='password',
    username=username,
    password=password,
)

# The app's client id / secret travel as HTTP Basic credentials.
auth = requests.auth.HTTPBasicAuth(client_id, client_secret)

In [6]:
# Identify this application with an informative User-Agent header.
headers = {}
headers['User-Agent'] = 'dsb318/0.0.1'

# Send the credentials to the access-token endpoint.
res = requests.post('https://www.reddit.com/api/v1/access_token', headers=headers, data=data, auth=auth)

# Show the response object (its repr includes the HTTP status code).
print(res)

<Response [200]>


In [9]:
# Retrieve the access token from the token response.
token_response = res.json()

# A rejected token request yields a body without 'access_token'. Fail with the
# response contents so the cause is visible, instead of a bare KeyError.
if 'access_token' not in token_response:
    raise KeyError(f"Token request failed, no 'access_token' in response: {token_response}")

token = token_response['access_token']

In [10]:
# Attach the bearer token to the shared headers so later requests carry it.
headers.update({'Authorization': f'bearer {token}'})

# Sanity check: the identity endpoint should answer 200 with these headers.
me_response = requests.get('https://oauth.reddit.com/api/v1/me', headers=headers)
me_response.status_code == 200

True

---
# Getting data from r/musicians 

## Manual Method

In [32]:
# Viewing an individual post
#musicians['data']['children'][24]['data']['selftext']

'Anyone know of any good and good priced DAWs? I only know of FL Studio rn but due to shallow pockets I can’t really get that right now. If there’s any cheaper ones that work well please let me know, it would be greatly appreciated.'

In [53]:
base_url = 'https://oauth.reddit.com/r/' #would modify this url to grab data from the hot / controversial
subreddit = 'musicians'                  # try the praw (a python reddit api wrapper, read documentation)

# Single un-paginated request; the parsed JSON is kept in `musicians` so the
# listing structure can be inspected by hand
# (e.g. musicians['data']['children'][i]['data']['selftext']).
res = requests.get(base_url + subreddit, headers=headers)
musicians = res.json()

# Page through the listing until the target number of submissions is collected
musicians_submissions = []
target_submissions = 1000
params = {'limit': 100}  # page size requested on every call

while len(musicians_submissions) < target_submissions:

    res = requests.get(base_url + subreddit, headers=headers, params=params)

    # Report HTTP failures as failures rather than as an exhausted listing;
    # this also avoids calling .json() on an error body that may not be JSON.
    if res.status_code != 200:
        print(f"Request failed with status code {res.status_code}; stopping early.")
        break

    musicians_dict = res.json()

    if 'data' not in musicians_dict or 'children' not in musicians_dict['data'] or len(musicians_dict['data']['children']) == 0:
        print("No more submissions available.")
        break

    musicians_submissions_data = musicians_dict['data']['children']

    # Never collect more than the target, even if the final page overshoots it
    remaining = target_submissions - len(musicians_submissions)
    for submission in musicians_submissions_data[:remaining]:
        musicians_submissions.append(submission['data']['selftext'])

    # Continue from the listing's own 'after' cursor. A page shorter than
    # 'limit' is not treated as the end; only a missing/None cursor is.
    next_page_cursor = musicians_dict['data'].get('after')
    if next_page_cursor is None:
        print("No more submissions available.")
        break

    params['after'] = next_page_cursor

print(len(musicians_submissions))


No more submissions available.
990


In [57]:
# Build a one-column dataframe holding the selftext of every collected r/musicians submission.
# (The bare `musicians_submissions` expression that used to sit here was removed: only the
# last expression of a cell is displayed, so it had no effect.)
musicians_dataframe = pd.DataFrame(musicians_submissions, columns = ['selftext'])
# Exporting to csv, without the row index
musicians_dataframe.to_csv('../data/musicians_dataframe.csv', index = False)

## Using PRAW (Python Reddit API Wrapper)

In [62]:
# Create the PRAW client from the app credentials entered earlier
reddit = praw.Reddit(
    user_agent=user_agent,
    client_secret=client_secret,
    client_id=client_id,
)

In [77]:
# Collect the "hot" listing of r/musicians into a dataframe and export it as csv

# Column labels, in the same order as the values gathered for each post below
# (the post's selftext is stored under 'body').
post_columns = ['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created']

musicians_subreddit = reddit.subreddit('musicians')

# One row per post returned by the listing
musicians_hot_posts = [
    [post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created]
    for post in musicians_subreddit.hot(limit=1000)
]

musicians_hot_posts_df = pd.DataFrame(musicians_hot_posts, columns=post_columns)
musicians_hot_posts_df.to_csv('../project-3/data/musicians_hot_posts_df.csv')

In [78]:
# Collect the "new" listing of r/musicians into a dataframe and export it as csv
# (reuses the musicians_subreddit handle created in the hot-posts cell)

# Column labels, in the same order as the values gathered for each post below
# (the post's selftext is stored under 'body').
post_columns = ['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created']

# One row per post returned by the listing
musicians_new_posts = [
    [post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created]
    for post in musicians_subreddit.new(limit=1000)
]

musicians_new_posts_df = pd.DataFrame(musicians_new_posts, columns=post_columns)
musicians_new_posts_df.to_csv('../project-3/data/musicians_new_posts_df.csv')

In [85]:
# Collect the "top" listing of r/musicians into a dataframe and export it as csv
# (reuses the musicians_subreddit handle created in the hot-posts cell)

# Column labels, in the same order as the values gathered for each post below
# (the post's selftext is stored under 'body').
post_columns = ['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created']

# One row per post returned by the listing
musicians_top_posts = [
    [post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created]
    for post in musicians_subreddit.top(limit=1000)
]

musicians_top_posts_df = pd.DataFrame(musicians_top_posts, columns=post_columns)
musicians_top_posts_df.to_csv('../project-3/data/musicians_top_posts_df.csv')


In [86]:
# Collect the "rising" listing of r/musicians into a dataframe and export it as csv
# (reuses the musicians_subreddit handle created in the hot-posts cell)

# Column labels, in the same order as the values gathered for each post below
# (the post's selftext is stored under 'body').
post_columns = ['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created']

# One row per post returned by the listing
musicians_rising_posts = [
    [post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created]
    for post in musicians_subreddit.rising(limit=1000)
]

musicians_rising_posts_df = pd.DataFrame(musicians_rising_posts, columns=post_columns)
musicians_rising_posts_df.to_csv('../project-3/data/musicians_rising_posts_df.csv')

---
# Getting Data from r/DJs

## Using Manual Method

In [58]:
base_url = 'https://oauth.reddit.com/r/'
subreddit = 'DJs'

# NOTE: the former `djs_dict = res.json()` line was removed from this spot. Its
# request was commented out, so it parsed whatever response an earlier cell had
# left in `res`; the loop below assigns djs_dict from a fresh request anyway.

# Page through the listing until the target number of submissions is collected
djs_submissions = []
target_submissions = 1000
params = {'limit': 100}  # page size requested on every call

while len(djs_submissions) < target_submissions:

    res = requests.get(base_url + subreddit, headers=headers, params=params)

    # Report HTTP failures as failures rather than as an exhausted listing;
    # this also avoids calling .json() on an error body that may not be JSON.
    if res.status_code != 200:
        print(f"Request failed with status code {res.status_code}; stopping early.")
        break

    djs_dict = res.json()

    if 'data' not in djs_dict or 'children' not in djs_dict['data'] or len(djs_dict['data']['children']) == 0:
        print("No more submissions available.")
        break

    djs_submissions_data = djs_dict['data']['children']

    # Never collect more than the target, even if the final page overshoots it
    remaining = target_submissions - len(djs_submissions)
    for submission in djs_submissions_data[:remaining]:
        djs_submissions.append(submission['data']['selftext'])

    # Continue from the listing's own 'after' cursor. A page shorter than
    # 'limit' is not treated as the end; only a missing/None cursor is.
    next_page_cursor = djs_dict['data'].get('after')
    if next_page_cursor is None:
        print("No more submissions available.")
        break

    params['after'] = next_page_cursor

print(len(djs_submissions))


No more submissions available.
924


In [59]:
# Build a one-column dataframe holding the selftext of every collected r/DJs submission.
# (The bare `djs_submissions` expression that used to sit here was removed: only the
# last expression of a cell is displayed, so it had no effect.)
djs_dataframe = pd.DataFrame(djs_submissions, columns = ['selftext'])
# Exporting to csv, without the row index
djs_dataframe.to_csv('../data/djs_dataframe.csv', index = False)

## Using PRAW (Python Reddit API Wrapper) 

In [87]:
# Collect the "hot" listing of r/djs into a dataframe and export it as csv

# Column labels, in the same order as the values gathered for each post below
# (the post's selftext is stored under 'body').
post_columns = ['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created']

djs_subreddit = reddit.subreddit('djs')

# One row per post returned by the listing
djs_hot_posts = [
    [post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created]
    for post in djs_subreddit.hot(limit=1000)
]

djs_hot_posts_df = pd.DataFrame(djs_hot_posts, columns=post_columns)
djs_hot_posts_df.to_csv('../project-3/data/djs_hot_posts_df.csv')

In [88]:
# Collect the "new" listing of r/djs into a dataframe and export it as csv
# (reuses the djs_subreddit handle created in the hot-posts cell)

# Column labels, in the same order as the values gathered for each post below
# (the post's selftext is stored under 'body').
post_columns = ['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created']

# One row per post returned by the listing
djs_new_posts = [
    [post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created]
    for post in djs_subreddit.new(limit=1000)
]

djs_new_posts_df = pd.DataFrame(djs_new_posts, columns=post_columns)
djs_new_posts_df.to_csv('../project-3/data/djs_new_posts_df.csv')

In [89]:
# Collect the "top" listing of r/djs into a dataframe and export it as csv
# (reuses the djs_subreddit handle created in the hot-posts cell)

# Column labels, in the same order as the values gathered for each post below
# (the post's selftext is stored under 'body').
post_columns = ['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created']

# One row per post returned by the listing
djs_top_posts = [
    [post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created]
    for post in djs_subreddit.top(limit=1000)
]

djs_top_posts_df = pd.DataFrame(djs_top_posts, columns=post_columns)
djs_top_posts_df.to_csv('../project-3/data/djs_top_posts_df.csv')

In [90]:
# Collect the "rising" listing of r/djs into a dataframe and export it as csv
# (reuses the djs_subreddit handle created in the hot-posts cell)

# Column labels, in the same order as the values gathered for each post below
# (the post's selftext is stored under 'body').
post_columns = ['title', 'score', 'id', 'subreddit', 'url', 'num_comments', 'body', 'created']

# One row per post returned by the listing
djs_rising_posts = [
    [post.title, post.score, post.id, post.subreddit, post.url, post.num_comments, post.selftext, post.created]
    for post in djs_subreddit.rising(limit=1000)
]

djs_rising_posts_df = pd.DataFrame(djs_rising_posts, columns=post_columns)
djs_rising_posts_df.to_csv('../project-3/data/djs_rising_posts_df.csv')