In [1]:
# Dependencies
import praw
import pandas as pd
from config import client_id, client_secret, user_agent
from tickers import Ticker, scrape_tickers, query_list

In [2]:
# Scrape to get updated tickers and company names
scrape_tickers()

In [3]:
queries = []

# Build raw query list with ticker abbrevation and name of company
for query in query_list:
    ticker = "$" + query.abbrev
    queries.append(ticker)
    queries.append(query.abbrev)
    queries.append(query.name)

In [4]:
# Create connection to Reddit application
reddit = praw.Reddit(client_id=client_id,      # your client id
                     client_secret=client_secret,  # your client secret
                     user_agent=user_agent,  # user agent name
                     username="",     # your reddit username
                     password="")     # your reddit password

In [5]:
submissions_list = []
comments_list = []

subreddit_list = ['Stocks']

# Loop through list of Subreddits
for subreddit in subreddit_list:
    subreddit = reddit.subreddit(subreddit)
    # Loop through query list to query subreddit with each keyword
    for q in queries[:500]:  # slice to first 500 queries for testing purposes
        # Loop through submission results from keyword query
        # Change time filter as needed - set to hour for testing purposes
        for submission in subreddit.search(q, sort="top", time_filter="hour"):
            submission_obj = {
                "Post_Type": "Submission",
                "Submission_ID": submission.id,
                "Title": submission.title,
                "Author": submission.author,
                "Body": submission.selftext,
                "Distinguished": submission.distinguished,
                "Num_Comments": submission.num_comments,
                "Name": submission.name,
                "Permalink": submission.permalink,
                "URL": submission.url,
                "Score": submission.score,
                "Upvote_Ratio": submission.upvote_ratio,
                "Created_Date_UTC": [submission.created_utc]
            }
            #print(submission_obj)
            submissions_list.append(submission_obj)
            submission.comments.replace_more(limit=0)
            submission.comments_sort = "top"
            # Limit number of comments return during testing
            submission.comment_limit = 100
            comments = submission.comments.list()
            # Loop through commments on each submission
            for comment in comments:
                comment_obj = {
                    "Post_Type": "Comment",
                    "Submission_ID": submission.id,
                    "Comment_ID": comment.id,
                    "Parent_Comment_ID": comment.parent_id,
                    "Author": comment.author,
                    "Body": [comment.body],
                    "Distinguished": comment.distinguished,
                    "Is_Author": comment.is_submitter,
                    "Permalink": comment.permalink,
                    "Score": comment.score,
                    "Created_Date_UTC": [comment.created_utc]
                }
                #print(comment_obj)
                comments_list.append(comment_obj)
# print(submissions_list)
# print(comments_list)

In [17]:
submissions_df = pd.DataFrame([x for x in submissions_list])
submissions_df.head()

Unnamed: 0,Post_Type,Submission_ID,Title,Author,Body,Distinguished,Num_Comments,Name,Permalink,URL,Score,Upvote_Ratio,Created_Date_UTC
0,Submission,lzwmsl,Does the amount of employees mean a good company?,Snurtysnurts,I've been told a few times if a company is on ...,,6,t3_lzwmsl,/r/stocks/comments/lzwmsl/does_the_amount_of_e...,https://www.reddit.com/r/stocks/comments/lzwms...,3,1.0,[1615142148.0]
1,Submission,lzxab0,What to invest in during a bear market?,chelseaboy1234,Now I know we can’t time the market or any of ...,,2,t3_lzxab0,/r/stocks/comments/lzxab0/what_to_invest_in_du...,https://www.reddit.com/r/stocks/comments/lzxab...,1,1.0,[1615144109.0]
2,Submission,lzwmsl,Does the amount of employees mean a good company?,Snurtysnurts,I've been told a few times if a company is on ...,,6,t3_lzwmsl,/r/stocks/comments/lzwmsl/does_the_amount_of_e...,https://www.reddit.com/r/stocks/comments/lzwms...,3,1.0,[1615142148.0]
3,Submission,lzxab0,What to invest in during a bear market?,chelseaboy1234,Now I know we can’t time the market or any of ...,,2,t3_lzxab0,/r/stocks/comments/lzxab0/what_to_invest_in_du...,https://www.reddit.com/r/stocks/comments/lzxab...,1,1.0,[1615144109.0]
4,Submission,lzx5gs,Why Do the same stocks have radically differen...,Eddy_Khadra,"Hi everyone, new trader here.\n\nI started doi...",,2,t3_lzx5gs,/r/stocks/comments/lzx5gs/why_do_the_same_stoc...,https://www.reddit.com/r/stocks/comments/lzx5g...,2,1.0,[1615143719.0]


In [11]:
comments_df = pd.DataFrame([x for x in comments_list])
comments_df.head()

Unnamed: 0,Post_Type,Submission_ID,Comment_ID,Parent_Comment_ID,Author,Body,Distinguished,Is_Author,Permalink,Score,Created_Date_UTC
0,Comment,lzwmsl,gq4f0ce,t3_lzwmsl,jrosenkrantz,[There is no definitive answer to this. Every ...,,False,/r/stocks/comments/lzwmsl/does_the_amount_of_e...,5,[1615142403.0]
1,Comment,lzwmsl,gq4f0qu,t3_lzwmsl,Analyst027,[More employees can indicate larger operations...,,False,/r/stocks/comments/lzwmsl/does_the_amount_of_e...,3,[1615142407.0]
2,Comment,lzwmsl,gq4f3by,t3_lzwmsl,Zealoussideal,[Sometimes but not all of the time.You really ...,,False,/r/stocks/comments/lzwmsl/does_the_amount_of_e...,1,[1615142437.0]
3,Comment,lzwmsl,gq4fewu,t3_lzwmsl,ATMcalls,"[That makes sense logically, but Best Buy has ...",,False,/r/stocks/comments/lzwmsl/does_the_amount_of_e...,1,[1615142567.0]
4,Comment,lzwmsl,gq4hy8x,t3_lzwmsl,DriveNew,[Number of employees don’t mean shit...\nI own...,,False,/r/stocks/comments/lzwmsl/does_the_amount_of_e...,1,[1615143604.0]


In [26]:
# Using Vader Sentiment Analysis on Submissions
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
from pprint import pprint
nltk.download('vader_lexicon')
sia = SIA()
results = []
submissions_body = submissions_df['Body']
for body in submissions_body:
    pol_score = sia.polarity_scores(body)
    pol_score['body_text'] = body
    results.append(pol_score)
pprint(results[:7], width=100)

[{'body_text': "I've been told a few times if a company is on a major hiring spree then it means "
               'upcoming growth and expansion into more money making ventures.\n'
               '\n'
               "I've also been told that the sudden additional costs of high paid employees could "
               "be a companies downfall if they aren't pulling in enough to cover costs. Then "
               "again I guess that's true of anything.\n"
               '\n'
               "What's the overall sentiment of a companies employee amount and hiring a bunch of "
               'new people?',
  'compound': 0.6597,
  'neg': 0.0,
  'neu': 0.927,
  'pos': 0.073},
 {'body_text': 'Now I know we can’t time the market or any of that jazz but, when we aren’t in a '
               'bull run, what type of investments are best?\n'
               '\n'
               'Value stocks?\n'
               'Dividen stocks?\n'
               'Growth stocks? Probably not?\n'
               '\n'
       

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/madisonleopold/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
