In [62]:
# Dependencies
import praw
import pandas as pd
from config import client_id, client_secret, user_agent
from tickers import Ticker, scrape_tickers, query_list

In [63]:
# Scrape to get updated tickers and company names.
# NOTE(review): presumably this refreshes the `query_list` imported from
# `tickers`, which the query-building cell iterates over - confirm in tickers.py.
scrape_tickers()

In [64]:
# Build the raw query list: every entry in query_list contributes its
# "$"-prefixed ticker abbreviation followed by its company name, in order.
queries = [
    term
    for entry in query_list
    for term in ("$" + entry.abbrev, entry.name)
]

In [65]:
# Create connection to Reddit application.
# The app credentials come from config; username and password are left blank.
# NOTE(review): with blank username/password PRAW presumably operates in
# read-only mode - confirm this is sufficient for the searches in this notebook.
reddit_settings = dict(
    client_id=client_id,          # your client id
    client_secret=client_secret,  # your client secret
    user_agent=user_agent,        # user agent name
    username="",                  # your reddit username
    password="",                  # your reddit password
)
reddit = praw.Reddit(**reddit_settings)

In [67]:
# Subreddits to search
subreddit_list = ['Stocks']

# Only the first MAX_QUERIES entries of `queries` are searched (testing cap)
MAX_QUERIES = 500
# Time window for subreddit.search - change as needed, "hour" keeps test runs short
SEARCH_TIME_FILTER = "hour"
# Comment fetch options (limit the number of comments returned during testing)
COMMENT_SORT = "top"
COMMENT_LIMIT = 100


def _submission_record(submission, query):
    """Return a flat dict describing one submission matched by `query`.

    The keys become the columns of the submissions DataFrame.
    """
    return {
        "Post_Type": "Submission",
        "Ticker": query,
        "Submission_ID": submission.id,
        "Title": submission.title,
        "Author": submission.author,
        "Body": submission.selftext,
        "Distinguished": submission.distinguished,
        "Num_Comments": submission.num_comments,
        "Name": submission.name,
        "Permalink": submission.permalink,
        "URL": submission.url,
        "Score": submission.score,
        "Upvote_Ratio": submission.upvote_ratio,
        # NOTE(review): stored as a one-element list; kept as-is so existing
        # consumers of this field keep working - consider storing the scalar.
        "Created_Date_UTC": [submission.created_utc]
    }


def _comment_record(comment, submission, query):
    """Return a flat dict describing one comment of `submission` matched by `query`.

    The keys become the columns of the comments DataFrame.
    """
    return {
        "Post_Type": "Comment",
        "Ticker": query,
        "Submission_ID": submission.id,
        "Comment_ID": comment.id,
        "Parent_Comment_ID": comment.parent_id,
        "Author": comment.author,
        # NOTE(review): Body and Created_Date_UTC are stored as one-element
        # lists; kept as-is so existing consumers of these fields keep working.
        "Body": [comment.body],
        "Distinguished": comment.distinguished,
        "Is_Author": comment.is_submitter,
        "Permalink": comment.permalink,
        "Score": comment.score,
        "Created_Date_UTC": [comment.created_utc]
    }


submissions_list = []
comments_list = []

# Loop through list of Subreddits
for subreddit_name in subreddit_list:
    subreddit = reddit.subreddit(subreddit_name)
    # Query the subreddit with each keyword
    for q in queries[:MAX_QUERIES]:
        # Loop through submission results from keyword query
        for submission in subreddit.search(q, sort="top", time_filter=SEARCH_TIME_FILTER):
            # The sort order and limit only take effect if they are set before
            # the submission's comments are fetched, so configure them before
            # reading anything else from the submission. The attribute is
            # `comment_sort` (the previous `comments_sort` was silently ignored).
            submission.comment_sort = COMMENT_SORT
            submission.comment_limit = COMMENT_LIMIT

            submissions_list.append(_submission_record(submission, q))

            # Drop all "load more comments" placeholders, then flatten the tree
            submission.comments.replace_more(limit=0)
            for comment in submission.comments.list():
                comments_list.append(_comment_record(comment, submission, q))

In [68]:
# Turn the collected submission records into a DataFrame and preview it
submissions_df = pd.DataFrame(submissions_list)
submissions_df.head()

Unnamed: 0,Post_Type,Ticker,Submission_ID,Title,Author,Body,Distinguished,Num_Comments,Name,Permalink,URL,Score,Upvote_Ratio,Created_Date_UTC
0,Submission,$A,m0ure2,Is AAPL due for a bounce?,just_lick_my_ass,The monthly chart on AAPL is hideous. It’s be...,,8,t3_m0ure2,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,https://www.reddit.com/r/stocks/comments/m0ure...,3,1.0,[1615250954.0]
1,Submission,$AAPL,m0ure2,Is AAPL due for a bounce?,just_lick_my_ass,The monthly chart on AAPL is hideous. It’s be...,,8,t3_m0ure2,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,https://www.reddit.com/r/stocks/comments/m0ure...,3,1.0,[1615250954.0]
2,Submission,Apple,m0ucxb,Things are looking up,Miladyboi,"First, I would like to start this off by sayin...",,13,t3_m0ucxb,/r/stocks/comments/m0ucxb/things_are_looking_up/,https://www.reddit.com/r/stocks/comments/m0ucx...,6,0.75,[1615249655.0]
3,Submission,$AIR,m0ure2,Is AAPL due for a bounce?,just_lick_my_ass,The monthly chart on AAPL is hideous. It’s be...,,8,t3_m0ure2,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,https://www.reddit.com/r/stocks/comments/m0ure...,4,1.0,[1615250954.0]
4,Submission,Air T,m0ure2,Is AAPL due for a bounce?,just_lick_my_ass,The monthly chart on AAPL is hideous. It’s be...,,8,t3_m0ure2,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,https://www.reddit.com/r/stocks/comments/m0ure...,4,1.0,[1615250954.0]


In [69]:
#submissions_df['Body'].iloc[0]

In [70]:
# Turn the collected comment records into a DataFrame and preview it
comments_df = pd.DataFrame(comments_list)
comments_df.head()

Unnamed: 0,Post_Type,Ticker,Submission_ID,Comment_ID,Parent_Comment_ID,Author,Body,Distinguished,Is_Author,Permalink,Score,Created_Date_UTC
0,Comment,$A,m0ure2,gq9ys0n,t3_m0ure2,upstreamer1,[I'm with you. I have no data to say why it w...,,False,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,1,[1615251195.0]
1,Comment,$A,m0ure2,gq9yt8i,t3_m0ure2,tywebb1000,[AAPL market cap hit $1T right before the pand...,,False,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,1,[1615251214.0]
2,Comment,$A,m0ure2,gq9ytys,t3_m0ure2,fethorLR,[Currently the market is going through a depre...,,False,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,1,[1615251225.0]
3,Comment,$A,m0ure2,gq9yu0z,t3_m0ure2,yepez1011,[Lmao who knows. I bought in around 140 to put...,,False,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,1,[1615251226.0]
4,Comment,$A,m0ure2,gq9yw8t,t3_m0ure2,hondo402,[I saw a chart on the Motley Fool where AAPL’s...,,False,/r/stocks/comments/m0ure2/is_aapl_due_for_a_bo...,1,[1615251260.0]


In [72]:
# Using Vader Sentiment Analysis on Submissions
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA
from pprint import pprint
# Run once if the lexicon is not installed yet: nltk.download('vader_lexicon')
sia = SIA()

# One dict per submission row: the VADER polarity scores plus the text that
# was scored ('body_text') and the query that matched it ('ticker').
results = []
submissions_loop = submissions_df[['Body', 'Ticker']]
# Iterate as plain (Body, Ticker) tuples. Indexing a row Series with row[0] /
# row[1] relies on positional fallback for integer keys on a string-labelled
# index, which pandas has deprecated.
for body, ticker in submissions_loop.itertuples(index=False, name=None):
    pol_score = sia.polarity_scores(body)
    pol_score['body_text'] = body
    pol_score['ticker'] = ticker
    results.append(pol_score)
pprint(results[:3], width=100)

[{'body_text': 'The monthly chart on AAPL is hideous.  It’s been pretty much a nonstop decline. '
               'They’re just coming off an amazing quarter. Leadership hasn’t changed. New '
               'services, new hardware coming (AirTags, AR, car??) and the selling stays '
               'unrelenting. For a growth stock they pay a nice dividend too. \n'
               '\n'
               'I’ll admit today I bought a bunch of June 18 $125 calls. Already down like 3k on '
               'them, which sucks. I just don’t see how we can’t get a small bounce back to $120s '
               'in the near future. What do you guys think?',
  'compound': 0.9237,
  'neg': 0.041,
  'neu': 0.78,
  'pos': 0.179,
  'ticker': '$A'},
 {'body_text': 'The monthly chart on AAPL is hideous.  It’s been pretty much a nonstop decline. '
               'They’re just coming off an amazing quarter. Leadership hasn’t changed. New '
               'services, new hardware coming (AirTags, AR, car??) and the s

In [73]:
# Tabulate the VADER scores and derive a sentiment label from `compound`:
#   1 when compound > 0.2, -1 when compound < -0.2, otherwise 0
submission_sentiment_df = pd.DataFrame.from_records(results)
compound_scores = submission_sentiment_df['compound']
is_positive = (compound_scores > 0.2).astype('int64')
is_negative = (compound_scores < -0.2).astype('int64')
submission_sentiment_df['label'] = is_positive - is_negative
submission_sentiment_df.head()

Unnamed: 0,neg,neu,pos,compound,body_text,ticker,label
0,0.041,0.78,0.179,0.9237,The monthly chart on AAPL is hideous. It’s be...,$A,1
1,0.041,0.78,0.179,0.9237,The monthly chart on AAPL is hideous. It’s be...,$AAPL,1
2,0.037,0.845,0.119,0.9939,"First, I would like to start this off by sayin...",Apple,1
3,0.041,0.78,0.179,0.9237,The monthly chart on AAPL is hideous. It’s be...,$AIR,1
4,0.041,0.78,0.179,0.9237,The monthly chart on AAPL is hideous. It’s be...,Air T,1


In [75]:
# Show the full, untruncated body text of the first scored submission
submission_sentiment_df['body_text'].iat[0]

'The monthly chart on AAPL is hideous.  It’s been pretty much a nonstop decline. They’re just coming off an amazing quarter. Leadership hasn’t changed. New services, new hardware coming (AirTags, AR, car??) and the selling stays unrelenting. For a growth stock they pay a nice dividend too. \n\nI’ll admit today I bought a bunch of June 18 $125 calls. Already down like 3k on them, which sucks. I just don’t see how we can’t get a small bounce back to $120s in the near future. What do you guys think?'