# This script connects to reddit's API and retrieves a count of stock $ticker mentions to gauge rising popularity.
import praw
import json
import time
import datetime
from datetime import timezone
import re
import boto3
import os
import pytz
import sys
def find_occurrences_of_stock_ticker(arg_ticker, arg_text_to_search):
    """Count case-insensitive, whole-word mentions of a stock ticker in a text.

    A mention is the ticker standing on its own, i.e. not directly preceded or
    followed by a letter, digit or underscore. The "$TICKER" spelling is
    counted by the same search because "$" is not a word character, so it
    needs no separate pass (and is not counted twice).

    Args:
        arg_ticker: Ticker symbol to look for, e.g. "GME" or "BRK.B". It is
            matched literally; regex metacharacters in it are escaped.
        arg_text_to_search: Text to scan.

    Returns:
        The number of non-overlapping mentions; 0 when either argument is
        empty.
    """
    # An empty ticker would otherwise match at every word boundary.
    if not arg_ticker or not arg_text_to_search:
        return 0
    # Lookarounds rather than \b: they behave like \b for ordinary tickers but
    # stay correct when the ticker itself starts or ends with a non-word
    # character. re.escape keeps "." in symbols such as "BRK.B" literal.
    pattern = re.compile(
        r"(?<!\w)" + re.escape(arg_ticker) + r"(?!\w)", re.IGNORECASE
    )
    return sum(1 for _ in pattern.finditer(arg_text_to_search))
# Connection to reddit's API through PRAW.
# NOTE(review): the argument list and closing parenthesis of this call are not
# present in this view (presumably the credentials were stripped) - restore
# them, ideally loaded from configuration rather than hard-coded. TODO confirm.
reddit = praw.Reddit(
# Accumulators used by the rest of the script:
#   posts_in_last_day - list of posts that the comment-gathering loop iterates
#   text_blob         - post titles and comment bodies concatenated for counting
posts_in_last_day = []
text_blob = ''
# Subreddit to scan, taken from the first command-line argument.
# sys.argv[1] raises IndexError when the script is started without one.
subreddit_name = str(sys.argv[1])
# Collect the subreddit's posts from the last 24 hours. The listing is capped
# at 900 posts (no 24-hour period has reached that number).
SECONDS_PER_HOUR = 60 * 60
# Taken once so every post is measured against the same reference instant.
current_epoch_time = int(time.time())
for post in reddit.subreddit(subreddit_name).new(limit=900):
    # created_utc is the documented UTC creation timestamp, so it compares
    # directly with time.time() without a hand-applied timezone offset.
    age_of_post_in_hours = (current_epoch_time - post.created_utc) / SECONDS_PER_HOUR
    if age_of_post_in_hours < 24:
        # NOTE(review): the original `if` had no visible body; keeping the post
        # is what the later loop over posts_in_last_day requires.
        posts_in_last_day.append(post)
# Metrics for posts and comments in the last 24 hours
post_count_in_last_day = len(posts_in_last_day)
comments_in_last_day = 0
# Gather every post title and every non-empty comment body of the kept posts.
text_fragments = []
for post in posts_in_last_day:
    text_fragments.append(post.title)
    # Drop the "load more comments" placeholders first: they have no `body`
    # attribute and would raise AttributeError below. limit=0 removes them
    # without fetching the hidden comments.
    post.comments.replace_more(limit=0)
    for comment in post.comments.list():
        if comment.body:
            comments_in_last_day += 1
            text_fragments.append(comment.body)
# Join with newlines so the last word of one fragment never fuses with the
# first word of the next, which would hide whole-word ticker matches.
text_blob = "\n".join([text_blob, *text_fragments])
# text_blob is an amalgamation of all posts and comments from the last 24 hours.
# Count the mentions of every curated ticker (one symbol per line in the file)
# and keep only the tickers that were mentioned at least once.
dictionary = {}
with open("curated_stock_tickers.txt") as f:
    for line in f:
        # strip() also removes "\r" and stray spaces left by rstrip('\n');
        # blank lines are skipped instead of being counted as an empty ticker.
        ticker = line.strip()
        if not ticker:
            continue
        print("Currently counting: " + str(ticker))
        occurrences = find_occurrences_of_stock_ticker(ticker, text_blob)
        if occurrences > 0:
            dictionary[ticker] = occurrences
# Get the current time as a timezone-aware UTC value; it is converted to
# US/Eastern when formatted below.
# NOTE(review): the right-hand side of this assignment was missing in the
# original; an aware UTC "now" is what the astimezone() call needs - confirm.
current_time = datetime.datetime.now(timezone.utc)
est = pytz.timezone('US/Eastern')
date_format = "%d %B %I:%M %p"
# Data for consumption by the frontend: totals, the formatted timestamp and the
# (ticker, count) pairs ordered from most to least mentioned.
json_data = {
    "posts": post_count_in_last_day,
    "comments": comments_in_last_day,
    "time": current_time.astimezone(est).strftime(date_format),
    "data": sorted(dictionary.items(), key=lambda x: x[1], reverse=True),
}
# Write the data out in .json format. The with-block flushes and closes the
# file so it is complete on disk before anything reads it back.
with open(subreddit_name + '_most_mentioned_stocks.json', 'w+') as fp:
    json.dump(json_data, fp)
# Open connection to AWS S3.
# NOTE(review): both constructor calls below are cut off after the service name
# in this view - the remaining arguments (presumably region/credentials) and
# the closing parentheses are missing and must be restored. TODO confirm.
# `s3` is not used by any statement visible after this point.
s3 = boto3.resource('s3',
s3_client = boto3.client('s3',
# Upload the local <subreddit>_most_mentioned_stocks.json file to the
# 'wsb-pop-index' bucket under the key <subreddit>PopIndex.json, with content
# type application/json and a public-read ACL so anyone can use it.
s3_client.upload_file(subreddit_name + '_most_mentioned_stocks.json', 'wsb-pop-index',
subreddit_name + 'PopIndex.json', ExtraArgs={'ContentType': "application/json",
'ACL': 'public-read'})