# This script connects to Reddit's API and retrieves a count of stock $ticker mentions to gauge rising popularity.
import praw
import json
import time
import datetime
from datetime import timezone
import re
import boto3
import os
import pytz
import sys
# Function to find all occurrences of the stock ticker (bare or $-prefixed) with case ignored.
# Returns the number of occurrences.
def find_occurrences_of_stock_ticker(arg_ticker, arg_text_to_search):
    # Regex with word boundaries (start of word, end of word, etc.). Because "$" is a
    # non-word character, \b also matches between "$" and the ticker, so this single
    # pattern counts both bare and $-prefixed mentions; re.escape guards against
    # tickers containing regex metacharacters.
    pattern = r"\b{}\b".format(re.escape(arg_ticker))
    return sum(1 for _ in re.finditer(pattern, arg_text_to_search, re.IGNORECASE))
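# A quick sanity check of the matcher on hypothetical text; the bare and the
# $-prefixed form each count once, case ignored:
#   find_occurrences_of_stock_ticker("GME", "GME to the moon, I bought $gme")  ->  2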
# Connection credentials to Reddit's API
reddit = praw.Reddit(
    client_id=os.environ['REDDIT_API_CLIENT_ID'],
    client_secret=os.environ['REDDIT_API_CLIENT_SECRET'],
    user_agent=os.environ['REDDIT_API_USER_AGENT']
)
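# These three values come from a Reddit "script" app and are expected in the
# environment before the script runs, e.g. (placeholder values, not real credentials):
#   export REDDIT_API_CLIENT_ID="your_client_id"
#   export REDDIT_API_CLIENT_SECRET="your_client_secret"
#   export REDDIT_API_USER_AGENT="stock-ticker-counter/1.0 by u/your_username"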
# Instantiating objects
posts_in_last_day = []
text_blob = ''
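# A minimal guard (an addition, assuming the script should fail fast with a usage
# hint rather than an IndexError when no subreddit argument is supplied):
if len(sys.argv) < 2:
    sys.exit("usage: python <this_script>.py <subreddit_name>")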
# Retrieve subreddit name from terminal argument
subreddit_name = sys.argv[1]
# Get all posts from the subreddit in the last 24 hours (up to 900 are requested,
# but no 24-hour period has come close to that many posts)
for post in reddit.subreddit(subreddit_name).new(limit=900):
    post_creation_epoch_time = post.created_utc  # created_utc is already a UTC epoch, so no timezone hack is needed
    current_epoch_time = int(time.time())
    age_of_post_in_hours = (current_epoch_time - post_creation_epoch_time) / 60 / 60
    if age_of_post_in_hours < 24:
        posts_in_last_day.append(post)
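# e.g. a post created 5400 seconds (90 minutes) ago gives 5400 / 60 / 60 = 1.5 hours,
# which is under 24, so it is kept.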
# Define metrics for posts and comments in the last 24 hours
post_count_in_last_day = len(posts_in_last_day)
comments_in_last_day = 0
# Retrieve all comments from the acquired posts
for post in posts_in_last_day:
    text_blob = text_blob + post.title + '\n'  # newline separator keeps word boundaries intact between texts
    # limit=1 expands only one "load more comments" stub per post; deeply nested
    # replies beyond that are skipped to keep API calls down
    post.comments.replace_more(limit=1)
    for comment in post.comments.list():
        if comment.body:
            comments_in_last_day = comments_in_last_day + 1
            text_blob = text_blob + comment.body + '\n'
# The text_blob is an amalgamation of all posts and comments from the last 24 hours.
# Parse it and count occurrences of each curated stock ticker.
ticker_counts = {}
with open("curated_stock_tickers.txt") as f:
    for line in f:
        ticker = line.rstrip('\n')
        print("Currently counting: " + ticker)
        occurrences = find_occurrences_of_stock_ticker(ticker, text_blob)
        if occurrences > 0:
            ticker_counts[ticker] = occurrences
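# At this point ticker_counts maps each mentioned ticker to its count,
# e.g. (hypothetical numbers): {"GME": 412, "AMC": 199, "TSLA": 57}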
# Get the current time and format it accordingly
current_time = datetime.datetime.now(timezone.utc)
est = pytz.timezone('US/Eastern')
date_format = "%d %B %I:%M %p"
# Write out the data in .json format for consumption by the frontend
json_data = {"posts": post_count_in_last_day, "comments": comments_in_last_day,
             "time": current_time.astimezone(est).strftime(date_format),
             "data": sorted(ticker_counts.items(), key=lambda x: x[1], reverse=True)}
with open(subreddit_name + '_most_mentioned_stocks.json', 'w') as fp:
    fp.write(json.dumps(json_data))
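# The resulting file has the shape (hypothetical values):
#   {"posts": 512, "comments": 14210, "time": "02 February 09:30 AM",
#    "data": [["GME", 412], ["AMC", 199], ["TSLA", 57]]}
# (the sorted dict items serialize as [ticker, count] pairs, descending by count)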
# Open connection to the AWS S3 bucket
s3_client = boto3.client('s3',
                         aws_access_key_id=os.environ['S3_KEY'],
                         aws_secret_access_key=os.environ['S3_SECRET'])
# Upload the .json file to S3, making it public so anyone can use it
s3_client.upload_file(subreddit_name + '_most_mentioned_stocks.json', 'wsb-pop-index',
                      subreddit_name + 'PopIndex.json',
                      ExtraArgs={'ContentType': "application/json", 'ACL': 'public-read'})
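# With a public-read ACL the object is reachable at the standard S3 URL pattern,
# e.g. (assuming the bucket's default region and a "wallstreetbets" argument):
#   https://wsb-pop-index.s3.amazonaws.com/wallstreetbetsPopIndex.json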