## Sentiment to Strategy: Leveraging Forums Discussions to Guide Automated Trading Decisions 

---

### 0. Importing libraries

In [74]:
# Import necessary libraries
from pprint import pprint
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import os
import praw
import prawcore
import csv
import time
from datetime import datetime, date, timezone
from dateutil.rrule import rrule, DAILY
from tqdm import tqdm
from transformers import pipeline

---

### 1. Reddit API Data Extraction

In [75]:
# Initialize PRAW (Python Reddit API Wrapper)
user_agent = "StockMarket Sentiment Analysis"
reddit = praw.Reddit(
    client_id="SDD6BO5WvswKQ2DOZrioiQ",
    client_secret="BoGTdq9xg1Wu4kjy3zj_WBLbTInQvQ",
    user_agent=user_agent
)

# Initialize the subreddit
subreddit = reddit.subreddit("wallstreetbets")

In [87]:
# List to hold data for each post
posts = []

# Fetch posts from the subreddit
for submission in subreddit.new(limit=None):
    post_date = datetime.fromtimestamp(submission.created_utc)
    posts.append({
        'id': submission.id,
        'title': submission.title,
        'selftext': submission.selftext,
        'score': submission.score,
        'upvote_ratio': submission.upvote_ratio,       
        'created_date': post_date,
        'permalink': f"https://redd.it/{submission.id}"
    })

# Convert the list to a DataFrame
raw_df = pd.DataFrame(posts)
raw_df

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink
0,1kqpv7f,I like rocks. I like Medicare fraud. Is this r...,,2,1.00,2025-05-20 00:59:34,https://redd.it/1kqpv7f
1,1kqprov,$7M into LCID across 3 accounts. Betting Tesla...,This is real. $7M into LCID across 3 different...,8,0.90,2025-05-20 00:54:53,https://redd.it/1kqprov
2,1kqpena,TMC os the next Stock?,"22,5% today. Do you really have more chances ...",0,0.50,2025-05-20 00:37:53,https://redd.it/1kqpena
3,1kqp5u4,My first 10x,I know I might have been able to run this more...,28,1.00,2025-05-20 00:27:00,https://redd.it/1kqp5u4
4,1kqp08k,TMC,There was once a man on here who had a brillia...,3,1.00,2025-05-20 00:20:12,https://redd.it/1kqp08k
...,...,...,...,...,...,...,...
871,1kadh5l,US Boosts Net Quarterly Borrowing Estimate to ...,Bullish,528,0.98,2025-04-29 04:34:20,https://redd.it/1kadh5l
872,1ka9zk6,"First I get dicked, then this",idk even know what to type. Carvana is a manip...,123,0.89,2025-04-29 01:37:13,https://redd.it/1ka9zk6
873,1ka9vkm,"Turned $800 into $4,400 in 100 minutes because...","At 1:36PM (market time) today, I had two choic...",1360,0.96,2025-04-29 01:31:56,https://redd.it/1ka9vkm
874,1ka8xee,Is anyone else here bagholding puts?,Wondering if there are other idiots like me wh...,612,0.96,2025-04-29 00:47:42,https://redd.it/1ka8xee


In [88]:
# Create a csv file with the data
if os.path.exists('wsb_posts.csv'):
    existing_df = pd.read_csv('wsb_posts.csv')
    combined_df = pd.concat([existing_df, raw_df]).drop_duplicates(subset=['id'])
    combined_df.to_csv('wsb_posts.csv', index=False)
else:
    raw_df.to_csv('wsb_posts.csv', index=False)

In [89]:
# Load the data from the csv file
df = pd.read_csv('wsb_posts.csv')
df

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text
0,1kqotlw,Whats your opinion about Rocket Lab?,I see many ppl hyping rocket lab and also boug...,1,1.00,2025-05-20 00:12:16,https://redd.it/1kqotlw,Whats your opinion about Rocket Lab? I see man...
1,1kqoadm,Fed's Powell has sounded the alarm for years a...,,125,0.94,2025-05-19 23:49:38,https://redd.it/1kqoadm,Fed's Powell has sounded the alarm for years a...
2,1kqo5u3,Big beautiful bill,Am I screwed with my sp500 ETFs as a French in...,0,0.25,2025-05-19 23:44:20,https://redd.it/1kqo5u3,Big beautiful bill Am I screwed with my sp500 ...
3,1kqo1xz,Question on UNH Stock,"Should I sell UNH calls, covered calls or hold...",11,0.87,2025-05-19 23:39:50,https://redd.it/1kqo1xz,"Question on UNH Stock Should I sell UNH calls,..."
4,1kqmxov,Is this gonna print tomorrow,"Held these over the weekend, at its peak today...",0,0.44,2025-05-19 22:54:47,https://redd.it/1kqmxov,Is this gonna print tomorrow Held these over t...
...,...,...,...,...,...,...,...,...
872,1kqprov,$7M into LCID across 3 accounts. Betting Tesla...,This is real. $7M into LCID across 3 different...,8,0.90,2025-05-20 00:54:53,https://redd.it/1kqprov,
873,1kqpena,TMC os the next Stock?,"22,5% today. Do you really have more chances ...",0,0.50,2025-05-20 00:37:53,https://redd.it/1kqpena,
874,1kqp5u4,My first 10x,I know I might have been able to run this more...,28,1.00,2025-05-20 00:27:00,https://redd.it/1kqp5u4,
875,1kqp08k,TMC,There was once a man on here who had a brillia...,3,1.00,2025-05-20 00:20:12,https://redd.it/1kqp08k,


---

### 2. Sentiment Analysis

In [77]:
# URL with the S&P 500 companies list on Wikipedia
stocks_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

# Read the HTML table from the page
tables = pd.read_html(stocks_url)
sp500_df = tables[0]

# Extract the ticker symbols (the column is usually named "Symbol")
tickers = sp500_df['Symbol'].tolist()

# Extract the names of the companies
companies = sp500_df['Security'].tolist()

In [78]:
long_blacklist = ['I', 'ARE',  'ON', 'GO', 'NOW', 'CAN', 'UK', 'SO', 'OR', 'OUT', 'SEE', 'ONE', 'LOVE', 'U', 'STAY', 'HAS', 'BY', 'BIG', 'GOOD', 'RIDE', 'EOD', 'ELON', 'WSB',
            'THE', 'A', 'ROPE', 'YOLO', 'TOS', 'CEO', 'DD', 'IT', 'OPEN', 'ATH', 'PM', 'IRS', 'FOR','DEC', 'BE', 'IMO', 'ALL', 'RH', 'EV', 'TOS', 'CFO', 'CTO','DD', 'BTFD',
            'WSB', 'OK', 'PDT', 'RH', 'KYS', 'FD', 'TYS', 'US', 'USA', 'IT', 'ATH', 'RIP', 'BMW', 'GDP', 'OTM', 'ATM', 'ITM', 'IMO', 'LOL', 'AM', 'BE', 'PR', 'PRAY', 'PT', 
            'FBI', 'SEC', 'GOD', 'NOT', 'POS', 'FOMO', 'TL;DR', 'EDIT', 'STILL', 'WTF', 'RAW', 'PM', 'LMAO','LMFAO', 'ROFL', 'EZ', 'RED', 'BEZOS', 'TICK', 'IS', 'PM', 'LPT',
            'GOAT', 'FL', 'CA', 'IL', 'MACD', 'HQ', 'OP', 'PS', 'AH', 'TL', 'JAN', 'FEB', 'JUL', 'AUG', 'SEP', 'SEPT', 'OCT', 'NOV', 'FDA', 'IV', 'ER', 'IPO', 'MILF', 'BUT', 
            'SSN', 'FIFA', 'USD', 'CPU', 'AT', 'GG']

blacklist = ['A', 'ARE', 'ALL', 'DD', 'IT', 'HAS', 'ON', 'PM', 'NOW', 'SO']

In [79]:
# Create a regex pattern from the S&P 500 tickers list, using word boundaries to avoid false matches
pattern = r'\b(?:' + '|'.join(map(re.escape, tickers)) + r')\b'

# Combine title and selftext into one column to search both
raw_df['text'] = raw_df['title'].fillna('') + " " + raw_df['selftext'].fillna('')

# Filter the DataFrame to include only posts that mention any S&P 500 stock ticker
filtered_reddit = raw_df[raw_df['text'].str.contains(pattern, flags=re.IGNORECASE, regex=True)].copy()
filtered_reddit


Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text
0,1kqotlw,Whats your opinion about Rocket Lab?,I see many ppl hyping rocket lab and also boug...,1,1.00,2025-05-20 00:12:16,https://redd.it/1kqotlw,Whats your opinion about Rocket Lab? I see man...
1,1kqoadm,Fed's Powell has sounded the alarm for years a...,,125,0.94,2025-05-19 23:49:38,https://redd.it/1kqoadm,Fed's Powell has sounded the alarm for years a...
2,1kqo5u3,Big beautiful bill,Am I screwed with my sp500 ETFs as a French in...,0,0.25,2025-05-19 23:44:20,https://redd.it/1kqo5u3,Big beautiful bill Am I screwed with my sp500 ...
3,1kqo1xz,Question on UNH Stock,"Should I sell UNH calls, covered calls or hold...",11,0.87,2025-05-19 23:39:50,https://redd.it/1kqo1xz,"Question on UNH Stock Should I sell UNH calls,..."
4,1kqmxov,Is this gonna print tomorrow,"Held these over the weekend, at its peak today...",0,0.44,2025-05-19 22:54:47,https://redd.it/1kqmxov,Is this gonna print tomorrow Held these over t...
...,...,...,...,...,...,...,...,...
864,1kaezyl,JetBlue Scraps Alliance with American Airlines...,"See below for article, what is the future of J...",2,0.58,2025-04-29 05:58:42,https://redd.it/1kaezyl,JetBlue Scraps Alliance with American Airlines...
866,1ka9zk6,"First I get dicked, then this",idk even know what to type. Carvana is a manip...,125,0.89,2025-04-29 01:37:13,https://redd.it/1ka9zk6,"First I get dicked, then this idk even know wh..."
867,1ka9vkm,"Turned $800 into $4,400 in 100 minutes because...","At 1:36PM (market time) today, I had two choic...",1364,0.96,2025-04-29 01:31:56,https://redd.it/1ka9vkm,"Turned $800 into $4,400 in 100 minutes because..."
868,1ka8xee,Is anyone else here bagholding puts?,Wondering if there are other idiots like me wh...,607,0.96,2025-04-29 00:47:42,https://redd.it/1ka8xee,Is anyone else here bagholding puts? Wondering...


In [81]:
# Find matching tickers in the text
def matching_tickers(text):
    matching_tickers = [ticker for ticker in tickers if ticker in text]
    matching_tickers += [company for company in companies if company in text]
    return matching_tickers

filtered_reddit['tickers'] = filtered_reddit['text'].apply(matching_tickers)
filtered_reddit

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text,tickers
0,1kqotlw,Whats your opinion about Rocket Lab?,I see many ppl hyping rocket lab and also boug...,1,1.00,2025-05-20 00:12:16,https://redd.it/1kqotlw,Whats your opinion about Rocket Lab? I see man...,[L]
1,1kqoadm,Fed's Powell has sounded the alarm for years a...,,125,0.94,2025-05-19 23:49:38,https://redd.it/1kqoadm,Fed's Powell has sounded the alarm for years a...,[F]
2,1kqo5u3,Big beautiful bill,Am I screwed with my sp500 ETFs as a French in...,0,0.25,2025-05-19 23:44:20,https://redd.it/1kqo5u3,Big beautiful bill Am I screwed with my sp500 ...,"[A, T, F]"
3,1kqo1xz,Question on UNH Stock,"Should I sell UNH calls, covered calls or hold...",11,0.87,2025-05-19 23:39:50,https://redd.it/1kqo1xz,"Question on UNH Stock Should I sell UNH calls,...","[T, UNH]"
4,1kqmxov,Is this gonna print tomorrow,"Held these over the weekend, at its peak today...",0,0.44,2025-05-19 22:54:47,https://redd.it/1kqmxov,Is this gonna print tomorrow Held these over t...,"[T, L, O, Nvidia]"
...,...,...,...,...,...,...,...,...,...
864,1kaezyl,JetBlue Scraps Alliance with American Airlines...,"See below for article, what is the future of J...",2,0.58,2025-04-29 05:58:42,https://redd.it/1kaezyl,JetBlue Scraps Alliance with American Airlines...,"[A, F, J, L]"
866,1ka9zk6,"First I get dicked, then this",idk even know what to type. Carvana is a manip...,125,0.89,2025-04-29 01:37:13,https://redd.it/1ka9zk6,"First I get dicked, then this idk even know wh...","[T, C, D, F, O]"
867,1ka9vkm,"Turned $800 into $4,400 in 100 minutes because...","At 1:36PM (market time) today, I had two choic...",1364,0.96,2025-04-29 01:31:56,https://redd.it/1ka9vkm,"Turned $800 into $4,400 in 100 minutes because...","[A, T, C, D, F, L, PM, O, V]"
868,1ka8xee,Is anyone else here bagholding puts?,Wondering if there are other idiots like me wh...,607,0.96,2025-04-29 00:47:42,https://redd.it/1ka8xee,Is anyone else here bagholding puts? Wondering...,"[C, D, O]"


---

### 3. Sentiment Analysis

In [83]:
# Create sentiment analysis pipeline
sent_pipe = pipeline(
    "sentiment-analysis",
    model="ProsusAI/finBERT",
    top_k=None,            # get full softmax
    truncation=True
)

def finbert_score(text):
    outs = sent_pipe(text)[0] 
    print(outs)        # returns list of dicts
    score = (outs[2]["score"] - outs[1]["score"])  
    print(score)      # positive (outs[2]) - negative (outs[0])
    # neutral (outs[1]) implicitly pulls score toward 0
    return score

filtered_reddit["sentiment"] = filtered_reddit["text"].apply(finbert_score)
filtered_reddit

Device set to use cuda:0


[{'label': 'neutral', 'score': 0.9321987628936768}, {'label': 'positive', 'score': 0.04927988722920418}, {'label': 'negative', 'score': 0.018521295860409737}]
-0.03075859136879444
[{'label': 'negative', 'score': 0.9248008728027344}, {'label': 'neutral', 'score': 0.042889587581157684}, {'label': 'positive', 'score': 0.03230958804488182}]
-0.010579999536275864
[{'label': 'neutral', 'score': 0.7739803194999695}, {'label': 'positive', 'score': 0.18094052374362946}, {'label': 'negative', 'score': 0.04507911577820778}]
-0.13586140796542168
[{'label': 'neutral', 'score': 0.9343388080596924}, {'label': 'positive', 'score': 0.04255009815096855}, {'label': 'negative', 'score': 0.023111045360565186}]
-0.019439052790403366
[{'label': 'neutral', 'score': 0.8726502656936646}, {'label': 'positive', 'score': 0.09138496220111847}, {'label': 'negative', 'score': 0.03596477583050728}]
-0.05542018637061119
[{'label': 'neutral', 'score': 0.7909849286079407}, {'label': 'negative', 'score': 0.109397895634174

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text,tickers,sentiment
0,1kqotlw,Whats your opinion about Rocket Lab?,I see many ppl hyping rocket lab and also boug...,1,1.00,2025-05-20 00:12:16,https://redd.it/1kqotlw,Whats your opinion about Rocket Lab? I see man...,[L],-0.030759
1,1kqoadm,Fed's Powell has sounded the alarm for years a...,,125,0.94,2025-05-19 23:49:38,https://redd.it/1kqoadm,Fed's Powell has sounded the alarm for years a...,[F],-0.010580
2,1kqo5u3,Big beautiful bill,Am I screwed with my sp500 ETFs as a French in...,0,0.25,2025-05-19 23:44:20,https://redd.it/1kqo5u3,Big beautiful bill Am I screwed with my sp500 ...,"[A, T, F]",-0.135861
3,1kqo1xz,Question on UNH Stock,"Should I sell UNH calls, covered calls or hold...",11,0.87,2025-05-19 23:39:50,https://redd.it/1kqo1xz,"Question on UNH Stock Should I sell UNH calls,...","[T, UNH]",-0.019439
4,1kqmxov,Is this gonna print tomorrow,"Held these over the weekend, at its peak today...",0,0.44,2025-05-19 22:54:47,https://redd.it/1kqmxov,Is this gonna print tomorrow Held these over t...,"[T, L, O, Nvidia]",-0.055420
...,...,...,...,...,...,...,...,...,...,...
864,1kaezyl,JetBlue Scraps Alliance with American Airlines...,"See below for article, what is the future of J...",2,0.58,2025-04-29 05:58:42,https://redd.it/1kaezyl,JetBlue Scraps Alliance with American Airlines...,"[A, F, J, L]",-0.057829
866,1ka9zk6,"First I get dicked, then this",idk even know what to type. Carvana is a manip...,125,0.89,2025-04-29 01:37:13,https://redd.it/1ka9zk6,"First I get dicked, then this idk even know wh...","[T, C, D, F, O]",-0.124690
867,1ka9vkm,"Turned $800 into $4,400 in 100 minutes because...","At 1:36PM (market time) today, I had two choic...",1364,0.96,2025-04-29 01:31:56,https://redd.it/1ka9vkm,"Turned $800 into $4,400 in 100 minutes because...","[A, T, C, D, F, L, PM, O, V]",-0.084324
868,1ka8xee,Is anyone else here bagholding puts?,Wondering if there are other idiots like me wh...,607,0.96,2025-04-29 00:47:42,https://redd.it/1ka8xee,Is anyone else here bagholding puts? Wondering...,"[C, D, O]",-0.325088


In [None]:
# filtered_reddit["sentiment"].describe()
# # Plot the distribution of sentiment scores
# plt.figure(figsize=(10, 6))
# sns.histplot(filtered_reddit["sentiment"], bins=30, kde=True)
# plt.title("Distribution of Sentiment Scores")
# plt.xlabel("Sentiment Score")
# plt.ylabel("Frequency")
# plt.show()

---

### 4. Stock Market Comparison

---

### 5. Visualization 