## Sentiment to Strategy: Leveraging Forums Discussions to Guide Automated Trading Decisions 

---

### 0. Importing libraries

In [1]:
# Import necessary libraries
from pprint import pprint
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import os
import praw
import prawcore
import csv
import time
from datetime import datetime, date, timezone
from dateutil.rrule import rrule, DAILY
from tqdm import tqdm
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


---

### 1. Reddit API Data Extraction

In [2]:
# Initialize PRAW (Python Reddit API Wrapper)
user_agent = "StockMarket Sentiment Analysis"
reddit = praw.Reddit(
    client_id="SDD6BO5WvswKQ2DOZrioiQ",
    client_secret="BoGTdq9xg1Wu4kjy3zj_WBLbTInQvQ",
    user_agent=user_agent
)

# Initialize the subreddit
subreddit = reddit.subreddit("wallstreetbets")

In [3]:
# List to hold data for each post
posts = []

# Fetch posts from the subreddit
for submission in subreddit.new(limit=None):
    post_date = datetime.fromtimestamp(submission.created_utc)
    posts.append({
        'id': submission.id,
        'title': submission.title,
        'selftext': submission.selftext,
        'score': submission.score,
        'upvote_ratio': submission.upvote_ratio,       
        'created_date': post_date,
        'permalink': f"https://redd.it/{submission.id}"
    })

# Convert the list to a DataFrame
raw_df = pd.DataFrame(posts)
raw_df

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink
0,1ks8j5o,What are the best times during the day to plac...,I feel like I’m just winging it and getting lu...,3,1.00,2025-05-21 22:40:59,https://redd.it/1ks8j5o
1,1ks8gzw,Palantir direction thoughts…,Thoughts on which direction itll go tomorrow?!...,0,0.50,2025-05-21 22:38:27,https://redd.it/1ks8gzw
2,1ks8grh,Reddit drops 10% after Baird cuts stock price ...,No paywall: [https://www.investing.com/news/an...,5,0.78,2025-05-21 22:38:13,https://redd.it/1ks8grh
3,1ks8cbo,Duolingo to Zero $DUOL,Duolingo ($DUOL) had good earnings and guidanc...,10,1.00,2025-05-21 22:33:13,https://redd.it/1ks8cbo
4,1ks89rk,Decide to stop holding out and sell to see a p...,Due: May 23rd\n\n Strategy: raw guts and delus...,19,1.00,2025-05-21 22:30:23,https://redd.it/1ks89rk
...,...,...,...,...,...,...,...
864,1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...,,902,0.99,2025-04-30 23:50:12,https://redd.it/1kbsfmc
865,1kbs5yg,Tomorrow I wake up in Lambo or behind Wendy’s ...,,275,0.92,2025-04-30 23:38:49,https://redd.it/1kbs5yg
866,1kbr19g,META soft(half chub) YOLO. Good luck tomorrow (:,Sold 5 longs off a minute before closing incas...,17,0.87,2025-04-30 22:50:46,https://redd.it/1kbr19g
867,1kbqrtm,Why short US 100 instead of regional banks?,I'm wondering why so many people are talking a...,17,0.80,2025-04-30 22:39:35,https://redd.it/1kbqrtm


In [4]:
# Create a csv file with the data
if os.path.exists('wsb_posts.csv'):
    existing_df = pd.read_csv('wsb_posts.csv')
    combined_df = pd.concat([existing_df, raw_df]).drop_duplicates(subset=['id'])
    combined_df.to_csv('wsb_posts.csv', index=False)
else:
    raw_df.to_csv('wsb_posts.csv', index=False)

In [5]:
# Load the data from the csv file
df = pd.read_csv('wsb_posts.csv')
df

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text
0,1kqotlw,Whats your opinion about Rocket Lab?,I see many ppl hyping rocket lab and also boug...,1,1.00,2025-05-20 00:12:16,https://redd.it/1kqotlw,Whats your opinion about Rocket Lab? I see man...
1,1kqoadm,Fed's Powell has sounded the alarm for years a...,,125,0.94,2025-05-19 23:49:38,https://redd.it/1kqoadm,Fed's Powell has sounded the alarm for years a...
2,1kqo5u3,Big beautiful bill,Am I screwed with my sp500 ETFs as a French in...,0,0.25,2025-05-19 23:44:20,https://redd.it/1kqo5u3,Big beautiful bill Am I screwed with my sp500 ...
3,1kqo1xz,Question on UNH Stock,"Should I sell UNH calls, covered calls or hold...",11,0.87,2025-05-19 23:39:50,https://redd.it/1kqo1xz,"Question on UNH Stock Should I sell UNH calls,..."
4,1kqmxov,Is this gonna print tomorrow,"Held these over the weekend, at its peak today...",0,0.44,2025-05-19 22:54:47,https://redd.it/1kqmxov,Is this gonna print tomorrow Held these over t...
...,...,...,...,...,...,...,...,...
968,1krixsc,I hate GOOG,"This stock has the same curse as AMD, never pa...",171,0.87,2025-05-21 01:04:52,https://redd.it/1krixsc,
969,1krh6rp,Wolfspeed Prepares to File for Bankruptcy With...,,482,0.95,2025-05-20 23:47:12,https://redd.it/1krh6rp,
970,1krh2cq,Also holding INOD calls $42 06/23/25,I know they’re down today but they’ve been goi...,5,0.67,2025-05-20 23:42:00,https://redd.it/1krh2cq,
971,1krh0vi,Warby Parker pops 16% on $150 million Google s...,No paywall: [https://www.cnbc.com/2025/05/20/w...,128,0.92,2025-05-20 23:40:09,https://redd.it/1krh0vi,


---

### 2. Sentiment Analysis

In [6]:
# URL with the S&P 500 companies list on Wikipedia
stocks_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

# Read the HTML table from the page
tables = pd.read_html(stocks_url)
sp500_df = tables[0]

# Extract the ticker symbols (the column is usually named "Symbol")
tickers = sp500_df['Symbol'].tolist()

# Extract the names of the companies
companies = sp500_df['Security'].tolist()

In [7]:
long_blacklist = ['I', 'ARE',  'ON', 'GO', 'NOW', 'CAN', 'UK', 'SO', 'OR', 'OUT', 'SEE', 'ONE', 'LOVE', 'U', 'STAY', 'HAS', 'BY', 'BIG', 'GOOD', 'RIDE', 'EOD', 'ELON', 'WSB',
            'THE', 'A', 'ROPE', 'YOLO', 'TOS', 'CEO', 'DD', 'IT', 'OPEN', 'ATH', 'PM', 'IRS', 'FOR','DEC', 'BE', 'IMO', 'ALL', 'RH', 'EV', 'TOS', 'CFO', 'CTO','DD', 'BTFD',
            'WSB', 'OK', 'PDT', 'RH', 'KYS', 'FD', 'TYS', 'US', 'USA', 'IT', 'ATH', 'RIP', 'BMW', 'GDP', 'OTM', 'ATM', 'ITM', 'IMO', 'LOL', 'AM', 'BE', 'PR', 'PRAY', 'PT', 
            'FBI', 'SEC', 'GOD', 'NOT', 'POS', 'FOMO', 'TL;DR', 'EDIT', 'STILL', 'WTF', 'RAW', 'PM', 'LMAO','LMFAO', 'ROFL', 'EZ', 'RED', 'BEZOS', 'TICK', 'IS', 'PM', 'LPT',
            'GOAT', 'FL', 'CA', 'IL', 'MACD', 'HQ', 'OP', 'PS', 'AH', 'TL', 'JAN', 'FEB', 'JUL', 'AUG', 'SEP', 'SEPT', 'OCT', 'NOV', 'FDA', 'IV', 'ER', 'IPO', 'MILF', 'BUT', 
            'SSN', 'FIFA', 'USD', 'CPU', 'AT', 'GG']

blacklist = ['A', 'ARE', 'ALL', 'DD', 'IT', 'HAS', 'ON', 'PM', 'NOW', 'SO']

In [8]:
# Create a regex pattern from the S&P 500 tickers list, using word boundaries to avoid false matches
pattern = r'\b(?:' + '|'.join(map(re.escape, tickers)) + r')\b'

# Combine title and selftext into one column to search both
raw_df['text'] = raw_df['title'].fillna('') + " " + raw_df['selftext'].fillna('')

# Filter the DataFrame to include only posts that mention any S&P 500 stock ticker
filtered_reddit = raw_df[raw_df['text'].str.contains(pattern, flags=re.IGNORECASE, regex=True)].copy()
filtered_reddit


Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text
0,1ks8j5o,What are the best times during the day to plac...,I feel like I’m just winging it and getting lu...,3,1.00,2025-05-21 22:40:59,https://redd.it/1ks8j5o,What are the best times during the day to plac...
1,1ks8gzw,Palantir direction thoughts…,Thoughts on which direction itll go tomorrow?!...,0,0.50,2025-05-21 22:38:27,https://redd.it/1ks8gzw,Palantir direction thoughts… Thoughts on which...
2,1ks8grh,Reddit drops 10% after Baird cuts stock price ...,No paywall: [https://www.investing.com/news/an...,5,0.78,2025-05-21 22:38:13,https://redd.it/1ks8grh,Reddit drops 10% after Baird cuts stock price ...
3,1ks8cbo,Duolingo to Zero $DUOL,Duolingo ($DUOL) had good earnings and guidanc...,10,1.00,2025-05-21 22:33:13,https://redd.it/1ks8cbo,Duolingo to Zero $DUOL Duolingo ($DUOL) had go...
4,1ks89rk,Decide to stop holding out and sell to see a p...,Due: May 23rd\n\n Strategy: raw guts and delus...,19,1.00,2025-05-21 22:30:23,https://redd.it/1ks89rk,Decide to stop holding out and sell to see a p...
...,...,...,...,...,...,...,...,...
863,1kbsiya,YOLO INTC earnings made me lost a lots. This t...,Hope I am right,3,0.62,2025-04-30 23:54:05,https://redd.it/1kbsiya,YOLO INTC earnings made me lost a lots. This t...
864,1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...,,902,0.99,2025-04-30 23:50:12,https://redd.it/1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...
866,1kbr19g,META soft(half chub) YOLO. Good luck tomorrow (:,Sold 5 longs off a minute before closing incas...,17,0.87,2025-04-30 22:50:46,https://redd.it/1kbr19g,META soft(half chub) YOLO. Good luck tomorrow ...
867,1kbqrtm,Why short US 100 instead of regional banks?,I'm wondering why so many people are talking a...,17,0.80,2025-04-30 22:39:35,https://redd.it/1kbqrtm,Why short US 100 instead of regional banks? I'...


In [9]:
# Find matching tickers in the text
def matching_tickers(text):
    matching_tickers = [ticker for ticker in tickers if ticker in text]
    matching_tickers += [company for company in companies if company in text]
    return matching_tickers

filtered_reddit['tickers'] = filtered_reddit['text'].apply(matching_tickers)
filtered_reddit

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text,tickers
0,1ks8j5o,What are the best times during the day to plac...,I feel like I’m just winging it and getting lu...,3,1.00,2025-05-21 22:40:59,https://redd.it/1ks8j5o,What are the best times during the day to plac...,[]
1,1ks8gzw,Palantir direction thoughts…,Thoughts on which direction itll go tomorrow?!...,0,0.50,2025-05-21 22:38:27,https://redd.it/1ks8gzw,Palantir direction thoughts… Thoughts on which...,[T]
2,1ks8grh,Reddit drops 10% after Baird cuts stock price ...,No paywall: [https://www.investing.com/news/an...,5,0.78,2025-05-21 22:38:13,https://redd.it/1ks8grh,Reddit drops 10% after Baird cuts stock price ...,"[A, GOOGL, GOOG, T, C, D, DD, F, IT, GL, J, JP..."
3,1ks8cbo,Duolingo to Zero $DUOL,Duolingo ($DUOL) had good earnings and guidanc...,10,1.00,2025-05-21 22:33:13,https://redd.it/1ks8cbo,Duolingo to Zero $DUOL Duolingo ($DUOL) had go...,"[A, T, D, J, L, O]"
4,1ks89rk,Decide to stop holding out and sell to see a p...,Due: May 23rd\n\n Strategy: raw guts and delus...,19,1.00,2025-05-21 22:30:23,https://redd.it/1ks89rk,Decide to stop holding out and sell to see a p...,"[C, D, L, O, V]"
...,...,...,...,...,...,...,...,...,...
863,1kbsiya,YOLO INTC earnings made me lost a lots. This t...,Hope I am right,3,0.62,2025-04-30 23:54:05,https://redd.it/1kbsiya,YOLO INTC earnings made me lost a lots. This t...,"[A, AAPL, T, C, INTC, L, O]"
864,1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...,,902,0.99,2025-04-30 23:50:12,https://redd.it/1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...,"[A, T, C, GM, L, V]"
866,1kbr19g,META soft(half chub) YOLO. Good luck tomorrow (:,Sold 5 longs off a minute before closing incas...,17,0.87,2025-04-30 22:50:46,https://redd.it/1kbr19g,META soft(half chub) YOLO. Good luck tomorrow ...,"[A, T, L, META, MET, O]"
867,1kbqrtm,Why short US 100 instead of regional banks?,I'm wondering why so many people are talking a...,17,0.80,2025-04-30 22:39:35,https://redd.it/1kbqrtm,Why short US 100 instead of regional banks? I'...,"[A, T, F]"


---

### 3. Sentiment Analysis

In [10]:
# Create sentiment analysis pipeline
sent_pipe = pipeline(
    "sentiment-analysis",
    model="ProsusAI/finBERT",
    top_k=None,            # get full softmax
    truncation=True
)

def finbert_score(text):
    outs = sent_pipe(text)[0] 
    print(outs)        # returns list of dicts
    score = (outs[2]["score"] - outs[1]["score"])  
    print(score)      # positive (outs[2]) - negative (outs[0])
    # neutral (outs[1]) implicitly pulls score toward 0
    return score

filtered_reddit["sentiment"] = filtered_reddit["text"].apply(finbert_score)
filtered_reddit

Device set to use cuda:0
  attn_output = torch.nn.functional.scaled_dot_product_attention(


[{'label': 'neutral', 'score': 0.8555924892425537}, {'label': 'negative', 'score': 0.10736639052629471}, {'label': 'positive', 'score': 0.03704111650586128}]
-0.07032527402043343
[{'label': 'neutral', 'score': 0.8549672961235046}, {'label': 'negative', 'score': 0.09021774679422379}, {'label': 'positive', 'score': 0.05481495335698128}]
-0.03540279343724251
[{'label': 'negative', 'score': 0.7421168088912964}, {'label': 'positive', 'score': 0.22042328119277954}, {'label': 'neutral', 'score': 0.0374598428606987}]
-0.18296343833208084
[{'label': 'negative', 'score': 0.5579645037651062}, {'label': 'neutral', 'score': 0.40864917635917664}, {'label': 'positive', 'score': 0.033386316150426865}]
-0.37526286020874977
[{'label': 'neutral', 'score': 0.868783175945282}, {'label': 'positive', 'score': 0.11044956743717194}, {'label': 'negative', 'score': 0.02076723799109459}]
-0.08968232944607735
[{'label': 'neutral', 'score': 0.8154149055480957}, {'label': 'positive', 'score': 0.14353348314762115}, {

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[{'label': 'neutral', 'score': 0.8791684508323669}, {'label': 'negative', 'score': 0.06280584633350372}, {'label': 'positive', 'score': 0.05802569538354874}]
-0.004780150949954987
[{'label': 'neutral', 'score': 0.6557179689407349}, {'label': 'positive', 'score': 0.30941835045814514}, {'label': 'negative', 'score': 0.034863632172346115}]
-0.274554718285799
[{'label': 'neutral', 'score': 0.9049307107925415}, {'label': 'negative', 'score': 0.05962345376610756}, {'label': 'positive', 'score': 0.035445891320705414}]
-0.024177562445402145
[{'label': 'negative', 'score': 0.8635517358779907}, {'label': 'neutral', 'score': 0.0979335829615593}, {'label': 'positive', 'score': 0.03851467743515968}]
-0.05941890552639961
[{'label': 'neutral', 'score': 0.6083036065101624}, {'label': 'positive', 'score': 0.2321372926235199}, {'label': 'negative', 'score': 0.15955907106399536}]
-0.07257822155952454
[{'label': 'neutral', 'score': 0.9067550897598267}, {'label': 'negative', 'score': 0.05787690356373787}, 

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text,tickers,sentiment
0,1ks8j5o,What are the best times during the day to plac...,I feel like I’m just winging it and getting lu...,3,1.00,2025-05-21 22:40:59,https://redd.it/1ks8j5o,What are the best times during the day to plac...,[],-0.070325
1,1ks8gzw,Palantir direction thoughts…,Thoughts on which direction itll go tomorrow?!...,0,0.50,2025-05-21 22:38:27,https://redd.it/1ks8gzw,Palantir direction thoughts… Thoughts on which...,[T],-0.035403
2,1ks8grh,Reddit drops 10% after Baird cuts stock price ...,No paywall: [https://www.investing.com/news/an...,5,0.78,2025-05-21 22:38:13,https://redd.it/1ks8grh,Reddit drops 10% after Baird cuts stock price ...,"[A, GOOGL, GOOG, T, C, D, DD, F, IT, GL, J, JP...",-0.182963
3,1ks8cbo,Duolingo to Zero $DUOL,Duolingo ($DUOL) had good earnings and guidanc...,10,1.00,2025-05-21 22:33:13,https://redd.it/1ks8cbo,Duolingo to Zero $DUOL Duolingo ($DUOL) had go...,"[A, T, D, J, L, O]",-0.375263
4,1ks89rk,Decide to stop holding out and sell to see a p...,Due: May 23rd\n\n Strategy: raw guts and delus...,19,1.00,2025-05-21 22:30:23,https://redd.it/1ks89rk,Decide to stop holding out and sell to see a p...,"[C, D, L, O, V]",-0.089682
...,...,...,...,...,...,...,...,...,...,...
863,1kbsiya,YOLO INTC earnings made me lost a lots. This t...,Hope I am right,3,0.62,2025-04-30 23:54:05,https://redd.it/1kbsiya,YOLO INTC earnings made me lost a lots. This t...,"[A, AAPL, T, C, INTC, L, O]",-0.061213
864,1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...,,902,0.99,2025-04-30 23:50:12,https://redd.it/1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...,"[A, T, C, GM, L, V]",-0.126542
866,1kbr19g,META soft(half chub) YOLO. Good luck tomorrow (:,Sold 5 longs off a minute before closing incas...,17,0.87,2025-04-30 22:50:46,https://redd.it/1kbr19g,META soft(half chub) YOLO. Good luck tomorrow ...,"[A, T, L, META, MET, O]",-0.025609
867,1kbqrtm,Why short US 100 instead of regional banks?,I'm wondering why so many people are talking a...,17,0.80,2025-04-30 22:39:35,https://redd.it/1kbqrtm,Why short US 100 instead of regional banks? I'...,"[A, T, F]",-0.222654


In [11]:
# filtered_reddit["sentiment"].describe()
# # Plot the distribution of sentiment scores
# plt.figure(figsize=(10, 6))
# sns.histplot(filtered_reddit["sentiment"], bins=30, kde=True)
# plt.title("Distribution of Sentiment Scores")
# plt.xlabel("Sentiment Score")
# plt.ylabel("Frequency")
# plt.show()

In [12]:
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch

device = 0 if torch.cuda.is_available() else -1          # -1 ⇒ CPU
model_id = "ProsusAI/finBERT"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model     = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if device==0 else torch.float32
)

sent_pipe1 = pipeline(
    task="sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=device,
    return_all_scores=True,         # **preferred** to top_k=None
    truncation=True
)


Device set to use cuda:0


In [13]:
LABEL_TO_POLARITY = {"positive": +1, "neutral": 0, "negative": -1}

def finbert_score1(text: str) -> float:
    scores = sent_pipe(text, batch_size=1)[0]           # list[dict]
    # Convert to dict {label: prob}
    scores = {d["label"].lower(): d["score"] for d in scores}

    # Simple +/− difference (prob_pos − prob_neg)
    return scores["positive"] - scores["negative"]

filtered_reddit["sentiment"] = filtered_reddit["text"].apply(finbert_score1)
filtered_reddit

Unnamed: 0,id,title,selftext,score,upvote_ratio,created_date,permalink,text,tickers,sentiment
0,1ks8j5o,What are the best times during the day to plac...,I feel like I’m just winging it and getting lu...,3,1.00,2025-05-21 22:40:59,https://redd.it/1ks8j5o,What are the best times during the day to plac...,[],-0.070325
1,1ks8gzw,Palantir direction thoughts…,Thoughts on which direction itll go tomorrow?!...,0,0.50,2025-05-21 22:38:27,https://redd.it/1ks8gzw,Palantir direction thoughts… Thoughts on which...,[T],-0.035403
2,1ks8grh,Reddit drops 10% after Baird cuts stock price ...,No paywall: [https://www.investing.com/news/an...,5,0.78,2025-05-21 22:38:13,https://redd.it/1ks8grh,Reddit drops 10% after Baird cuts stock price ...,"[A, GOOGL, GOOG, T, C, D, DD, F, IT, GL, J, JP...",-0.521694
3,1ks8cbo,Duolingo to Zero $DUOL,Duolingo ($DUOL) had good earnings and guidanc...,10,1.00,2025-05-21 22:33:13,https://redd.it/1ks8cbo,Duolingo to Zero $DUOL Duolingo ($DUOL) had go...,"[A, T, D, J, L, O]",-0.524578
4,1ks89rk,Decide to stop holding out and sell to see a p...,Due: May 23rd\n\n Strategy: raw guts and delus...,19,1.00,2025-05-21 22:30:23,https://redd.it/1ks89rk,Decide to stop holding out and sell to see a p...,"[C, D, L, O, V]",0.089682
...,...,...,...,...,...,...,...,...,...,...
863,1kbsiya,YOLO INTC earnings made me lost a lots. This t...,Hope I am right,3,0.62,2025-04-30 23:54:05,https://redd.it/1kbsiya,YOLO INTC earnings made me lost a lots. This t...,"[A, AAPL, T, C, INTC, L, O]",-0.061213
864,1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...,,902,0.99,2025-04-30 23:50:12,https://redd.it/1kbsfmc,GM recalls nearly 600k trucks and SUVs equippe...,"[A, T, C, GM, L, V]",-0.126542
866,1kbr19g,META soft(half chub) YOLO. Good luck tomorrow (:,Sold 5 longs off a minute before closing incas...,17,0.87,2025-04-30 22:50:46,https://redd.it/1kbr19g,META soft(half chub) YOLO. Good luck tomorrow ...,"[A, T, L, META, MET, O]",-0.025609
867,1kbqrtm,Why short US 100 instead of regional banks?,I'm wondering why so many people are talking a...,17,0.80,2025-04-30 22:39:35,https://redd.it/1kbqrtm,Why short US 100 instead of regional banks? I'...,"[A, T, F]",-0.222654


In [14]:
# make a dictionary that has the keys as the creation date of the post and the values as a dictionary of the tickers and their corresponding sentiment score summed up on that date
def create_sentiment_dict(df):
    sentiment_dict = {}
    for index, row in df.iterrows():
        date = row['created_date'].date()
        tickers = row['tickers']
        sentiment = row['sentiment']
        
        if date not in sentiment_dict:
            sentiment_dict[date] = {}
        
        for ticker in tickers:
            if ticker not in sentiment_dict[date]:
                sentiment_dict[date][ticker] = 0
            sentiment_dict[date][ticker] += sentiment
    
    return sentiment_dict

sentiment_dict = create_sentiment_dict(filtered_reddit)
sentiment_dict

{datetime.date(2025, 5, 21): {'T': -2.7263321578502655,
  'A': -1.8139485185965896,
  'GOOGL': -0.22396760806441307,
  'GOOG': -0.7903230246156454,
  'C': 0.5576930707320571,
  'D': -1.6447628121823072,
  'DD': -0.5603644549846649,
  'F': 0.12938014697283506,
  'IT': -0.5216935276985168,
  'GL': -0.24814517050981522,
  'J': -1.629366459324956,
  'JPM': -0.5216935276985168,
  'L': -0.9770145304501057,
  'PM': -0.5216935276985168,
  'O': -0.7335770782083273,
  'V': 1.0698608001694083,
  'Goldman Sachs': -0.5216935276985168,
  'Wells Fargo': -0.5216935276985168,
  'UNH': 0.3142892438918352,
  'Citigroup': 0.16936409287154675,
  'CB': -0.825037058442831,
  'EG': -0.825037058442831,
  'GD': -0.825037058442831,
  'PLTR': -0.825037058442831,
  'PEG': -0.825037058442831,
  'SO': -0.825037058442831,
  'WMT': -0.825037058442831,
  'WM': -0.825037058442831,
  'K': 0.8461731942370534,
  'NVDA': 1.0008912095800042,
  'Nvidia': 0.07257822155952454,
  'DTE': -0.8157456591725349,
  'MO': 0.90884134173

In [15]:
# Convert sentiment_dict into a DataFrame and fill missing values with 0
sentiment_df = pd.DataFrame.from_dict(sentiment_dict, orient='index').fillna(0)

# Ensure the index is sorted (dates in ascending order)
sentiment_df.sort_index(inplace=True)

# Print the first few rows to verify
sentiment_df


Unnamed: 0,T,A,GOOGL,GOOG,C,D,DD,F,IT,GL,...,RCL,ESS,BALL,HCA,KMB,Bank of America,GM,Meta Platforms,ANET,General Motors
2025-04-30,-0.436018,-0.436018,0.0,0.0,-0.187755,0.0,0.0,-0.222654,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,-0.126542,0.0,0.0,0.0
2025-05-01,1.157045,1.273325,0.0,-0.249928,0.335547,1.679299,1.307821,2.204488,0.68646,0.0,...,0.0,0.0,0.0,0.0,0.0,0.924017,-1.164844,-0.008754,0.622455,-0.914917
2025-05-02,-0.958764,-4.301678,0.0,0.0,-0.031742,-0.323721,0.959688,2.469991,0.0,0.002397,...,0.0,0.0939,0.0,-0.405091,-0.405091,-0.919867,0.0,0.0,0.0,0.0
2025-05-03,-2.171748,-3.795112,0.0,0.0,-1.142152,-1.351588,0.0,-0.712611,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-04,-2.90154,-2.849443,0.0,0.0,-1.317024,-2.111207,0.0,-0.392798,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-05,-2.292332,0.608691,0.0,0.0,-0.251652,0.315374,-0.860409,0.776186,0.98222,0.27242,...,0.161576,0.085647,0.241076,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-06,-1.303356,-1.63504,0.0,0.0,-2.665434,-0.911907,-0.153825,-0.835882,-0.257143,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-07,-3.973131,-0.65564,-0.023025,-0.023025,-3.513703,0.272089,-0.00753,-5.377309,0.798485,-0.023025,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-08,1.19698,1.127277,-0.414422,-0.197802,1.818387,1.639227,0.0,0.764555,0.648934,-0.414422,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2025-05-09,1.408799,-0.302084,0.0,0.0,2.577545,-0.436913,0.0,0.137754,0.11368,0.11368,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


---

### 4. Stock Market Comparison

---

### 5. Visualization 