In [3]:
pip install praw pandas requests

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
import pandas as pd

def load_tickers():
    nasdaq = pd.read_csv("nasdaq_.csv")
    nyse = pd.read_csv("nyse.csv")

    # Make sure the column name is correct; use 'Symbol' or 'ACT Symbol'
    if 'Symbol' in nasdaq.columns:
        nasdaq_tickers = set(nasdaq['Symbol'].str.upper())
    else:
        nasdaq_tickers = set(nasdaq['ACT Symbol'].str.upper())

    if 'Symbol' in nyse.columns:
        nyse_tickers = set(nyse['Symbol'].str.upper())
    else:
        nyse_tickers = set(nyse['ACT Symbol'].str.upper())

    all_tickers = nasdaq_tickers.union(nyse_tickers)
    return all_tickers

# Load and print count
valid_tickers = load_tickers()
print(f"Loaded {len(valid_tickers)} valid tickers.")



Loaded 7709 valid tickers.


In [5]:
import praw
import re
from collections import Counter
import time

reddit = praw.Reddit(
    client_id="Lk7amzjYlHw4NZN4jVeMOA",
    client_secret="4Xv7mFZcXHbcvUkQG98tMau-BvEFFg",
    user_agent="WSB_Sentiment_Bot/1.0 by Defiant-Fee-533",
    check_for_async=False
)

subreddits = ['wallstreetbets', 'stocks', 'investing']
post_limit = 200  # number of posts to scan per subreddit

# Regex to extract uppercase words 1-5 letters (possible ticker symbols)
ticker_candidate_pattern = re.compile(r'\b[A-Z]{1,5}\b')

def find_trending_tickers():
    all_candidates = []

    for subreddit_name in subreddits:
        print(f"Scanning r/{subreddit_name}...")
        subreddit = reddit.subreddit(subreddit_name)
        for post in subreddit.hot(limit=post_limit):
            text = post.title.upper()
            candidates = ticker_candidate_pattern.findall(text)
            all_candidates.extend(candidates)
            time.sleep(0.1)  # polite pause to avoid rate limit

    # Filter candidates by whether they are valid tickers
    filtered = [t for t in all_candidates if t in valid_tickers]

    # Count frequencies and get top 20 trending tickers
    counter = Counter(filtered)
    top_20 = counter.most_common(20)

    print("\nTop 20 trending tickers detected on Reddit:")
    for ticker, count in top_20:
        print(f"{ticker}: {count} mentions")

    return [t[0] for t in top_20]


In [6]:
def get_posts_for_ticker(ticker, max_posts=15):
    posts = []
    query = f'title:{ticker} OR selftext:{ticker}'

    for subreddit_name in subreddits:
        subreddit = reddit.subreddit(subreddit_name)
        for post in subreddit.search(query, sort='new', limit=max_posts):
            posts.append({
                'ticker': ticker,
                'post_id': post.id,
                'title': post.title,
                'selftext': post.selftext,
                'url': post.url,
                'created_utc': post.created_utc,
                'score': post.score,
                'num_comments': post.num_comments,
                'subreddit': subreddit_name
            })
            if len(posts) >= max_posts:
                break
        if len(posts) >= max_posts:
            break
    return posts

if __name__ == "__main__":
    valid_tickers = load_tickers()
    trending_tickers = find_trending_tickers()

    all_posts = []
    for ticker in trending_tickers:
        print(f"\nFetching posts for {ticker}...")
        posts = get_posts_for_ticker(ticker)
        all_posts.extend(posts)
        print(f"Fetched {len(posts)} posts for {ticker}")
        time.sleep(1)
    # Convert the list of posts to a DataFrame
    df = pd.DataFrame(all_posts)
    # Save the DataFrame to a CSV file
    output_file = "reddit_trending_tickers_posts.csv"
    df.to_csv(output_file, index=False)
    print(f"\nSaved extracted data to '{output_file}'")


Scanning r/wallstreetbets...
Scanning r/stocks...
Scanning r/investing...

Top 20 trending tickers detected on Reddit:
A: 84 mentions
FOR: 67 mentions
ON: 67 mentions
S: 62 mentions
YOU: 48 mentions
OR: 34 mentions
IT: 34 mentions
ARE: 29 mentions
TSLA: 26 mentions
AS: 23 mentions
JUNE: 20 mentions
GOOD: 20 mentions
AI: 20 mentions
UP: 18 mentions
ALL: 17 mentions
NOW: 16 mentions
NEXT: 15 mentions
CAN: 14 mentions
BE: 14 mentions
M: 14 mentions

Fetching posts for A...
Fetched 15 posts for A

Fetching posts for FOR...
Fetched 15 posts for FOR

Fetching posts for ON...
Fetched 15 posts for ON

Fetching posts for S...
Fetched 15 posts for S

Fetching posts for YOU...
Fetched 15 posts for YOU

Fetching posts for OR...
Fetched 15 posts for OR

Fetching posts for IT...
Fetched 15 posts for IT

Fetching posts for ARE...
Fetched 15 posts for ARE

Fetching posts for TSLA...
Fetched 15 posts for TSLA

Fetching posts for AS...
Fetched 15 posts for AS

Fetching posts for JUNE...
Fetched 15 posts

In [1]:
pip install yfinance




In [2]:
import yfinance as yf
import pandas as pd

def get_sp500_tickers():
    # Fetch S&P 500 tickers
    sp500 = yf.Ticker("^GSPC")

    # yfinance does not have a direct method for tickers list,
    # but there's a popular workaround to get the list via Wikipedia
    sp500_table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    sp500_df = sp500_table[0]

    # Get the Symbol column as a list
    tickers = sp500_df['Symbol'].tolist()
    # Make uppercase for consistency
    tickers = [t.upper() for t in tickers]

    return tickers

# Example usage
valid_tickers = get_sp500_tickers()
print(valid_tickers[:10])  # print first 10 tickers


['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A']


In [20]:
valid_tickers

['MMM',
 'AOS',
 'ABT',
 'ABBV',
 'ACN',
 'ADBE',
 'AMD',
 'AES',
 'AFL',
 'A',
 'APD',
 'ABNB',
 'AKAM',
 'ALB',
 'ARE',
 'ALGN',
 'ALLE',
 'LNT',
 'ALL',
 'GOOGL',
 'GOOG',
 'MO',
 'AMZN',
 'AMCR',
 'AEE',
 'AEP',
 'AXP',
 'AIG',
 'AMT',
 'AWK',
 'AMP',
 'AME',
 'AMGN',
 'APH',
 'ADI',
 'ANSS',
 'AON',
 'APA',
 'APO',
 'AAPL',
 'AMAT',
 'APTV',
 'ACGL',
 'ADM',
 'ANET',
 'AJG',
 'AIZ',
 'T',
 'ATO',
 'ADSK',
 'ADP',
 'AZO',
 'AVB',
 'AVY',
 'AXON',
 'BKR',
 'BALL',
 'BAC',
 'BAX',
 'BDX',
 'BRK.B',
 'BBY',
 'TECH',
 'BIIB',
 'BLK',
 'BX',
 'BK',
 'BA',
 'BKNG',
 'BSX',
 'BMY',
 'AVGO',
 'BR',
 'BRO',
 'BF.B',
 'BLDR',
 'BG',
 'BXP',
 'CHRW',
 'CDNS',
 'CZR',
 'CPT',
 'CPB',
 'COF',
 'CAH',
 'KMX',
 'CCL',
 'CARR',
 'CAT',
 'CBOE',
 'CBRE',
 'CDW',
 'COR',
 'CNC',
 'CNP',
 'CF',
 'CRL',
 'SCHW',
 'CHTR',
 'CVX',
 'CMG',
 'CB',
 'CHD',
 'CI',
 'CINF',
 'CTAS',
 'CSCO',
 'C',
 'CFG',
 'CLX',
 'CME',
 'CMS',
 'KO',
 'CTSH',
 'COIN',
 'CL',
 'CMCSA',
 'CAG',
 'COP',
 'ED',
 'STZ',
 'CEG',

In [5]:
import praw
import re
from collections import Counter
import time
import csv

# Reddit API setup
reddit = praw.Reddit(
    client_id="Lk7amzjYlHw4NZN4jVeMOA",
    client_secret="4Xv7mFZcXHbcvUkQG98tMau-BvEFFg",
    user_agent="WSB_Sentiment_Bot/1.0 by Defiant-Fee-533",
    check_for_async=False
)

subreddits = ['wallstreetbets', 'stocks', 'investing']
post_limit = 200  # number of posts to scan per subreddit

# Regex to extract uppercase ticker symbols, allowing optional dot (like BRK.B)
ticker_candidate_pattern = re.compile(r'\b[A-Z]{1,5}(?:\.[A-Z])?\b')


def find_trending_tickers():
    all_candidates = []

    for subreddit_name in subreddits:
        print(f"Scanning r/{subreddit_name}...")
        subreddit = reddit.subreddit(subreddit_name)
        for post in subreddit.hot(limit=post_limit):
            text = post.title.upper()
            candidates = ticker_candidate_pattern.findall(text)
            all_candidates.extend(candidates)
            time.sleep(0.1)  # polite pause to avoid rate limit

    # Filter candidates by whether they are valid tickers
    filtered = [t for t in all_candidates if t in valid_tickers]

    # Count frequencies and get top 20 trending tickers
    counter = Counter(filtered)
    top_20 = counter.most_common(20)

    print("\nTop 20 trending tickers detected on Reddit:")
    for ticker, count in top_20:
        print(f"{ticker}: {count} mentions")

    return [t[0] for t in top_20]

def get_posts_for_ticker(ticker, max_posts=15):
    posts = []
    query = f'title:{ticker} OR selftext:{ticker}'

    for subreddit_name in subreddits:
        subreddit = reddit.subreddit(subreddit_name)
        for post in subreddit.search(query, sort='new', limit=max_posts):
            posts.append({
                'ticker': ticker,
                'post_id': post.id,
                'title': post.title,
                'selftext': post.selftext,
                'url': post.url,
                'created_utc': post.created_utc,
                'score': post.score,
                'num_comments': post.num_comments,
                'subreddit': subreddit_name
            })
            if len(posts) >= max_posts:
                break
        if len(posts) >= max_posts:
            break
    return posts

def save_posts_to_csv(posts, filename='reddit_stock_posts.csv'):
    if not posts:
        print("No posts to save.")
        return

    keys = posts[0].keys()
    with open(filename, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames=keys)
        dict_writer.writeheader()
        dict_writer.writerows(posts)

    print(f"Saved {len(posts)} posts to {filename}")

if __name__ == "__main__":
    trending_tickers = find_trending_tickers()

    all_posts = []
    for ticker in trending_tickers:
        print(f"\nFetching posts for {ticker}...")
        posts = get_posts_for_ticker(ticker)
        all_posts.extend(posts)
        print(f"Fetched {len(posts)} posts for {ticker}")
        time.sleep(1)

    save_posts_to_csv(all_posts)


Scanning r/wallstreetbets...
Scanning r/stocks...
Scanning r/investing...

Top 20 trending tickers detected on Reddit:
A: 79 mentions
ON: 69 mentions
IT: 38 mentions
UNH: 37 mentions
ARE: 36 mentions
T: 20 mentions
ALL: 15 mentions
NOW: 14 mentions
DAY: 8 mentions
NVDA: 7 mentions
SO: 7 mentions
HAS: 7 mentions
TECH: 4 mentions
GOOGL: 3 mentions
D: 3 mentions
GOOG: 3 mentions
AMD: 3 mentions
PLTR: 2 mentions
LOW: 2 mentions
RTX: 2 mentions

Fetching posts for A...
Fetched 15 posts for A

Fetching posts for ON...
Fetched 15 posts for ON

Fetching posts for IT...
Fetched 15 posts for IT

Fetching posts for UNH...
Fetched 15 posts for UNH

Fetching posts for ARE...
Fetched 15 posts for ARE

Fetching posts for T...
Fetched 15 posts for T

Fetching posts for ALL...
Fetched 15 posts for ALL

Fetching posts for NOW...
Fetched 15 posts for NOW

Fetching posts for DAY...
Fetched 15 posts for DAY

Fetching posts for NVDA...
Fetched 15 posts for NVDA

Fetching posts for SO...
Fetched 15 posts for