In [1]:
import praw
from dotenv import load_dotenv
import os
import alpaca_trade_api
import pandas
from datetime import datetime, timedelta
import time
import re
import json
from xai_sdk import Client
from xai_sdk.chat import user, system
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Load environment variables
load_dotenv()

# Configuration from .env file
REDDIT_CLIENT_ID = os.getenv("REDDIT_CLIENT_ID")
REDDIT_CLIENT_SECRET = os.getenv("REDDIT_CLIENT_SECRET")
REDDIT_USER_AGENT = os.getenv("REDDIT_USER_AGENT")
ALPACA_API_KEY = os.getenv("ALPACA_API_KEY")
ALPACA_API_SECRET = os.getenv("ALPACA_API_SECRET")
ALPACA_BASE_URL = os.getenv("ALPACA_BASE_URL", "https://paper-api.alpaca.markets")
XAI_API_KEY = os.getenv("XAI_API_KEY")

# Validate required environment variables
required_vars = [
    "REDDIT_CLIENT_ID", "REDDIT_CLIENT_SECRET", "REDDIT_USER_AGENT",
    "ALPACA_API_KEY", "ALPACA_API_SECRET", "XAI_API_KEY"
]

for var in required_vars:
    if not os.getenv(var):
        raise ValueError(f"Required environment variable {var} not found in .env file")

True

In [3]:
# Initialize Reddit API
reddit = praw.Reddit(
    client_id=REDDIT_CLIENT_ID,
    client_secret=REDDIT_CLIENT_SECRET,
    user_agent=REDDIT_USER_AGENT
)

# Initialize Alpaca API
alpaca = alpaca_trade_api.REST(ALPACA_API_KEY, ALPACA_API_SECRET, ALPACA_BASE_URL, api_version='v2')

# Initialize xAI Client
xai_client = Client(api_key=XAI_API_KEY)

In [4]:
# Function to clean text for sentiment analysis
def clean_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    return text.strip()

In [5]:
# Function to get sentiment from Grok API with retry logic
def get_grok_sentiment(text, max_retries=3):
    """Get sentiment analysis from xAI Grok API with retry logic"""
    for attempt in range(max_retries):
        try:
            # Create the chat using the Grok SDK
            chat = xai_client.chat.create(
                        model="grok-3-mini",
                        messages=[system("You are a financial sentiment analysis expert. Analyze the sentiment of the provided text "
                        "in relation to stock trading and market sentiment. Return ONLY a valid JSON object with: "
                        "1. 'sentiment': 'positive', 'negative', or 'neutral' "
                        "2. 'compound': a numerical score from -1.0 (very negative) to 1.0 (very positive) "
                        "3. 'confidence': a score from 0.0 to 1.0 indicating confidence in the analysis "
                        "4. 'explanation': a brief explanation of the sentiment "
                        "Focus on financial and trading-related sentiment rather than general mood. "
                        "Respond with ONLY the JSON object, no additional text.")])
            chat.append(user(f"Analyze the financial sentiment of this text: {text}"))

            # Get the response content
            content = chat.sample().content
            
            # Try to parse JSON response
            try:
                sentiment_data = json.loads(content)
                # Validate required fields
                if all(key in sentiment_data for key in ['sentiment', 'compound']):
                    return sentiment_data
                else:
                    raise ValueError("Missing required fields in response")
            except json.JSONDecodeError:
                # If JSON parsing fails, raise exception
                raise ValueError("Warning: Could not parse JSON response")

        # Retry logic        
        except Exception as e:
            print(f"Unexpected error in sentiment analysis (attempt {attempt + 1}/{max_retries}): {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
            else:
                return {"sentiment": "neutral", 
                        "compound": 0.0, 
                        "confidence": 0.0, 
                        "explanation": f"Analysis error: {str(e)}"}

In [12]:
# Function to extract stock tickers with improved filtering
def extract_tickers(text):
    """Extract potential stock tickers from text with filtering"""
    # Common words that look like tickers but aren't
    false_positives = {
        'DD', 'MY', 'IN', 'TA', 'AI', 'AF', 'PE', 'IV',
        'US', 'DR', 'FX', 'OI', 'ON', 'TL',
        'THE', 'AND', 'BUT', 'NOT', 'HER', 'WAS', 'ONE', 
        'OUR', 'GET', 'HIM', 'HIS', 'HOW', 'ITS', 'NEW', 
        'OLD', 'WHO', 'BOY', 'DID', 'ITS', 'LET', 'PUT', 
        'SAY', 'TOO', 'WSB', 'CEO', 'ITM', 'OTM', 'EPS', 
        'ARR', 'NFL', 'GDP', 'GPU', 'TPV', 'WTF', 'API',
        'AZA', 'AST', 'AWS', 'YOY',
        'YOLO', 'GAINS', 'HODL', 'CALL', 'PAID', 'ALSO',
        'FULL', 'NYSE', 'OXXO', 'TLDR',
        'BULLS', 'TOFAY', 'TODAY', 'SPACE'
        
    }
    
    # Find potential tickers (2-5 uppercase letters)
    potential_tickers = set(re.findall(r'\b[A-Z]{2,5}\b', text))
    
    # Filter out common false positives
    tickers = potential_tickers - false_positives
    
    return tickers

In [10]:
# Function to get sentiment from r/WallStreetBets
def get_reddit_sentiment(subreddit='wallstreetbets', hours=24, limit=100):
    """Analyze sentiment from Reddit posts using xAI Grok"""
    print(f"Fetching sentiment from r/{subreddit}...")
    
    subreddit_obj = reddit.subreddit(subreddit)
    posts = subreddit_obj.hot(limit=limit)
    sentiment_scores = {}
    post_count = {}
    ticker_posts = {}

    # Time filter: only posts from the last `hours`
    time_threshold = datetime.utcnow() - timedelta(hours=hours)

    processed_posts = 0
    for post in posts:
        try:
            post_time = datetime.fromtimestamp(post.created_utc)
            if post_time < time_threshold:
                continue

            # Clean and combine title and body text
            full_text = clean_text(post.title + ' ' + post.selftext)

            # Extract stock tickers
            tickers = extract_tickers(post.title + ' ' + post.selftext)
            
            if not tickers:
                continue

            # Perform sentiment analysis using Grok API
            print(f"Analyzing post: {post.title}")
            sentiment_data = get_grok_sentiment(full_text)
            
            compound_score = sentiment_data["compound"]
            confidence = sentiment_data.get("confidence", 1.0)

            # Weight the sentiment by confidence
            weighted_score = compound_score * confidence

            for ticker in tickers:
                if ticker not in sentiment_scores:
                    sentiment_scores[ticker] = []
                    post_count[ticker] = 0
                    ticker_posts[ticker] = []
                
                sentiment_scores[ticker].append(weighted_score)
                post_count[ticker] += 1
                ticker_posts[ticker].append({
                    'title': post.title[:100],
                    'sentiment': sentiment_data['sentiment'],
                    'score': compound_score,
                    'confidence': confidence
                })

            processed_posts += 1
            
            # Print progress
            if processed_posts % 10 == 0:
                print(f"Processed {processed_posts} posts...")

            # Rate limiting
            time.sleep(0.1)
            
        except Exception as e:
            print(f"Error processing post: {e}")
            continue

    # Calculate average sentiment scores
    avg_sentiment = {}
    
    for ticker in sentiment_scores:
        scores = sentiment_scores[ticker]
        avg_score = sum(scores) / len(scores)
        avg_sentiment[ticker] = {
            'score': avg_score,
            'post_count': post_count[ticker],
            'posts': ticker_posts[ticker]
        }

    return avg_sentiment

In [11]:
get_reddit_sentiment()

Fetching sentiment from r/wallstreetbets...


  time_threshold = datetime.utcnow() - timedelta(hours=hours)


Analyzing post: I FULL-PORTED NBIS ON MARGIN + MY RETIREMENT ACCOUNT
Analyzing post: 🚨 Is $ASTS cooked? 🚨
Analyzing post: Added 100 contract more to $OPEN
Analyzing post: $NBIS has signed a $17.4 Billion deal with $MSFT
Analyzing post: HOOD saving us from the HOOD
Analyzing post: Planet Lab (PL) 130k bet
Analyzing post: $100k+ in Sweetgreen. Sydney Sweeney Jeans formation spotted, $10+ Recovery.
Analyzing post: YOLO $100K of $OPEN
Analyzing post: $BABA and $BIDU All IN YOLO
Analyzing post: HOOD saved me
Processed 10 posts...
Analyzing post: Capital Vanishing System gains
Analyzing post: NBIS YOLO
Analyzing post: Thanks PLTR I finally broke even
Analyzing post: 320% gains on $PL calls
Analyzing post: Thanks NBIS
Analyzing post: LDI 🚀
Analyzing post: Gambling $230k on out of the money SPOT calls this week
Analyzing post: [DD] Why DLO Is the Sleeper 3X Rocket Nobody’s Watching
Analyzing post: LDI - can you make me quit working at Wendy's?
Analyzing post: LDI YOLO
Processed 20 posts...
Ana

{'ON': {'score': 0.855,
  'post_count': 1,
  'posts': [{'title': 'I FULL-PORTED NBIS ON MARGIN + MY RETIREMENT ACCOUNT',
    'sentiment': 'positive',
    'score': 0.95,
    'confidence': 0.9}]},
 'NBIS': {'score': 0.7125,
  'post_count': 4,
  'posts': [{'title': 'I FULL-PORTED NBIS ON MARGIN + MY RETIREMENT ACCOUNT',
    'sentiment': 'positive',
    'score': 0.95,
    'confidence': 0.9},
   {'title': '$NBIS has signed a $17.4 Billion deal with $MSFT',
    'sentiment': 'positive',
    'score': 0.85,
    'confidence': 0.9},
   {'title': 'NBIS YOLO',
    'sentiment': 'positive',
    'score': 0.8,
    'confidence': 0.75},
   {'title': 'Thanks NBIS',
    'sentiment': 'positive',
    'score': 0.7,
    'confidence': 0.9}]},
 'ASTS': {'score': -0.02250000000000002,
  'post_count': 2,
  'posts': [{'title': '🚨 Is $ASTS cooked? 🚨',
    'sentiment': 'negative',
    'score': -0.8,
    'confidence': 0.9},
   {'title': 'Planet Lab (PL) 130k bet',
    'sentiment': 'positive',
    'score': 0.75,
    'c