# Tesla Stock Prediction Pipeline

Combines Reddit data, the current stock price, and a trained PyTorch model to produce a live prediction.

In [None]:
import os
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import requests
import json
from datetime import datetime
from sentence_transformers import SentenceTransformer

In [None]:
# Run on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Width of one per-post feature vector: 1024 embedding values + 3 scalar features.
embedding_dim = 1024 + 3

class LSTMWithPrice(nn.Module):
    """LSTM over a sequence of feature vectors, combined with a price input.

    The final hidden state of the last LSTM layer is concatenated with the
    price and mapped to a single scalar per batch element by a linear layer.
    """

    def __init__(self, input_dim=embedding_dim, hidden_dim=64, num_layers=1):
        """Build the LSTM and the linear head.

        Args:
            input_dim: Size of each feature vector in the input sequence.
            hidden_dim: Size of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
        """
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        # The head sees the hidden state plus one extra value (the price).
        self.fc = nn.Linear(in_features=hidden_dim + 1, out_features=1)

    def forward(self, x, lengths, price):
        """Return one scalar prediction per batch element.

        Args:
            x: Padded, batch-first input sequences.
            lengths: Length of each sequence in ``x``; moved to the CPU
                before packing.
            price: Tensor concatenated with the hidden state along dim 1,
                contributing the one extra input feature of the head.
        """
        packed_input = nn.utils.rnn.pack_padded_sequence(
            x,
            lengths.cpu(),
            batch_first=True,
            enforce_sorted=False,
        )
        _, (hidden_states, _) = self.lstm(packed_input)
        # hidden_states[-1] is the final hidden state of the last layer.
        last_layer_hidden = hidden_states[-1]
        head_input = torch.cat([last_layer_hidden, price], dim=1)
        output = self.fc(head_input)
        return output.squeeze(1)

# Instantiate the network on the chosen device, restore the saved weights,
# and switch to evaluation mode.
model = LSTMWithPrice(input_dim=embedding_dim, hidden_dim=64)
model = model.to(device)
trained_weights = torch.load('models/best_model.pth', map_location=device)
model.load_state_dict(trained_weights)
model.eval()

In [None]:
# Get current Tesla data from existing CSV
def placeholder_posts():
    """Return a one-row stand-in DataFrame for when no real posts are available.

    The row carries the columns read later in this notebook:
    title, text, full_text, score and created.
    """
    return pd.DataFrame({
        'title': ['Tesla stock discussion'],
        'text': ['General Tesla market sentiment'],
        'full_text': ['Tesla stock discussion General Tesla market sentiment'],
        'score': [100],
        'created': [datetime.now()],
    })


def load_tesla_posts(csv_path='data/tesla_top5_this_week.csv'):
    """Load the Tesla posts CSV, falling back to a placeholder row.

    Args:
        csv_path: Path of the CSV file; its ``created`` column is parsed
            as dates.

    Returns:
        The loaded DataFrame, or ``placeholder_posts()`` when the file is
        missing, cannot be loaded, or contains no rows. A warning is printed
        when the newest post is more than 24 hours old.
    """
    try:
        posts = pd.read_csv(csv_path, parse_dates=['created'])

        # An empty frame cannot be normalised or fed to the model further
        # down, so treat it like a missing file instead of returning it.
        if posts.empty:
            print("⚠️  CSV contains no posts - using placeholder post")
            return placeholder_posts()

        latest_post = posts['created'].max()
        hours_old = (datetime.now() - latest_post).total_seconds() / 3600
        if hours_old > 24:
            print("⚠️  Data is older than 24 hours - consider running test_model.ipynb first")
        return posts

    except FileNotFoundError:
        print("❌ tesla_top5_this_week.csv not found")
        print("💡 Run test_model.ipynb first to generate current Tesla posts")

    except Exception as e:
        # Best-effort boundary: any other load/parse problem also falls back.
        print(f"❌ Error loading CSV: {e}")

    return placeholder_posts()


tesla_df = load_tesla_posts()

print(f"✅ Using {len(tesla_df)} Tesla posts for prediction")

In [None]:
# Generate embeddings for current posts
# Fail early with a clear message: the min/max normalisations below cannot
# run on zero rows.
if tesla_df.empty:
    raise ValueError("tesla_df contains no posts - cannot build model features")

embedding_model = SentenceTransformer('mixedbread-ai/mxbai-embed-large-v1')

# A missing full_text would otherwise reach the encoder and len() as a
# non-string NaN value; coerce every entry to a string first.
texts = tesla_df['full_text'].fillna('').astype(str).tolist()
embeddings = embedding_model.encode(texts)

# Min-max scale the post scores to [0, 1). A missing score counts as 0 so a
# single NaN does not turn every normalised score into NaN.
scores = tesla_df['score'].fillna(0).to_numpy(dtype=float)
scores_normalized = (scores - scores.min()) / (scores.max() - scores.min() + 1e-8)

# Min-max scale the character length of each post.
text_lengths = np.array([len(text) for text in texts])
text_lengths_normalized = (text_lengths - text_lengths.min()) / (text_lengths.max() - text_lengths.min() + 1e-8)

# Recency relative to the oldest post: the oldest scores ~0, newer posts
# score closer to 1. Without a 'created' column every post gets 1.0.
if 'created' in tesla_df.columns:
    now = datetime.now()
    recency_hours = [(now - pd.to_datetime(created)).total_seconds() / 3600 for created in tesla_df['created']]
    max_hours = max(recency_hours) + 1e-8
    recency_scores = [(max_hours - hours) / max_hours for hours in recency_hours]
else:
    recency_scores = [1.0] * len(texts)

recency_scores = np.array(recency_scores)

# Combine features: [embeddings (1024), score (1), text_length (1), recency (1)] = 1027
enhanced_embeddings = np.column_stack([
    embeddings,
    scores_normalized,
    text_lengths_normalized,
    recency_scores,
])

In [None]:
# Get current Tesla stock price
def fallback_quote():
    """Return placeholder quote values used when no live quote is available.

    Returns:
        Tuple ``(price, change, change_percent, trading_day)`` with a price
        of 250.0, no change, and today's date.
    """
    return 250.0, 0.0, "0.00", datetime.now().strftime('%Y-%m-%d')


def parse_global_quote(stock_data):
    """Extract the quote fields from a GLOBAL_QUOTE response payload.

    Args:
        stock_data: Decoded JSON response.

    Returns:
        Tuple ``(price, change, change_percent, trading_day)``; price and
        change are floats, change_percent is a string without the trailing
        '%', trading_day is the string supplied by the API.

    Raises:
        KeyError: If 'Global Quote' or one of its fields is missing.
        ValueError: If price or change is not numeric.
    """
    quote = stock_data['Global Quote']
    return (
        float(quote['05. price']),
        float(quote['09. change']),
        quote['10. change percent'].rstrip('%'),
        quote['07. latest trading day'],
    )


# NOTE(security): the literal default below is a credential committed to
# source. Prefer setting ALPHAVANTAGE_API_KEY in the environment; the default
# is kept only so existing runs keep working.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY', 'GR4S16Y4XV97PF9Z')
url = 'https://www.alphavantage.co/query'

try:
    # A timeout keeps the notebook from hanging when the API is unreachable.
    response = requests.get(
        url,
        params={'function': 'GLOBAL_QUOTE', 'symbol': 'TSLA', 'apikey': api_key},
        timeout=10,
    )
    stock_data = response.json()
    current_price, change, change_percent, trading_day = parse_global_quote(stock_data)

except Exception as e:
    # Best-effort boundary: any failure falls back to placeholder values, but
    # say so, because the prediction is then based on a made-up price. Only
    # the exception type is printed, since the message may contain the
    # request URL and with it the API key.
    print(f"⚠️  Could not get live TSLA quote ({type(e).__name__}) - using fallback price")
    current_price, change, change_percent, trading_day = fallback_quote()

In [None]:
# Make prediction
with torch.no_grad():
    # All posts form a single sequence, fed as a batch of one.
    embeddings_tensor = torch.tensor(enhanced_embeddings, dtype=torch.float32).unsqueeze(0).to(device)
    lengths_tensor = torch.tensor([enhanced_embeddings.shape[0]], dtype=torch.long).to(device)
    price_tensor = torch.tensor([[current_price]], dtype=torch.float32).to(device)

    model_output = model(embeddings_tensor, lengths_tensor, price_tensor)
    predicted_price = model_output.cpu().item()

# Direction is derived from the sign of the predicted move.
predicted_change = predicted_price - current_price
if predicted_change > 0:
    direction = "steigt"
else:
    direction = "fällt"

# Map the size of the move onto a confidence between 55 and 95.
raw_confidence = min(abs(predicted_change) * 2, 1.0)
confidence = max(55, min(95, raw_confidence * 40 + 55))

In [None]:
# Create prediction data for dashboard
import re

# Sentiment lexicons, defined once instead of on every loop iteration.
POSITIVE_WORDS = ('good', 'great', 'excellent', 'bullish', 'up', 'rise', 'profit', 'beat')
NEGATIVE_WORDS = ('bad', 'terrible', 'bearish', 'down', 'fall', 'loss', 'crash', 'drop')


def _lexicon_pattern(words):
    """Compile a regex that matches any of ``words`` as a whole word.

    A plain-suffix inflection (s, es, ed, ing) is accepted so that e.g.
    "profits" or "crashed" still count; the captured group is the base word.
    """
    alternatives = '|'.join(re.escape(word) for word in words)
    return re.compile(rf'\b({alternatives})(?:s|es|ed|ing)?\b')


_POSITIVE_RE = _lexicon_pattern(POSITIVE_WORDS)
_NEGATIVE_RE = _lexicon_pattern(NEGATIVE_WORDS)


def classify_sentiment(text):
    """Label ``text`` as "positive", "negative" or "neutral".

    Counts how many distinct lexicon words occur in the text and compares the
    two counts. Words are matched on word boundaries rather than as raw
    substrings, so 'up' no longer fires on "supercharger" or "supply" and
    'down' no longer fires on "download".
    """
    lowered = text.lower()
    pos_count = len(set(_POSITIVE_RE.findall(lowered)))
    neg_count = len(set(_NEGATIVE_RE.findall(lowered)))

    if pos_count > neg_count:
        return "positive"
    if neg_count > pos_count:
        return "negative"
    return "neutral"


def _text_or(value, default):
    """Return ``value`` as a string, or ``default`` when it is None/NaN."""
    return default if pd.isna(value) else str(value)


def _truncate(text, limit):
    """Cut ``text`` to ``limit`` characters, appending "..." when shortened."""
    return text[:limit] + "..." if len(text) > limit else text


prediction_data = {
    "prediction": {
        "direction": direction,
        "confidence": f"{confidence:.0f}%"
    },
    "current_stock": {
        "price": current_price,
        "change": change,
        "change_percent": change_percent,
        "trading_day": trading_day
    },
    "reddit_posts": [],
    "timestamp": datetime.now().isoformat()
}

for _, post in tesla_df.iterrows():
    full_text = _text_or(post.get('full_text', ''), '')
    title = _text_or(post.get('title', ''), 'Tesla discussion')
    text = _text_or(post.get('text', ''), '')
    score = post['score']

    prediction_data["reddit_posts"].append({
        "title": _truncate(title, 80),
        "score": 0 if pd.isna(score) else int(score),
        "sentiment": classify_sentiment(full_text),
        "text_preview": _truncate(text, 100),
    })

# Persist the result where the dashboard reads it.
os.makedirs('data', exist_ok=True)
with open('data/latest_prediction.json', 'w', encoding='utf-8') as f:
    json.dump(prediction_data, f, indent=2)

print(f"🎯 Tesla {direction} (Confidence: {confidence:.0f}%)")