# Tesla Stock Prediction from Reddit Sentiment

Dieses Notebook trainiert ein Machine-Learning-Modell, das anhand von Reddit-Posts über Tesla prognostiziert, ob der Aktienkurs in den Tagen nach dem Post steigt oder fällt.

## Workflow:
1. Daten laden (Reddit Posts + Tesla Kursdaten)
2. Labels erstellen (Kurs steigt/fällt nach Post)
3. Features vorbereiten (Embeddings + Sentiment)
4. Modell trainieren
5. Evaluation

## 1. Import Libraries und Daten laden

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score
from sklearn.preprocessing import StandardScaler
import warnings
# Silence only deprecation chatter from the libraries. A blanket
# filterwarnings('ignore') would also hide warnings that matter for the
# results, e.g. sklearn's ConvergenceWarning or numpy runtime warnings.
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)

# Plotting setup
plt.style.use('default')
sns.set_palette("husl")

print("✅ Alle Libraries erfolgreich importiert!")

In [None]:
# Load the combined file holding embeddings, sentiment scores and upvotes
def unpack_embedding_payload(data):
    """Split the object returned by ``np.load`` into its components.

    Args:
        data: Either a plain embedding array, or a single-element object
            array (or a raw dict) wrapping a dictionary with the keys
            'embeddings', 'reddit_data' and optionally 'stock_data'.

    Returns:
        tuple: ``(embeddings, reddit_data, stock_data)``. For a plain array
        the array itself is returned as ``embeddings`` and the other two
        entries are ``None``.
    """
    # ndarray.item() raises ValueError for arrays with more than one element,
    # so it may only be called on single-element arrays. Calling it
    # unconditionally made every real embedding matrix look like a load error.
    if isinstance(data, np.ndarray) and data.size == 1:
        payload = data.item()
    else:
        payload = data

    if isinstance(payload, dict):
        return (
            payload.get('embeddings'),
            payload.get('reddit_data'),  # sentiment, text, upvotes, date
            payload.get('stock_data'),   # optional price data
        )
    return data, None, None


try:
    # NOTE(security): allow_pickle=True unpickles arbitrary objects and can
    # execute code while loading. Only open files from a trusted source.
    data = np.load('data/complet_embedding.npy', allow_pickle=True)
    embeddings, reddit_data, stock_data = unpack_embedding_payload(data)

    # The helper hands back `data` itself only when no dictionary was found.
    if embeddings is data:
        print(f"🧠 Nur Embeddings geladen: {embeddings.shape}")
    else:
        n_posts = len(reddit_data) if reddit_data is not None else 0
        print(f"📊 Kombinierte Daten geladen:")
        print(f"   🧠 Embeddings: {embeddings.shape if embeddings is not None else 'Keine'}")
        print(f"   📝 Reddit Posts: {n_posts} Einträge")
        if stock_data is not None:
            print(f"   📈 Kursdaten: {len(stock_data)} Einträge")

except Exception as e:
    # Deliberate demo fallback: any load problem is reported and replaced by
    # random mock data so the rest of the notebook can still be executed.
    print(f"❌ Fehler beim Laden von complet_embedding.npy: {e}")
    print("📋 Erstelle Mock-Daten für Demo...")

    n_samples = 1000
    embedding_dim = 1024

    embeddings = np.random.randn(n_samples, embedding_dim)

    # Mock Reddit data; an explicit Timedelta avoids the deprecated 'H' alias.
    reddit_data = {
        'text': [f"Tesla post {i}" for i in range(n_samples)],
        'finbert_sentiment': np.random.uniform(-1, 1, n_samples),
        'vader_sentiment': np.random.uniform(-1, 1, n_samples),
        'upvotes': np.random.randint(1, 100, n_samples),
        'created': pd.date_range(start='2012-01-01', periods=n_samples,
                                 freq=pd.Timedelta(hours=1)),
    }

    stock_data = None
    print(f"✅ Mock-Daten erstellt: {n_samples} Posts mit {embedding_dim}D Embeddings")

In [None]:
# Overview of the combined data; builds `reddit_df` and `stock_df`
print("\n=== KOMBINIERTE DATEN ÜBERSICHT ===")

if reddit_data is not None:
    # A ready-made DataFrame is used as-is; everything else (dict of columns,
    # list of records, ...) goes through the DataFrame constructor.
    if isinstance(reddit_data, pd.DataFrame):
        reddit_df = reddit_data
    else:
        reddit_df = pd.DataFrame(reddit_data)

    print("Reddit Posts:")
    print(reddit_df.head())
    print(f"\nSpalten: {list(reddit_df.columns)}")

    if 'created' in reddit_df.columns:
        print(f"Datum-Range: {reddit_df['created'].min()} bis {reddit_df['created'].max()}")

    if 'finbert_sentiment' in reddit_df.columns:
        print(f"FinBERT Sentiment Range: {reddit_df['finbert_sentiment'].min():.3f} bis {reddit_df['finbert_sentiment'].max():.3f}")

    if 'upvotes' in reddit_df.columns:
        print(f"Upvotes Range: {reddit_df['upvotes'].min()} bis {reddit_df['upvotes'].max()}")

else:
    print("⚠️ Keine separaten Reddit-Daten verfügbar")
    # Only embeddings were loaded: create one mock post per embedding row.
    n_posts = len(embeddings)
    reddit_df = pd.DataFrame({
        'text': [f"Mock Tesla post {i}" for i in range(n_posts)],
        'finbert_sentiment': np.random.uniform(-1, 1, n_posts),
        'vader_sentiment': np.random.uniform(-1, 1, n_posts),
        'upvotes': np.random.randint(1, 100, n_posts),
        # An explicit Timedelta avoids the deprecated 'H' frequency alias.
        'created': pd.date_range(start='2012-01-01', periods=n_posts,
                                 freq=pd.Timedelta(hours=1)),
    })
    print("📝 Mock Reddit DataFrame erstellt")

if stock_data is not None:
    print("\n=== TESLA KURSDATEN ÜBERSICHT ===")
    if isinstance(stock_data, pd.DataFrame):
        stock_df = stock_data
    else:
        stock_df = pd.DataFrame(stock_data)
    print(stock_df.head())
    print(f"\nSpalten: {list(stock_df.columns)}")
else:
    stock_df = None

print(f"\n🧠 Embeddings Shape: {embeddings.shape}")
print(f"📊 Reddit Posts: {len(reddit_df)}")

## 2. Datenvorverarbeitung und Label-Erstellung

In [None]:
# Prepare the Reddit posts: parse timestamps and derive the calendar date
reddit_df['created'] = pd.to_datetime(reddit_df['created'])
reddit_df['date'] = reddit_df['created'].dt.date

# Row i of `embeddings` belongs to row i of `reddit_df`, and later cells
# select both by position. The posts must therefore never be reordered
# without applying the same permutation to the embedding matrix.
if embeddings is None or len(embeddings) != len(reddit_df):
    raise ValueError(
        "Embeddings und Reddit-Posts haben unterschiedliche Längen - "
        "die Zuordnung Post <-> Embedding wäre nicht eindeutig."
    )

# Sort chronologically. The stable sort keeps ties in their current order,
# which makes re-running this cell a no-op.
sort_order = (
    reddit_df['created']
    .reset_index(drop=True)
    .sort_values(kind='stable')
    .index.to_numpy()
)
reddit_df = reddit_df.iloc[sort_order].reset_index(drop=True)
embeddings = np.asarray(embeddings)[sort_order]

print(f"Reddit Posts von {reddit_df['date'].min()} bis {reddit_df['date'].max()}")

# Check which sentiment columns are available
sentiment_cols = []
if 'finbert_sentiment' in reddit_df.columns:
    sentiment_cols.append('finbert_sentiment')
    print(f"FinBERT Sentiment Range: {reddit_df['finbert_sentiment'].min():.3f} bis {reddit_df['finbert_sentiment'].max():.3f}")

if 'vader_sentiment' in reddit_df.columns:
    sentiment_cols.append('vader_sentiment')
    print(f"VADER Sentiment Range: {reddit_df['vader_sentiment'].min():.3f} bis {reddit_df['vader_sentiment'].max():.3f}")

if 'upvotes' in reddit_df.columns:
    print(f"Upvotes Range: {reddit_df['upvotes'].min()} bis {reddit_df['upvotes'].max()}")

print(f"📊 Verfügbare Sentiment-Features: {sentiment_cols}")

In [None]:
# Make sure a price table exists: reuse real quotes when they were loaded,
# otherwise simulate a daily price series for the demo
if stock_df is not None:
    # Real quotes: take the 'date' column when present, else treat the first
    # column as the date (assumed layout: date first, then OHLC columns)
    date_col = 'date' if 'date' in stock_df.columns else stock_df.columns[0]
    stock_df['date'] = pd.to_datetime(stock_df[date_col])
else:
    print("⚠️ Erstelle Mock Tesla-Kursdaten für Demo...")

    # One entry per calendar day spanned by the Reddit posts
    start_date = reddit_df['date'].min()
    end_date = reddit_df['date'].max()
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    # Seeded random walk: mean daily return 0.1 %, volatility 3 %,
    # with the price never dropping below 10
    np.random.seed(42)
    base_price = 100
    prices = [base_price]
    for _ in range(len(date_range) - 1):
        daily_return = np.random.normal(0.001, 0.03)
        prices.append(max(prices[-1] * (1 + daily_return), 10))

    # Open/high/low are drawn as noisy variations of each close, in this order
    mock_columns = {'date': date_range, 'close': prices}
    mock_columns['open'] = [p * (1 + np.random.normal(0, 0.01)) for p in prices]
    mock_columns['high'] = [p * (1 + abs(np.random.normal(0, 0.02))) for p in prices]
    mock_columns['low'] = [p * (1 - abs(np.random.normal(0, 0.02))) for p in prices]
    stock_df = pd.DataFrame(mock_columns)

    print(f"✅ Mock-Kursdaten erstellt: {len(stock_df)} Tage")

# Reduce the timestamps to plain dates and order the table by day
stock_df['date'] = stock_df['date'].dt.date
stock_df = stock_df.sort_values('date').reset_index(drop=True)

print(f"📈 Tesla Kursdaten: {stock_df['date'].min()} bis {stock_df['date'].max()}")
print(stock_df.head())

In [None]:
# Create labels: does the price rise X days after the Reddit post?
PREDICTION_DAYS = 3  # prediction horizon in calendar days

def create_labels(reddit_df, stock_df, prediction_days=3):
    """Label every post by the price movement over the following days.

    A post gets label 1 when the close ``prediction_days`` calendar days
    after the post date is strictly higher than the close on the post date,
    and 0 when it is lower or equal. Posts for which either of the two
    closes is missing (no row for that day, or a NaN close) are skipped.

    Args:
        reddit_df: DataFrame with a 'date' column (dates or timestamps).
        stock_df: DataFrame with 'date' and 'close' columns.
        prediction_days: Look-ahead horizon in calendar days.

    Returns:
        tuple: ``(labels, valid_indices)`` where ``labels`` is a list of
        0/1 ints and ``valid_indices`` holds the row *positions* (suitable
        for ``.iloc`` and numpy indexing) of the labelled posts.
    """
    # Normalise both sides to plain dates so that a Timestamp column on one
    # side still matches a date column on the other.
    stock_dates = pd.to_datetime(stock_df['date']).dt.date
    post_dates = pd.to_datetime(reddit_df['date']).dt.date

    # Date -> close lookup. NaN closes are left out so that a missing price
    # is treated as "unavailable" instead of silently yielding label 0.
    close_by_date = {
        day: close
        for day, close in zip(stock_dates, stock_df['close'])
        if not pd.isna(day) and not pd.isna(close)
    }

    horizon = timedelta(days=prediction_days)
    labels = []
    valid_indices = []

    # enumerate() yields positions; index labels would break the positional
    # look-ups done by the callers whenever the index is not 0..n-1.
    for position, post_date in enumerate(post_dates):
        if pd.isna(post_date):
            continue

        current_price = close_by_date.get(post_date)
        future_price = close_by_date.get(post_date + horizon)
        if current_price is None or future_price is None:
            continue

        labels.append(1 if future_price > current_price else 0)
        valid_indices.append(position)

    return labels, valid_indices

# Create the labels for all posts
labels, valid_indices = create_labels(reddit_df, stock_df, PREDICTION_DAYS)

n_labels = len(labels)
n_rising = sum(labels)
print(f"✅ Labels erstellt für {n_labels} Posts")
if n_labels:
    print(f"📊 Verteilung - Steigt: {n_rising} ({n_rising/n_labels*100:.1f}%), Fällt: {n_labels-n_rising} ({(n_labels-n_rising)/n_labels*100:.1f}%)")
else:
    # Without this guard the percentage computation divides by zero.
    print("⚠️ Keine Labels erstellt - für keinen Post sind beide Kurse (Post-Tag und Ziel-Tag) verfügbar")

In [None]:
# Assemble the training set: keep only the posts that received a label,
# selecting posts and embedding rows by the same positions
train_labels = np.array(labels)
train_embeddings = embeddings[valid_indices]
train_reddit = reddit_df.iloc[valid_indices].copy()

print(f"🧠 Embeddings gefiltert: {train_embeddings.shape}")
print(f"📝 Trainingsdaten: {len(train_reddit)} Samples")
print(f"📊 Features: Embeddings + Sentiment + Upvotes + Zeit-Features")
print(f"🎯 Labels: {len(train_labels)} (Kurs steigt/fällt)")

## 3. Feature Engineering

In [None]:
# Assemble the feature matrix
def prepare_features(reddit_df, embeddings):
    """Build the feature matrix from embeddings plus post meta data.

    Column order: embedding dimensions, then 'finbert_sentiment' and
    'vader_sentiment' (each only if present), then 'upvotes' (or
    'reddit_score' taken from a 'score' column as fallback), then
    'text_length' (if a 'text' column exists), then 'hour' and 'weekday'
    derived from 'created'.

    Args:
        reddit_df: DataFrame with one row per post. Must contain 'created';
            all other columns are optional. The frame is not modified.
        embeddings: 2-D array with one row per post.

    Returns:
        tuple: ``(X, feature_names)`` with ``X`` of shape
        ``(n_posts, n_features)`` and the matching list of column names.
    """
    embeddings = np.asarray(embeddings)
    blocks = [embeddings]
    feature_names = [f'embed_{i}' for i in range(embeddings.shape[1])]

    # Sentiment scores, when available
    for column in ('finbert_sentiment', 'vader_sentiment'):
        if column in reddit_df.columns:
            blocks.append(reddit_df[column].to_numpy().reshape(-1, 1))
            feature_names.append(column)

    # Engagement: prefer 'upvotes', fall back to 'score'
    if 'upvotes' in reddit_df.columns:
        blocks.append(reddit_df['upvotes'].to_numpy().reshape(-1, 1))
        feature_names.append('upvotes')
    elif 'score' in reddit_df.columns:
        blocks.append(reddit_df['score'].to_numpy().reshape(-1, 1))
        feature_names.append('reddit_score')

    # Text length; a missing text counts as length 0 so that no NaN ends up
    # in the matrix (the estimators used here reject NaN input)
    if 'text' in reddit_df.columns:
        text_length = reddit_df['text'].str.len().fillna(0).to_numpy(dtype=float)
        blocks.append(text_length.reshape(-1, 1))
        feature_names.append('text_length')

    # Time features: parse once and keep the result local instead of writing
    # helper columns into the caller's DataFrame
    created = pd.to_datetime(reddit_df['created'])
    blocks.append(np.column_stack([
        created.dt.hour.to_numpy(),
        created.dt.weekday.to_numpy(),
    ]))
    feature_names.extend(['hour', 'weekday'])

    X = np.concatenate(blocks, axis=1)

    return X, feature_names

# Build the feature matrix and target vector for the labelled posts
y = train_labels
X, feature_names = prepare_features(train_reddit, train_embeddings)

embedding_width = train_embeddings.shape[1]
total_features = len(feature_names)

print(f"✅ Features erstellt: {X.shape}")
print(f"📊 Feature-Aufbau:")
print(f"   • Embeddings: {embedding_width}D")
print(f"   • Meta-Features: {total_features - embedding_width}")
print(f"   • Gesamt: {total_features} Features")
print(f"🎯 Labels: {y.shape}, Positive Klasse: {y.mean():.2%}")

## 4. Modell Training

In [None]:
# Chronological train/test split
# The rows of X are in posting order (the posts were sorted by 'created'
# before labelling). All posts of one day share the same label, so a shuffled
# split would put near-duplicates of the test rows into the training set and
# let the model look into the future. shuffle=False keeps the oldest 80 % for
# training and evaluates on the most recent 20 %.
# NOTE(review): labels of the last training days still look up to
# PREDICTION_DAYS into the test period - consider a gap between the two sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

print(f"📊 Training: {X_train.shape[0]} Samples")
print(f"📊 Test: {X_test.shape[0]} Samples")

if len(np.unique(y_test)) < 2:
    # Without stratification the test window can end up with a single class.
    print("⚠️ Test-Set enthält nur eine Klasse - ROC-AUC ist dann nicht definiert")

# Scale the features; the scaler is fitted on the training rows only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("✅ Features skaliert")

In [None]:
# Fit each candidate model and collect its test-set metrics in `results`
models = {
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42, max_depth=10)
}

results = {}

for name, model in models.items():
    print(f"\n🚀 Trainiere {name}...")

    # Only the logistic regression is trained on the standardised matrices;
    # every other model gets the raw features
    if name == 'Logistic Regression':
        fit_matrix, eval_matrix = X_train_scaled, X_test_scaled
    else:
        fit_matrix, eval_matrix = X_train, X_test

    model.fit(fit_matrix, y_train)
    y_pred = model.predict(eval_matrix)
    y_prob = model.predict_proba(eval_matrix)[:, 1]  # probability of class 1

    # Test-set metrics
    accuracy = accuracy_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_prob)

    results[name] = {
        'model': model,
        'accuracy': accuracy,
        'roc_auc': roc_auc,
        'predictions': y_pred,
        'probabilities': y_prob
    }

    print(f"✅ {name}: Accuracy = {accuracy:.3f}, ROC-AUC = {roc_auc:.3f}")

print("\n🏆 Training abgeschlossen!")

## 5. Evaluation und Visualisierung

In [None]:
# Per-model report: headline metrics, classification report, confusion matrix
separator = '=' * 50

for name, result in results.items():
    predictions = result['predictions']

    print(f"\n{separator}")
    print(f"📊 EVALUATION: {name}")
    print(f"{separator}")

    print(f"Accuracy: {result['accuracy']:.3f}")
    print(f"ROC-AUC: {result['roc_auc']:.3f}")

    print("\nClassification Report:")
    print(classification_report(y_test, predictions))

    print("\nConfusion Matrix:")
    cm = confusion_matrix(y_test, predictions)
    print(cm)

In [None]:
# Visualisations: 2x2 grid summarising the evaluation
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Tesla Stock Prediction - Model Evaluation', fontsize=16, fontweight='bold')

# 1. Model comparison (accuracy and ROC-AUC side by side)
model_names = list(results.keys())
accuracies = [results[name]['accuracy'] for name in model_names]
roc_aucs = [results[name]['roc_auc'] for name in model_names]

x = np.arange(len(model_names))
width = 0.35

axes[0,0].bar(x - width/2, accuracies, width, label='Accuracy', alpha=0.8)
axes[0,0].bar(x + width/2, roc_aucs, width, label='ROC-AUC', alpha=0.8)
axes[0,0].set_xlabel('Models')
axes[0,0].set_ylabel('Score')
axes[0,0].set_title('Model Performance Comparison')
axes[0,0].set_xticks(x)
axes[0,0].set_xticklabels(model_names)
axes[0,0].legend()
axes[0,0].grid(True, alpha=0.3)

# 2. Confusion matrix of the model with the highest ROC-AUC
best_model_name = max(results.keys(), key=lambda name: results[name]['roc_auc'])
cm = confusion_matrix(y_test, results[best_model_name]['predictions'])
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0,1])
axes[0,1].set_title(f'Confusion Matrix - {best_model_name}')
axes[0,1].set_xlabel('Predicted')
axes[0,1].set_ylabel('Actual')

# 3. Sentiment vs. predictions
# The embedding dimensions come first in the feature matrix, so column 0 is
# 'embed_0' and NOT the sentiment score. Look the column up by name instead.
if 'finbert_sentiment' in feature_names:
    sentiment_values = X_test[:, feature_names.index('finbert_sentiment')]
    colors = ['red' if pred == 1 else 'blue' for pred in results[best_model_name]['predictions']]
    axes[1,0].scatter(sentiment_values, results[best_model_name]['probabilities'], c=colors, alpha=0.6)
    axes[1,0].set_xlabel('FinBERT Sentiment')
    axes[1,0].set_ylabel('Prediction Probability')
    axes[1,0].grid(True, alpha=0.3)
else:
    axes[1,0].text(0.5, 0.5, 'FinBERT Sentiment\nnicht verfügbar',
                   ha='center', va='center', transform=axes[1,0].transAxes)
axes[1,0].set_title('Sentiment vs. Prediction Probability')

# 4. Feature importance (only if the Random Forest is the best model)
if best_model_name == 'Random Forest':
    importances = results[best_model_name]['model'].feature_importances_
    # Ten most important features, in ascending order of importance
    top_indices = np.argsort(importances)[-10:]
    top_features = [feature_names[i] for i in top_indices]
    top_importances = importances[top_indices]

    axes[1,1].barh(range(len(top_features)), top_importances)
    axes[1,1].set_yticks(range(len(top_features)))
    axes[1,1].set_yticklabels(top_features)
    axes[1,1].set_xlabel('Feature Importance')
    axes[1,1].set_title('Top 10 Most Important Features')
else:
    axes[1,1].text(0.5, 0.5, 'Feature Importance\nnur für Random Forest\nverfügbar', 
                   ha='center', va='center', transform=axes[1,1].transAxes)
    axes[1,1].set_title('Feature Importance')

plt.tight_layout()
plt.show()

print(f"\n🏆 Bestes Modell: {best_model_name} (ROC-AUC: {results[best_model_name]['roc_auc']:.3f})")

## 6. Prediction Function für neue Posts

In [None]:
def predict_stock_movement(text, sentiment_finbert=None, sentiment_vader=None, upvotes=1, 
                          model_name='best', embedding=None):
    """
    Predict the price movement for a new Reddit post.

    The feature vector is assembled in exactly the column order recorded in
    the notebook-level ``feature_names`` list, so it always has the width the
    trained model expects. Uses the notebook globals ``results``,
    ``feature_names``, ``train_embeddings`` and (for the logistic regression)
    ``scaler``.

    Args:
        text: Reddit post text (only its length is used as a feature).
        sentiment_finbert: FinBERT sentiment score. If omitted while the
            model was trained with this feature, 0.0 is used
            (assumed to be the neutral score - TODO confirm).
        sentiment_vader: VADER sentiment score, handled like FinBERT.
        upvotes: Number of upvotes (default: 1). Also fills 'reddit_score'
            when the model was trained on a 'score' column.
        model_name: 'best' (highest ROC-AUC) or a key of ``results``.
        embedding: Embedding of the text. If omitted, the mean of the
            training embeddings is used as a stand-in.

    Returns:
        dict: 'prediction' ('STEIGT'/'FÄLLT'), 'confidence',
        'probability_steigt', 'probability_fällt' and 'model_used'.

    Raises:
        ValueError: If ``embedding`` has the wrong length or the training
            features contain a column this function cannot fill.
    """
    # Pick the model
    if model_name == 'best':
        model_name = max(results, key=lambda candidate: results[candidate]['roc_auc'])

    model = results[model_name]['model']

    # 1. Embedding part of the vector
    n_embedding_dims = train_embeddings.shape[1]
    if embedding is None:
        embedding_vector = np.mean(train_embeddings, axis=0)
    else:
        embedding_vector = np.asarray(embedding, dtype=float).ravel()
        if embedding_vector.shape[0] != n_embedding_dims:
            raise ValueError(
                f"embedding has {embedding_vector.shape[0]} dimensions, "
                f"expected {n_embedding_dims}"
            )

    # 2. Meta features, looked up by the names used during training. A
    # sentiment that was not passed must still occupy its column, otherwise
    # the vector is too short and the model rejects it.
    now = datetime.now()
    meta_values = {
        'finbert_sentiment': 0.0 if sentiment_finbert is None else sentiment_finbert,
        'vader_sentiment': 0.0 if sentiment_vader is None else sentiment_vader,
        'upvotes': upvotes,
        'reddit_score': upvotes,
        'text_length': len(text),
        'hour': now.hour,
        'weekday': now.weekday(),
    }
    meta_names = feature_names[n_embedding_dims:]
    unknown = [name for name in meta_names if name not in meta_values]
    if unknown:
        raise ValueError(f"cannot build values for training features: {unknown}")
    meta_vector = [float(meta_values[name]) for name in meta_names]

    X_new = np.concatenate([embedding_vector, meta_vector]).reshape(1, -1)

    # The logistic regression was trained on standardised features
    if model_name == 'Logistic Regression':
        X_new = scaler.transform(X_new)

    # Predict; map probabilities through classes_ instead of assuming that
    # the class label equals the column position
    prediction = model.predict(X_new)[0]
    probability = model.predict_proba(X_new)[0]
    probability_by_class = dict(zip(model.classes_, probability))

    return {
        'prediction': 'STEIGT' if prediction == 1 else 'FÄLLT',
        'confidence': probability_by_class[prediction],
        'probability_steigt': probability_by_class.get(1, 0.0),
        'probability_fällt': probability_by_class.get(0, 0.0),
        'model_used': model_name
    }

# Try the prediction function on an optimistic example post
positive_post = dict(
    text="Tesla is absolutely killing it! Revolutionary technology!",
    sentiment_finbert=0.8,
    sentiment_vader=0.7,
    upvotes=50,
)
test_prediction = predict_stock_movement(**positive_post)

print("\n🔮 BEISPIEL-PROGNOSE:")
print(f"Text: 'Tesla is absolutely killing it! Revolutionary technology!'")
print(f"Prognose: {test_prediction['prediction']}")
print(f"Konfidenz: {test_prediction['confidence']:.1%}")
print(f"Wahrscheinlichkeit Steigt: {test_prediction['probability_steigt']:.1%}")
print(f"Wahrscheinlichkeit Fällt: {test_prediction['probability_fällt']:.1%}")
print(f"Verwendetes Modell: {test_prediction['model_used']}")

# Same check with a pessimistic post (negative sentiment, few upvotes)
negative_post = dict(
    text="Tesla is overvalued and will crash soon!",
    sentiment_finbert=-0.6,
    sentiment_vader=-0.4,
    upvotes=5,
)
test_prediction_neg = predict_stock_movement(**negative_post)

print("\n🔮 NEGATIVES BEISPIEL:")
print(f"Text: 'Tesla is overvalued and will crash soon!'")
print(f"Prognose: {test_prediction_neg['prediction']}")
print(f"Konfidenz: {test_prediction_neg['confidence']:.1%}")
print(f"Wahrscheinlichkeit Steigt: {test_prediction_neg['probability_steigt']:.1%}")
print(f"Wahrscheinlichkeit Fällt: {test_prediction_neg['probability_fällt']:.1%}")

## 7. Zusammenfassung und nächste Schritte

In [None]:
# Final report: data set, best model, feature groups and follow-up ideas
banner = "=" * 60
embedding_width = train_embeddings.shape[1]

print("\n" + banner)
print("🎯 ZUSAMMENFASSUNG TESLA STOCK PREDICTION MODEL")
print(banner)

print("\n📊 DATEN:")
print(f"   • Reddit Posts: {len(train_reddit)}")
print(f"   • Embedding Dimension: {embedding_width}D")
print(f"   • Zusätzliche Features: {len(feature_names) - embedding_width}")
print(f"   • Gesamt Features: {X.shape[1]}")
print(f"   • Prediction Window: {PREDICTION_DAYS} Tage")
print(f"   • Positive Labels (Kurs steigt): {y.mean():.1%}")

print("\n🏆 BESTE PERFORMANCE:")
# The best model is the one with the highest ROC-AUC on the test set
best_model = max(results.keys(), key=lambda candidate: results[candidate]['roc_auc'])
best_scores = results[best_model]
print(f"   • Modell: {best_model}")
print(f"   • Accuracy: {best_scores['accuracy']:.1%}")
print(f"   • ROC-AUC: {best_scores['roc_auc']:.3f}")

print("\n🔧 FEATURES IN COMPLET_EMBEDDING:")
print(f"   • Text Embeddings ({embedding_width}D) - Hauptfeatures")
# Optional feature groups are listed only when their column exists
available_columns = reddit_df.columns
if 'finbert_sentiment' in available_columns:
    print("   • FinBERT Sentiment")
if 'vader_sentiment' in available_columns:
    print("   • VADER Sentiment")
if 'upvotes' in available_columns:
    print("   • Reddit Upvotes")
elif 'score' in available_columns:
    print("   • Reddit Score")
print("   • Text-Properties (Länge)")
print("   • Zeitbasiert (Stunde, Wochentag)")

print("\n🚀 NÄCHSTE SCHRITTE:")
next_steps = [
    "Mehr historische Tesla-Kursdaten für bessere Labels",
    "Ensemble Methods mit mehreren Modellen",
    "Hyperparameter Tuning (GridSearch)",
    "Cross-Validation für robustere Evaluation",
    "Feature Selection/Dimensionality Reduction",
    "Real-time Pipeline mit neuen Reddit Posts",
    "Backtesting mit simuliertem Trading",
]
for number, step in enumerate(next_steps, start=1):
    print(f"   {number}. {step}")

print("\n✅ Model erfolgreich mit complet_embedding trainiert!")
print("💡 Du kannst jetzt die predict_stock_movement() Funktion verwenden.")
print("🎯 Das Modell nutzt die mächtigen Embeddings als Hauptfeatures!")