In [3]:
import torch
# Show the installed PyTorch version; per the original note it must be at least 2.6.0.
print(torch.__version__)


2.7.1+cu126


In [4]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from torch.nn.functional import softmax
import torch
from tqdm import tqdm


In [5]:
# === 1. Load the original, already preprocessed DataFrame ===
df = pd.read_csv('data/tesla_preprocessed.csv')

# === 2. Prepare FinBERT ===
finbert_model_name = "ProsusAI/finbert"

# Resolve the target device first so the model can be placed on it right after loading.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained(finbert_model_name)
model = AutoModelForSequenceClassification.from_pretrained(finbert_model_name).to(device)
model.eval()  # inference mode

# === 3. Prepare VADER ===
vader_analyzer = SentimentIntensityAnalyzer()

tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [6]:
# === 4. New columns for the sentiment labels ===
# Placeholder defaults; insertion order of the mapping fixes the column order.
placeholder_columns = {
    'finbert_sentiment': '',
    'finbert_score': 0.0,
    'vader_compound': 0.0,
    'vader_sentiment': '',
}
for column_name, placeholder in placeholder_columns.items():
    df[column_name] = placeholder

# === 5. Combine the texts (e.g. title + text) ===
# Missing titles/texts become empty strings so the concatenation never yields NaN.
title_part = df['title'].fillna('')
body_part = df['text'].fillna('')
df['full_text'] = title_part + ' ' + body_part

In [7]:
# === 6. Funktion: FinBERT Sentiment berechnen ===
def get_finbert_sentiment(text):
    """Classify ``text`` with the loaded FinBERT model.

    Uses the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: The text to classify. Input longer than 512 tokens is truncated.

    Returns:
        A ``(label, probability)`` tuple: the lower-cased name of the most
        probable class as declared in ``model.config.id2label`` and its
        softmax probability as a float. If anything fails, a warning is
        emitted and ``("error", 0.0)`` is returned so a long batch run is
        not aborted by a single bad row.
    """
    import warnings

    try:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}

        with torch.no_grad():
            logits = model(**inputs).logits

        # Single input -> batch of one; take its probability row.
        probs = softmax(logits, dim=1)[0].cpu()
        label_idx = int(probs.argmax())

        # Take the class name from the checkpoint's own config instead of a
        # hard-coded list: the class order is model specific, and a list in
        # the wrong order silently mislabels every prediction.
        label = str(model.config.id2label[label_idx]).lower()
        return label, float(probs[label_idx])
    except Exception as exc:
        # Best effort: keep the sentinel return value, but do not hide the cause.
        warnings.warn(
            f"FinBERT inference failed ({type(exc).__name__}: {exc})",
            RuntimeWarning,
        )
        return "error", 0.0

In [8]:
# === 7. Funktion: VADER Sentiment berechnen ===
def get_vader_sentiment(text):
    """Score ``text`` with the notebook-level VADER analyzer.

    Returns:
        A ``(compound, label)`` tuple. ``label`` is ``'positive'`` when the
        compound score is above 0.05, ``'negative'`` when it is below -0.05,
        and ``'neutral'`` otherwise.
    """
    compound = vader_analyzer.polarity_scores(text)['compound']

    if compound > 0.05:
        return compound, 'positive'
    if compound < -0.05:
        return compound, 'negative'
    return compound, 'neutral'

In [9]:
# === 8. Run the sentiment computation ===
print("Starte Sentimentanalyse...")

# Walk over (index label, combined text) pairs; the total is passed explicitly
# so the progress bar can show completion and an ETA.
for idx, article_text in tqdm(df['full_text'].items(), total=len(df)):
    # FinBERT: predicted class and its probability
    finbert_label, finbert_prob = get_finbert_sentiment(article_text)
    df.at[idx, 'finbert_sentiment'] = finbert_label
    df.at[idx, 'finbert_score'] = finbert_prob

    # VADER: compound score and the label derived from it
    vader_score, vader_tag = get_vader_sentiment(article_text)
    df.at[idx, 'vader_compound'] = vader_score
    df.at[idx, 'vader_sentiment'] = vader_tag

print("Sentimentanalyse abgeschlossen!")

Starte Sentimentanalyse...


  0%|                                                                                                      | 4/13268 [00:00<43:16,  5.11it/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 13268/13268 [35:41<00:00,  6.20it/s]

Sentimentanalyse abgeschlossen!





In [10]:
# === 10. Kombiniertes Sentiment berechnen ===

def combine_sentiments(finbert, vader):
    """Merge a FinBERT label and a VADER label into one combined label.

    Args:
        finbert: FinBERT label; expected "positive", "neutral" or "negative".
        vader: VADER label; expected "positive", "neutral" or "negative".

    Returns:
        - "unknown" if either input is not one of the three expected labels
          (this covers the "error" sentinel, empty placeholders and NaN),
        - "strong_positive" / "strong_negative" if both agree on a polarity,
        - "neutral" if both are neutral,
        - "mixed" if the two models contradict each other,
        - otherwise the polarity of the one non-neutral model.
    """
    valid_labels = {"positive", "neutral", "negative"}

    # Validate both inputs the same way; an unrecognised FinBERT value must not
    # fall through and be silently treated as if it were "neutral".
    if finbert not in valid_labels or vader not in valid_labels:
        return "unknown"

    if finbert == vader:
        agreement = {
            "positive": "strong_positive",
            "negative": "strong_negative",
            "neutral": "neutral",
        }
        return agreement[finbert]

    # The labels differ. If neither is neutral they are opposite polarities.
    if finbert != "neutral" and vader != "neutral":
        return "mixed"

    # Exactly one side is neutral: report the other side's polarity.
    return vader if finbert == "neutral" else finbert

# Create the new column.
# Only two columns are needed, so pair them up directly instead of using
# row-wise DataFrame.apply(axis=1): that builds a Series for every row and
# does not produce a per-row result on an empty frame.
df['combined_sentiment'] = [
    combine_sentiments(finbert_label, vader_label)
    for finbert_label, vader_label in zip(df['finbert_sentiment'], df['vader_sentiment'])
]


In [11]:
# === 9. Save the results ===
output_path = 'data/tesla_sentiment.csv'

# The index is left out of the file (index=False).
df.to_csv(path_or_buf=output_path, index=False)
print("Daten mit Sentiment gespeichert unter: " + output_path)


Daten mit Sentiment gespeichert unter: data/tesla_sentiment.csv
