In [1]:
# Setup
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm
# Für VADER:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

# Optional: GPU nutzen
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Verwende:", device)

Verwende: cpu


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Tobia\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Modelle & Tokenizer laden
finbert_model_name = "ProsusAI/finbert"

tokenizer = AutoTokenizer.from_pretrained(finbert_model_name)
model = AutoModelForSequenceClassification.from_pretrained(finbert_model_name)
model.to(device)
model.eval()

vader_analyzer = SentimentIntensityAnalyzer()

In [3]:
# Daten laden und Texte zusammenführen
df = pd.read_csv("data/tesla_preprocessed.csv")
df["full_text"] = df["title"].fillna("") + " " + df["text"].fillna("")
texts = df["full_text"].tolist()

# Testen mit 100 zufälligen Einträgen
#sample_df = df.sample(n=100, random_state=42) 
#sample_df["full_text"] = sample_df["title"].fillna("") + " " + sample_df["text"].fillna("")
#sample_texts = sample_df["full_text"].tolist()

In [4]:
# Finbert Sentiment-Berechnungsfunktion (-1 bis 1) 
def compute_finbert_sentiment_scores(text_list):
    results = []

    for text in tqdm(text_list):
        # Tokenisierung eines einzelnen Textes
        inputs = tokenizer(text, return_tensors="pt", truncation=True)
        inputs = {key: val.to(device) for key, val in inputs.items()}

        with torch.no_grad():
            outputs = model(**inputs)
            probs = F.softmax(outputs.logits, dim=1)[0]  # [0] da nur ein Eintrag

        positive = probs[2].item()
        neutral = probs[1].item()
        negative = probs[0].item()
        score = positive - negative

        results.append({
            "positive": positive,
            "neutral": neutral,
            "negative": negative,
            "score": score
        })

    return results

In [5]:
# VADER Berechnungsfunktion
def compute_vader_sentiment_scores(text_list):
    results = []
    
    for text in tqdm(text_list):
        vader_scores = vader_analyzer.polarity_scores(text)
        compound_score = vader_scores["compound"]
        
        results.append(compound_score)
    
    return results

In [6]:
# Sentiment berechnen und Ergebnisse speichern
print("Finbert Sentiment wird berechnet...")
finbert_sentiment_results = compute_finbert_sentiment_scores(texts)
finbert_sentiment_df = pd.DataFrame(finbert_sentiment_results)

print("Vader Sentiment wird berechnet...")
vader_sentiment_scores = compute_vader_sentiment_scores(texts)

# In Original-DataFrame einfügen
df["finbert_sentiment"] = finbert_sentiment_df["score"]
df["vader_sentiment"] = vader_sentiment_scores

# Speichern mit beiden neuen Spalten
df.to_csv("data/tesla_sentiment_complete.csv", index=False)

print("✅ Fertig: FinBERT + VADER Scores gespeichert.")

Finbert Sentiment wird berechnet...


100%|████████████████████████████████████████████████████| 13268/13268 [36:53<00:00,  5.99it/s]


Vader Sentiment wird berechnet...


100%|██████████████████████████████████████████████████| 13268/13268 [00:12<00:00, 1046.16it/s]


✅ Fertig: FinBERT + VADER Scores gespeichert.


In [7]:
# TESTS
testtext1="tesla $ 2.9 billion supply deal .. nice news deleted"
testtext2="tesla $ 2.9 billion supply deal nice news"

test_finbert_sentiment1 = compute_finbert_sentiment_scores([testtext1])
test_finbert_sentiment2 = compute_finbert_sentiment_scores([testtext2])

test_vader_sentiment1 = compute_vader_sentiment_scores([testtext1])
test_vader_sentiment2 = compute_vader_sentiment_scores([testtext2])

print(test_finbert_sentiment1[0])
print(test_vader_sentiment1[0])
print(test_finbert_sentiment2[0])
print(test_vader_sentiment2[0])

100%|████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 30.74it/s]
100%|████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 18.64it/s]
100%|███████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 999.83it/s]
100%|████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]

{'positive': 0.43987175822257996, 'neutral': 0.018384816125035286, 'negative': 0.5417434573173523, 'score': -0.10187169909477234}
0.4215
{'positive': 0.8279392719268799, 'neutral': 0.03641020506620407, 'negative': 0.13565054535865784, 'score': 0.692288726568222}
0.4215



