In [1]:
# Import libraries
import os

import pandas as pd
import numpy as np

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.nn import Softmax

In [2]:
# Location of the preprocessed Reddit headlines, relative to this notebook
news_input_path = os.path.join('..', 'data', 'RedditNews_preprocessed.csv')

# Read the headlines into a DataFrame
df_news = pd.read_csv(news_input_path)

In [None]:
# Pick the compute device: CUDA when torch reports a usable GPU, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Name of the pre-trained FinBERT checkpoint (from ProsusAI)
finbert_checkpoint = "ProsusAI/finbert"

# Tokenizer that matches the checkpoint
tokenizer = AutoTokenizer.from_pretrained(finbert_checkpoint)

# Load the sequence classifier and place it on the selected device
model = AutoModelForSequenceClassification.from_pretrained(finbert_checkpoint).to(device)

# Switch the model to evaluation mode (it is only used for inference here)
model.eval()

def get_sentiment_FinBert(text, max_length=512):
    """Score one piece of text with FinBERT and return its class probabilities.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: Text to score. It is passed through ``str()`` first, so
            non-string values are scored as their string representation
            instead of raising inside the tokenizer.
        max_length: Maximum number of tokens handed to the model. Longer
            inputs are truncated; without truncation an over-long text makes
            the forward pass fail. 512 is assumed to be FinBERT's limit --
            TODO confirm against ``model.config.max_position_embeddings``.

    Returns:
        A tuple of three floats: the softmax probabilities of logit
        indices 0, 1 and 2. The caller stores them as positive, negative
        and neutral -- presumably matching ``model.config.id2label``; verify.
    """
    # Tokenize a single text as a batch of size one, cutting off anything
    # beyond max_length so the model never receives an over-long sequence
    inputs = tokenizer(
        str(text),
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )

    # Move the inputs to the same device as the model
    inputs = inputs.to(device)

    # Predict the sentiment without tracking gradients (inference only)
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax over the class dimension turns logits into probabilities
    probs = torch.softmax(outputs.logits, dim=1)

    # Bring the single batch row back to the CPU as plain Python floats
    first_row = probs.cpu()[0].tolist()
    return first_row[0], first_row[1], first_row[2]

# Apply the function to the headlines.
# Each call returns a 3-tuple, so `apply` yields a Series of tuples. Unpacking
# that Series directly into three columns iterates over its ROWS (and fails
# unless there are exactly three), so the tuples are first expanded into a
# DataFrame with one column per probability.
sentiment_columns = [
    'sentiment_FinBert_positiv',
    'sentiment_FinBert_negative',
    'sentiment_FinBert_neutral',
]
sentiment_scores = pd.DataFrame(
    df_news['News'].apply(get_sentiment_FinBert).tolist(),
    index=df_news.index,
    columns=sentiment_columns,
)

# Assign column by column so re-running the cell simply overwrites the values
for column_name in sentiment_columns:
    df_news[column_name] = sentiment_scores[column_name]

In [None]:
# Destination for the headlines enriched with the FinBERT scores
news_output_path = os.path.join('..', 'data', 'RedditNews_FinBert.csv')

# Write the DataFrame (including its index) to CSV
df_news.to_csv(news_output_path)