In [1]:
import pandas as pd

# Read the Reddit output workbook into a DataFrame.
# NOTE: this is an absolute, machine-specific path; adjust it when running elsewhere.
excel_path = "D:/IDS 506 Healthcare/project/Reddit_Final_Output.xlsx"
data = pd.read_excel(excel_path)

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax
import torch

In [4]:
import warnings

# Suppress specific deprecation warnings
warnings.filterwarnings("ignore", category=UserWarning, module='transformers')

In [5]:
# Checkpoint identifier shared by the model and its tokenizer
model_name = "bhadresh-savani/bert-base-uncased-emotion"
# Sequence-classification model loaded from the checkpoint above
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Tokenizer loaded from the same checkpoint so its vocabulary matches the model
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [6]:
# Define a function that predicts emotion probabilities for a single text
def get_emotion_scores(text):
    """Return the model's probability for each emotion label of one text.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Parameters
    ----------
    text : str
        Text to classify. Input longer than 512 tokens is truncated.

    Returns
    -------
    dict or str
        A dict mapping every label name in ``model.config.id2label`` to its
        softmax probability (a NumPy scalar), or the string
        ``"Invalid input type"`` when ``text`` is not a ``str``.
    """
    # Non-string values cannot be tokenized; keep returning the sentinel that
    # existing callers already receive for such values.
    if not isinstance(text, str):
        return "Invalid input type"

    # Encode the text as PyTorch tensors and run the model without tracking gradients
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax over the class dimension; [0] selects the single input's row
    probabilities = softmax(logits, dim=1)[0]

    # Take label names from the checkpoint's own config instead of a hand-written
    # list: the class index -> name mapping is defined by the model, and a
    # hard-coded list in a different order silently attaches probabilities to
    # the wrong emotions.
    id2label = model.config.id2label
    return {id2label[index]: score for index, score in enumerate(probabilities.numpy())}

In [7]:
# Score every entry of the 'Full_Post' column and store the per-row result
# (whatever get_emotion_scores returns for that row) in a new column
emotion_scores = data['Full_Post'].apply(get_emotion_scores)
data['Emotion Scores'] = emotion_scores