In [4]:

import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import torch
from tqdm import tqdm

In [5]:

df = pd.read_csv("vaccination_tweets.csv")  
df = df[['date', 'text']].dropna().copy()
df['date'] = pd.to_datetime(df['date']).dt.date


tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")

def analyze_sentiment(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = softmax(outputs.logits.numpy()[0])
    return {
        'positive': probs[0],
        'neutral': probs[1],
        'negative': probs[2]
    }

sentiments = df['text'].apply(analyze_sentiment)


sentiment_df = pd.DataFrame(sentiments.tolist())
df = pd.concat([df, sentiment_df], axis=1)


df['sentiment_label'] = df[['positive', 'neutral', 'negative']].idxmax(axis=1)


df.to_csv("pfizer_tweet_sentiment_finbert.csv", index=False)


print(df.head())


         date                                               text  positive  \
0  2020-12-20  Same folks said daikon paste could treat a cyt...  0.999988   
1  2020-12-13  While the world has been on the wrong side of ...  0.999205   
2  2020-12-12  #coronavirus #SputnikV #AstraZeneca #PfizerBio...  0.999989   
3  2020-12-12  Facts are immutable, Senator, even when you're...  0.999872   
4  2020-12-12  Explain to me again why we need a vaccine @Bor...  0.999771   

    neutral  negative sentiment_label  
0  0.000002  0.000010        positive  
1  0.000037  0.000758        positive  
2  0.000009  0.000002        positive  
3  0.000013  0.000115        positive  
4  0.000028  0.000201        positive  
