In [None]:
!pip install pandas requests_html transformers scipy

In [2]:
import pandas as pd
from requests_html import HTMLSession 
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import os

In [3]:
# Locate the input CSV inside the "Data files" folder of the current
# working directory.
current_dir = os.getcwd()
News_file_path = os.path.join(
    current_dir,
    os.path.join('Data files', 'News_Sentiments.csv'),
)



In [4]:

# Score every headline in the news CSV with a pretrained RoBERTa sentiment
# classifier, attach the per-class probabilities plus the winning class to
# the table, and write the result to NewsSentiments.csv.

roberta = "cardiffnlp/twitter-roberta-base-sentiment"
model = AutoModelForSequenceClassification.from_pretrained(roberta)
tokenizer = AutoTokenizer.from_pretrained(roberta)

# Paired positionally with the model's output scores (index 0, 1, 2).
labels = ['Negative', 'Neutral', 'Positive']

# The RoBERTa-base architecture accepts at most 512 tokens per sequence;
# without explicit truncation a longer headline raises inside the model.
max_tokens = 512


def _mask_urls(text):
    """Return *text* with every space-separated token that starts with
    'http' replaced by the literal placeholder 'http'."""
    return " ".join(
        "http" if word.startswith('http') else word
        for word in text.split(' ')
    )


def _score_text(text):
    """Return the softmax class probabilities for *text* as a 1-D array,
    ordered like ``labels``."""
    encoded = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_tokens,
    )
    logits = model(**encoded).logits.detach().numpy()[0]
    return softmax(logits)


# Loading the file
news_df = pd.read_csv(News_file_path, encoding='utf-8')

# Perform sentiment analysis
news_proc_list = []
sentiment_list = []

for headline in news_df['Headlines']:
    if pd.isna(headline):
        # A missing headline cannot be scored. Keep placeholders so both
        # lists stay aligned one-to-one with the rows of news_df.
        news_proc_list.append(None)
        sentiment_list.append(dict.fromkeys(labels, float('nan')))
        continue

    # str() guards against non-string cells (e.g. a purely numeric value).
    news_proc = _mask_urls(str(headline))
    news_proc_list.append(news_proc)

    scores = _score_text(news_proc)
    sentiment_list.append(dict(zip(labels, scores)))

# Append sentiment scores. Reusing news_df's index guarantees row alignment
# in the concat, and fixing the columns keeps the three score columns
# present even when the input file has no rows.
sentiment_df = pd.DataFrame(sentiment_list, columns=labels, index=news_df.index)
news_df = pd.concat([news_df, sentiment_df], axis=1)

# Determine sentiment with the highest value. idxmax is applied only to rows
# that were actually scored; unscored rows are left empty (NaN) by the
# index-aligned assignment.
scored_rows = sentiment_df.notna().all(axis=1)
news_df['Actual Sentiment'] = sentiment_df[scored_rows].idxmax(axis=1)
news_df['Sentiment value'] = sentiment_df.max(axis=1)

# Save the updated DataFrame to a new file
output_file_path = "NewsSentiments.csv"
news_df.to_csv(output_file_path, encoding='utf-8', index=False)
