# Sentiment Generation

In [8]:
import pandas as pd
import numpy as np
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.nn.functional import softmax
import json

# Load FinBERT
def load_finbert():
    """Load the FinBERT-tone tokenizer and sequence-classification model.

    Both objects are created from the same ``yiyanghkust/finbert-tone``
    checkpoint via ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "yiyanghkust/finbert-tone"
    finbert_tokenizer = BertTokenizer.from_pretrained(checkpoint)
    finbert_classifier = BertForSequenceClassification.from_pretrained(checkpoint)
    return finbert_tokenizer, finbert_classifier

# Get sentiment score for each article
def get_sentiment(text, tokenizer, model):
    """Return a signed sentiment score, P(positive) - P(negative), for ``text``.

    Args:
        text: Text to classify. It is tokenized with truncation to at most
            512 tokens.
        tokenizer: Callable tokenizer whose result can be unpacked as keyword
            arguments for ``model``.
        model: Sequence classifier whose output exposes ``.logits``. Class
            indices are looked up by name in ``model.config.id2label`` when
            that mapping names both a positive and a negative class.

    Returns:
        A NumPy scalar in ``[-1, 1]``: the softmax probability of the
        positive class minus that of the negative class.
    """
    # finbert-tone orders its classes 0=Neutral, 1=Positive, 2=Negative, so a
    # hard-coded "index 2 minus index 0" would yield negative - neutral.
    # Resolve the indices by label name and fall back to that documented
    # order when the config carries no usable names.
    id2label = getattr(getattr(model, "config", None), "id2label", None) or {}
    index_of = {str(label).lower(): int(idx) for idx, label in id2label.items()}
    if "positive" in index_of and "negative" in index_of:
        positive_idx = index_of["positive"]
        negative_idx = index_of["negative"]
    else:
        positive_idx, negative_idx = 1, 2

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        # A single text is passed in, so row 0 holds its class logits.
        logits = model(**inputs).logits[0]
    # .cpu() keeps the NumPy conversion valid if the logits are on another device.
    scores = softmax(logits, dim=0).cpu().numpy()
    return scores[positive_idx] - scores[negative_idx]

# Process all news data to compute sentiment score for each day
def process_sentiment(news_data, tokenizer, model):
    """Compute the mean article-title sentiment for every date in ``news_data``.

    Args:
        news_data: Mapping of date -> iterable of article dicts. Only each
            article's ``"title"`` entry is scored. A date whose article list
            is ``None`` or empty is treated as having no articles.
        tokenizer: Tokenizer forwarded unchanged to ``get_sentiment``.
        model: Classifier forwarded unchanged to ``get_sentiment``.

    Returns:
        A list with one ``{'date': ..., 'sentiment_score': ...}`` dict per
        date, in the iteration order of ``news_data``. Dates without any
        articles get a score of ``0.0``.
    """
    sentiment_scores = []
    for date, articles in news_data.items():
        # The carriage return keeps the progress indicator on one line.
        print(date, end="\r")
        # Only the article title is scored. `or ""` also covers a title that
        # is present but null in the JSON: dict.get's default applies only
        # when the key is missing entirely, and a None title would make the
        # tokenizer raise.
        scores = [
            get_sentiment(article.get("title") or "", tokenizer, model)
            for article in (articles or [])
        ]
        # Average score for the day (or default to 0 if no articles).
        sentiment_scores.append({
            'date': date,
            'sentiment_score': np.mean(scores) if scores else 0.0
        })
    return sentiment_scores

# ----- Load News Data and Save Sentiment Scores to CSV -----
# JSON text is UTF-8 by specification; pass the encoding explicitly so the
# read does not depend on the platform's locale default.
with open('newsData.json', 'r', encoding='utf-8') as f:
    news_data = json.load(f)

# Load the FinBERT tokenizer and classifier once and reuse them for every article.
tokenizer, finbert_model = load_finbert()

# One averaged sentiment score per date key found in the news data.
sentiment_scores = process_sentiment(news_data, tokenizer, finbert_model)

# Convert the per-date records to a DataFrame and save them without the index column.
sentiment_df = pd.DataFrame(sentiment_scores)
sentiment_df.to_csv('sentiment_scores_title.csv', index=False)

print("Sentiment scores saved to sentiment_scores_title.csv")


Sentiment scores saved to sentiment_scores_title.csv
