In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd

# Tokenizer and classifier are both taken from the same pretrained checkpoint
checkpoint_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_name)

# Run on CUDA when torch reports it as available, otherwise stay on the CPU
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
model.to(device)

# Read the forex news spreadsheet into a DataFrame
news_path = '/content/EURUSD_news.xlsx'
news_df = pd.read_excel(news_path)

# Missing article bodies become empty strings so every row holds text
article_bodies = news_df['articleBody']
news_df['articleBody'] = article_bodies.fillna('')

# Tokenization and sentiment analysis function
def get_sentiment_score(text):
    """Return the classifier's raw output scores for one piece of text.

    The text is tokenized with the module-level ``tokenizer`` (truncated to at
    most 512 tokens), moved to the module-level ``device`` and run through the
    module-level ``model`` with gradient tracking disabled.

    Args:
        text: The text to score.

    Returns:
        A NumPy array holding the first row of ``outputs.logits``. These are
        unnormalized logits, not probabilities.
    """
    # Only one sequence is encoded per call, so there is nothing to pad it
    # against. Padding to max_length forced every forward pass to process 512
    # positions no matter how short the text was; truncation alone still caps
    # long inputs at 512 tokens.
    inputs = tokenizer(text, truncation=True, max_length=512, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    # Single-item batch: take row 0 and hand it back on the CPU as NumPy
    return logits[0].cpu().numpy()

# Score every article body and keep the per-row result in a new column
article_texts = news_df['articleBody']
news_df['sentiment_scores'] = article_texts.apply(get_sentiment_score)

# Adjusted sentiment mapping function to prioritize 'Very Positive'
def map_to_sentiment_class(scores):
    """Translate a score vector into one of five sentiment labels.

    The decision value is ``scores[2] - scores[0]`` (positive minus negative
    score). It is compared against ascending, inclusive upper bounds; the
    first bound it does not exceed decides the label.

    Args:
        scores: Indexable sequence whose items 0 and 2 are the negative and
            positive scores respectively.

    Returns:
        One of 'Very Negative', 'Negative', 'Neutral', 'Positive' or
        'Very Positive'.
    """
    # (inclusive upper bound on the margin, label), in ascending order
    upper_bounds = (
        (-1.0, 'Very Negative'),
        (-0.3, 'Negative'),
        (0.3, 'Neutral'),
        (0.8, 'Positive'),
    )
    margin = scores[2] - scores[0]
    for limit, label in upper_bounds:
        if margin <= limit:
            return label
    # Nothing matched: the margin is above 0.8 (a NaN margin also lands here,
    # because every comparison against it is false)
    return 'Very Positive'

# Turn each row's score vector into its sentiment label
score_column = news_df['sentiment_scores']
news_df['sentiment'] = score_column.apply(map_to_sentiment_class)

# Preview: headline next to its predicted label for the leading rows
preview_columns = ['title', 'sentiment']
print(news_df[preview_columns].head())

# Count how many articles received each label
sentiment_counts = news_df['sentiment'].value_counts()
print(sentiment_counts)

# Write the annotated frame to a new Excel file, leaving out the row index
output_file_path = '/content/forex_sentiment_analysis.xlsx'
news_df.to_excel(output_file_path, index=False)

print(f"Sentiment analysis complete. Results saved to {output_file_path}.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

                                               title      sentiment
0  EUR/USD pulls back on Friday, still heads for ...  Very Positive
1  EUR/USD: Fed should help for another visit to ...  Very Positive
2  EUR/USD: Wider US-DE yield spreads and risk-of...  Very Negative
3  Forex Today: A hectic weeks kicks off with a s...  Very Negative
4      EUR/USD probing lows near 1.1720 ahead of IFO       Negative
sentiment
Very Positive    2391
Very Negative    2264
Negative         1262
Neutral           826
Positive          670
Name: count, dtype: int64
Sentiment analysis complete. Results saved to /content/forex_sentiment_analysis.xlsx.
