In [63]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModel
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [64]:
# Load the dataset
df = pd.read_csv('/content/news_data_labelled.csv', encoding='utf-8')
df['combined_text'] = df['Title'] + " " + df['Text']
sentiment_mapping = {'positive': 1, 'negative': 0, 'neutral': 2}
df['Sentiment'] = df['Sentiment'].map(sentiment_mapping)
train_texts, test_texts, train_labels, test_labels = train_test_split(df['combined_text'], df['Sentiment'], test_size=0.3)


In [65]:
tokenizer = AutoTokenizer.from_pretrained('yiyanghkust/finbert-tone')
model = AutoModel.from_pretrained('yiyanghkust/finbert-tone')

vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

In [66]:
def extract_features(texts):
    inputs = tokenizer(texts.tolist(), padding=True, truncation=True, return_tensors="pt", max_length=512)
    outputs = model(**inputs)
    return outputs.last_hidden_state[:,0,:].detach().numpy()

train_features = extract_features(train_texts)
test_features = extract_features(test_texts)


In [67]:
clf = LogisticRegression(max_iter=1000)
clf.fit(train_features, train_labels)

In [68]:
test_preds = clf.predict(test_features)
print(classification_report(test_labels, test_preds, target_names=['Negative', 'Positive', 'Neutral']))
overall_accuracy = accuracy_score(test_labels, test_preds)
print("Overall Accuracy:", overall_accuracy)

              precision    recall  f1-score   support

    Negative       0.73      0.73      0.73        11
    Positive       0.73      0.79      0.76        14
     Neutral       0.75      0.60      0.67         5

    accuracy                           0.73        30
   macro avg       0.74      0.70      0.72        30
weighted avg       0.73      0.73      0.73        30

Overall Accuracy: 0.7333333333333333


In [69]:
def predict_sentiment(text):
    features = extract_features(pd.Series([text]))
    prediction = clf.predict(features)
    return 'Positive' if prediction[0] == 1 else ('Negative' if prediction[0] == 0 else 'Neutral')

example_text = "Your example news text here"
print(predict_sentiment(example_text))

Neutral
