In [50]:
from transformers import BertModel, BertTokenizer
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertModel
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [51]:

# Load the dataset
df = pd.read_csv('/content/news_data_labelled.csv', encoding='utf-8')

# Combine title and text columns for more context
df['combined_text'] = df['Title'] + " " + df['Text']

# Map sentiment labels to numerical values
sentiment_mapping = {'positive': 1, 'negative': 0, 'neutral': 2}
df['Sentiment'] = df['Sentiment'].map(sentiment_mapping)

# Split the data
train_texts, test_texts, train_labels, test_labels = train_test_split(df['combined_text'], df['Sentiment'], test_size=0.3)


In [52]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

In [53]:
def extract_features(texts):
    inputs = tokenizer(texts.tolist(), padding=True, truncation=True, return_tensors="pt", max_length=512)
    outputs = model(**inputs)
    return outputs.last_hidden_state[:,0,:].detach().numpy()

# Extract features
train_features = extract_features(train_texts)
test_features = extract_features(test_texts)

In [54]:
# Train the classifier
clf = LogisticRegression(max_iter=1000)
clf.fit(train_features, train_labels)

In [55]:
# Predict on the test set
test_preds = clf.predict(test_features)

# Evaluate the predictions
print(classification_report(test_labels, test_preds, target_names=['Negative', 'Positive', 'Neutral']))

              precision    recall  f1-score   support

    Negative       0.54      0.50      0.52        14
    Positive       0.50      0.58      0.54        12
     Neutral       1.00      0.75      0.86         4

    accuracy                           0.57        30
   macro avg       0.68      0.61      0.64        30
weighted avg       0.58      0.57      0.57        30



In [None]:
# Predict on the test set
test_preds = clf.predict(test_features)

# Calculate overall accuracy
overall_accuracy = accuracy_score(test_labels, test_preds)

print("Overall Accuracy:", overall_accuracy)

In [61]:
def predict_sentiment(text):
    features = extract_features(pd.Series([text]))
    prediction = clf.predict(features)
    return 'Positive' if prediction[0] == 1 else ('Negative' if prediction[0] == 0 else 'Neutral')

Negative
