In [3]:
import pandas as pd
from transformers import pipeline, RobertaTokenizer, RobertaForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import torch

# Step 1: Load the dataset
# The evaluation data is read from 'test.csv' (latin1-encoded) in the working
# directory; adjust the path if the file lives elsewhere.
df = pd.read_csv('test.csv', encoding='latin1')

# Step 2: Preprocess data
# Map sentiment labels to numerical values: 0=negative, 1=neutral, 2=positive.
# Any value outside these three strings (including a missing cell) maps to NaN.
label_ids = df['sentiment'].map({'positive': 2, 'neutral': 1, 'negative': 0})

# Keep only rows that have both a tweet and a recognised label. pandas reads an
# empty text cell as NaN (a float), which is not valid tokenizer input, and a
# NaN label makes the sklearn metric functions raise. Applying one mask to both
# columns keeps X and y the same length and in the same row order.
usable = df['text'].notna() & label_ids.notna()
n_dropped = int((~usable).sum())
if n_dropped:
    print(f"Dropped {n_dropped} row(s) with missing text or unrecognised sentiment.")

X = df.loc[usable, 'text'].astype(str).tolist()  # list of tweet strings
y = label_ids[usable].astype(int)                # integer class ids aligned with X



In [4]:
# Step 3: Load RoBERTa Twitter-based model
# The tokenizer and the classification model are both loaded from the same
# checkpoint name, tokenizer first and model second.
model_name = "cardiffnlp/twitter-roberta-base-sentiment"
tokenizer, model = (
    loader.from_pretrained(model_name)
    for loader in (RobertaTokenizer, RobertaForSequenceClassification)
)

In [5]:
# Step 4: Enable GPU if available
# The pipeline takes a device index: 0 selects the first CUDA device, -1 the CPU.
device = 0 if torch.cuda.is_available() else -1
print(f"Using {'GPU' if device == 0 else 'CPU'} for inference.")

# Step 5: Define sentiment analysis pipeline
classifier = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer, device=device)

# Step 6: Evaluate the model on the dataset
# Label strings this notebook expects from the pipeline, mapped to the class
# ids used for the ground truth (0=negative, 1=neutral, 2=positive).
LABEL_TO_ID = {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}

# Batch only on GPU; the Hugging Face pipeline docs note that batching is not
# generally a speed-up on CPU, so the CPU path keeps one text per forward pass.
BATCH_SIZE = 32 if device == 0 else 1

# Upper bound on tokens per input. Passed explicitly rather than relying on the
# tokenizer's own limit.
# NOTE(review): 512 is assumed to be this checkpoint's usable length — confirm.
MAX_TOKENS = 512

predictions = []
for result in classifier(X, batch_size=BATCH_SIZE, truncation=True, max_length=MAX_TOKENS):
    label = result['label']
    if label not in LABEL_TO_ID:
        # Fail loudly instead of silently scoring an unknown label as "negative".
        raise ValueError(
            f"Unexpected label {label!r} from the pipeline; "
            f"expected one of {sorted(LABEL_TO_ID)}."
        )
    predictions.append(LABEL_TO_ID[label])


Using CPU for inference.


KeyboardInterrupt: 

In [None]:
# Step 7: Calculate accuracy and other metrics
# Accuracy takes no averaging argument; the other three scores are computed
# with average='weighted', i.e. per-class scores weighted by class support.
accuracy = accuracy_score(y, predictions)
f1, precision, recall = (
    metric(y, predictions, average='weighted')
    for metric in (f1_score, precision_score, recall_score)
)

# Step 8: Display results
# One line per metric, each rounded to two decimals.
print(
    f"Accuracy: {accuracy:.2f}",
    f"F1 Score: {f1:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    sep="\n",
)
