In [33]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer


In [23]:
# ✅ Device Selection (MPS for Mac, CUDA for NVIDIA, fallback to CPU)
device: torch.device = torch.device(
    "mps" if torch.backends.mps.is_available() else 
    "cuda" if torch.cuda.is_available() else 
    "cpu"
)
print(f"Using device: {device}")

Using device: cpu


In [24]:
# Load data
data = pd.read_csv('data/sample_reviews.csv')

In [25]:
data.head(6)

Unnamed: 0,review,sentiment
0,This product is amazing! I love it.,positive
1,"Terrible experience, would not recommend.",negative
2,"Decent quality, but could be better.",neutral
3,Excellent value for money.,positive
4,Not worth the price.,negative
5,Very satisfied with the purchase.,positive


In [26]:
# Split data into training and testing sets
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [28]:
# Load model and tokenizer
model_name = 'distilbert-base-uncased-finetuned-sst-2-english'
model = DistilBertForSequenceClassification.from_pretrained(model_name)
tokenizer = DistilBertTokenizer.from_pretrained(model_name)

In [34]:
# Function to get sentiment from review
def get_sentiment(review):
    inputs = tokenizer(review, return_tensors="pt", max_length=512, truncation=True, padding=True)
    outputs = model(**inputs)
    sentiment = outputs.logits.argmax().item()
    if sentiment == 0:
        return "negative"
    elif sentiment == 1:
        return "neutral"
    else:
        return "positive"

In [35]:
# Apply the model to the test set
test_data['predicted_sentiment'] = test_data['review'].apply(get_sentiment)

In [38]:
# Evaluate the model
accuracy = accuracy_score(test_data['sentiment'], test_data['predicted_sentiment'])
report = classification_report(test_data['sentiment'], test_data['predicted_sentiment'])
print(f'Model Accuracy: {accuracy:.2f}')
print(f'Classification Report:\n{report}')

# Print sentences with their actual and predicted sentiments
for index, row in test_data.iterrows():
    print(f"Review: {row['review']}\nActual Sentiment: {row['sentiment']}\nPredicted Sentiment: {row['predicted_sentiment']}\n")


Model Accuracy: 0.25
Classification Report:
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         1
     neutral       0.00      0.00      0.00         2
    positive       0.25      1.00      0.40         1

    accuracy                           0.25         4
   macro avg       0.08      0.33      0.13         4
weighted avg       0.06      0.25      0.10         4

Review: This product is amazing! I love it.
Actual Sentiment: positive
Predicted Sentiment: positive

Review: Serviceable, but not outstanding.
Actual Sentiment: neutral
Predicted Sentiment: positive

Review: Neither good nor bad, just okay.
Actual Sentiment: neutral
Predicted Sentiment: positive

Review: Terrible experience, would not recommend.
Actual Sentiment: negative
Predicted Sentiment: positive



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
#