In [3]:
import pandas as pd
from vaderSentiment.vaderLexicon import Vader
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

# Define paths
data_path = "Restaurant_Reviews.tsv"
roberta_model_name = "cardiffnlp/roberta-base-finetuned-sst-2-english"

# Load data
df = pd.read_csv(data_path)
reviews = df["Review"]
liked = df["Liked"]

# Initialize Vader
vader_model = Vader()

# Initialize RoBERTa
roberta_tokenizer = AutoTokenizer.from_pretrained(roberta_model_name)
roberta_model = TFAutoModelForSequenceClassification.from_pretrained(roberta_model_name)

# Analyze sentiment with Vader and RoBERTa
vader_scores = []
roberta_scores = []
for review in reviews:
    vader_score = vader_model.polarity_scores(review)
    roberta_encoded_inputs = roberta_tokenizer(review, return_tensors="tf")
    roberta_output = roberta_model(**roberta_encoded_inputs)
    roberta_score = tf.nn.softmax(roberta_output.logits[0]).numpy()[1]
    vader_scores.append(vader_score)
    roberta_scores.append(roberta_score)

# Calculate accuracy for both models (assuming "Liked" is the target variable)
vader_accuracy = sum(liked == (vader_score["compound"] > 0)) / len(liked)
roberta_accuracy = sum(liked == (roberta_score > 0.5)) / len(liked)

# Print results
print("Vader Accuracy:", vader_accuracy)
print("RoBERTa Accuracy:", roberta_accuracy)

# (Optional) Analyze individual reviews and scores
for review, liked, vader_score, roberta_score in zip(reviews, liked, vader_scores, roberta_scores):
    print(f"Review: {review}")
    print(f"Liked: {liked}")
    print(f"Vader Score: {vader_score}")
    print(f"RoBERTa Score: {roberta_score}")
    print("---")


ModuleNotFoundError: No module named 'vaderSentiment.vaderLexicon'