In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Read the review dataset; later steps use its 'Text' and 'Sentiment' columns
DATASET_PATH = "sentiment_categorized_reviews.csv"
df = pd.read_csv(DATASET_PATH)

# Define a function to preprocess text data
def preprocess_text(text):
    """Normalize one document for bag-of-words vectorization.

    The text is lowercased, tokenized with NLTK's ``word_tokenize``, stripped
    of English stopwords, lemmatized with ``WordNetLemmatizer`` and re-joined
    with single spaces.

    Args:
        text: The raw document. Non-string values (for example the float NaN
            or ``None`` that pandas uses for missing cells) are treated as an
            empty document instead of raising ``AttributeError``.

    Returns:
        The cleaned document as a single space-separated string.
    """
    if not isinstance(text, str):
        # A missing cell has no words to contribute; an empty document keeps
        # the row aligned with its label instead of aborting the whole apply().
        return ""

    # Fetch the stopword set once per call. The original rebuilt the stopword
    # list for every token and searched it linearly.
    stop_words = _english_stop_words()
    lemmatizer = WordNetLemmatizer()
    tokens = word_tokenize(text.lower())
    return " ".join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )


# Filled lazily so that defining these functions does not touch the NLTK corpus.
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return NLTK's English stopwords as a frozenset, built on first use."""
    if "english" not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE["english"] = frozenset(stopwords.words("english"))
    return _STOP_WORDS_CACHE["english"]

# Clean every review and keep the result alongside the raw text
df['Cleaned_Text'] = df['Text'].apply(preprocess_text)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    df['Cleaned_Text'],
    df['Sentiment'],
    test_size=0.2,
    random_state=42,
)

# Learn the TF-IDF vocabulary from the training split, then fit Naive Bayes on it
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
nb_model = MultinomialNB().fit(X_train_tfidf, y_train)

# Fit a linear-kernel SVM on the same TF-IDF features
svm_model = SVC(kernel='linear').fit(X_train_tfidf, y_train)

# Create the VADER analyzer that calculate_vader_sentiment reads
vader_analyzer = SentimentIntensityAnalyzer()

# Function to calculate sentiment using VADER
def calculate_vader_sentiment(text):
    """Map *text* to a sentiment label using VADER's compound score.

    Returns 'Positive' when the compound score is at least 0.05, 'Negative'
    when it is at most -0.05, and 'Neutral' for anything in between.
    """
    compound = vader_analyzer.polarity_scores(text)['compound']
    if compound >= 0.05:
        return 'Positive'
    if compound <= -0.05:
        return 'Negative'
    return 'Neutral'

# Calculate VADER sentiment on the test rows using the ORIGINAL review text.
# X_test holds Cleaned_Text, which preprocess_text has lowercased and stripped
# of stopwords; that removes negations ("not", "no") and intensifiers ("very")
# that VADER's rules depend on, so scoring the cleaned text handicaps VADER.
# train_test_split keeps the DataFrame index, so X_test.index selects the same
# rows (in the same order) from the raw 'Text' column.
raw_test_text = df.loc[X_test.index, 'Text'].fillna("").astype(str)
vader_predictions = raw_test_text.apply(calculate_vader_sentiment)

from sklearn.metrics import precision_recall_fscore_support

# Function to evaluate model with handling for undefined metrics
def evaluate_model(model, X_test, y_test):
    """Score a fitted classifier on held-out documents.

    Args:
        model: Fitted estimator exposing ``predict``. It must have been
            trained on features produced by the module-level
            ``tfidf_vectorizer``, which is used here to transform ``X_test``.
        X_test: Preprocessed test documents.
        y_test: True sentiment labels for ``X_test``.

    Returns:
        A tuple ``(accuracy, precision, recall, f1)``. ``accuracy`` is a
        scalar; the other three hold one value per class, ordered
        Negative, Positive, Neutral.
    """
    class_order = ['Negative', 'Positive', 'Neutral']
    features = tfidf_vectorizer.transform(X_test)
    predictions = model.predict(features)
    accuracy = accuracy_score(y_test, predictions)
    # zero_division=0 still silences the undefined-metric warning, but scores a
    # class that was never predicted (or never present) as 0 rather than a
    # flattering 1.0. It also matches the value the VADER evaluation gets from
    # scikit-learn's default, so the three models stay comparable.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_test,
        predictions,
        labels=class_order,
        average=None,
        zero_division=0,
    )
    return accuracy, precision, recall, f1


# Score the two TF-IDF classifiers through the shared helper
nb_accuracy, nb_precision, nb_recall, nb_f1 = evaluate_model(nb_model, X_test, y_test)
svm_accuracy, svm_precision, svm_recall, svm_f1 = evaluate_model(svm_model, X_test, y_test)

# VADER's output is already a set of predicted labels, so score it directly
vader_accuracy = accuracy_score(y_test, vader_predictions)
vader_precision, vader_recall, vader_f1, _ = precision_recall_fscore_support(
    y_test,
    vader_predictions,
    labels=['Negative', 'Positive', 'Neutral'],
    average=None,
)


def _print_metrics(title, accuracy, precision, recall, f1):
    """Print one model's accuracy and per-class precision, recall and F1 under *title*."""
    print(title)
    print("Accuracy:", accuracy)
    print("Precision (Negative, Positive, Neutral):", precision)
    print("Recall (Negative, Positive, Neutral):", recall)
    print("F1 Score (Negative, Positive, Neutral):", f1)


# Report all three models in the same layout
_print_metrics("Naive Bayes Model Metrics:", nb_accuracy, nb_precision, nb_recall, nb_f1)
_print_metrics(
    "\nSupport Vector Machines (SVM) Model Metrics:",
    svm_accuracy,
    svm_precision,
    svm_recall,
    svm_f1,
)
_print_metrics("\nVADER Model Metrics:", vader_accuracy, vader_precision, vader_recall, vader_f1)
