In [1]:
import os

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC

In [4]:
# Load the labelled movie reviews.
# The CSV location can be overridden with the MOVIE_REVIEWS_CSV environment
# variable so the notebook is not tied to a single machine. When the variable
# is unset, the original local path is used, so existing runs are unaffected.
DATA_PATH = os.environ.get(
    "MOVIE_REVIEWS_CSV",
    "C:/Users/Azar/Downloads/projects/ayup/movie_reviews.csv",
)
data = pd.read_csv(DATA_PATH)

# Fail fast with a clear message if the columns read by the split cell
# ('review' and 'sentiment') are not present in the file.
missing_columns = {"review", "sentiment"} - set(data.columns)
if missing_columns:
    raise KeyError(
        f"{DATA_PATH} is missing required column(s): {sorted(missing_columns)}"
    )

In [5]:
# Hold out a test set: test_size=0.2 reserves a fifth of the rows, and the
# fixed random_state keeps the split identical from run to run.
reviews = data['review']
sentiments = data['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    reviews,
    sentiments,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Bag-of-words features: the vocabulary is learned from the training reviews
# only (with the built-in English stop-word list), and the test reviews are
# then encoded with that same fitted vocabulary.
# Note: X_train / X_test are rebound here from raw text to count matrices, so
# re-run the split cell before re-running this one.
vectorizer = CountVectorizer(stop_words='english')
train_counts = vectorizer.fit_transform(X_train)
test_counts = vectorizer.transform(X_test)
X_train, X_test = train_counts, test_counts

In [7]:
# Naive Bayes baseline: build the classifier and train it in one expression
# (fit returns the estimator itself), then predict labels for the test set.
nb_model = MultinomialNB().fit(X_train, y_train)
nb_preds = nb_model.predict(X_test)

In [8]:
# Train SVM model
# LinearSVC's dual coordinate-descent solver uses a random number generator
# while fitting, so an unseeded model can give slightly different predictions
# (and metrics) from run to run. Pin the seed, using the same value as the
# train/test split, so the reported numbers are reproducible on a fresh kernel.
svm_model = LinearSVC(random_state=42)
svm_model.fit(X_train, y_train)
svm_preds = svm_model.predict(X_test)



In [9]:
# Score both classifiers on the held-out set.
# Each row is (label, metric function, predictions, extra keyword arguments);
# rows are listed in the order they should be printed. Precision, recall and
# F1 are computed with 'positive' as the positive class.
positive_class = {'pos_label': 'positive'}
report_rows = (
    ('Naive Bayes accuracy:', accuracy_score, nb_preds, {}),
    ('SVM accuracy:', accuracy_score, svm_preds, {}),
    ('Naive Bayes precision:', precision_score, nb_preds, positive_class),
    ('SVM precision:', precision_score, svm_preds, positive_class),
    ('Naive Bayes recall:', recall_score, nb_preds, positive_class),
    ('SVM recall:', recall_score, svm_preds, positive_class),
    ('Naive Bayes F1-score:', f1_score, nb_preds, positive_class),
    ('SVM F1-score:', f1_score, svm_preds, positive_class),
)
for label, metric, predictions, extra_kwargs in report_rows:
    print(label, metric(y_test, predictions, **extra_kwargs))

Naive Bayes accuracy: 0.8565
SVM accuracy: 0.8621
Naive Bayes precision: 0.8721602643535729
SVM precision: 0.861803084223013
Naive Bayes recall: 0.8380631077594761
SVM recall: 0.8650525897995635
Naive Bayes F1-score: 0.8547717842323652
SVM F1-score: 0.8634247796375161
