In [17]:
# Importing necessary libraries
import pandas as pd  # For data manipulation and analysis
from sklearn.feature_extraction.text import CountVectorizer  # For converting text data into numerical vectors
from sklearn.model_selection import train_test_split  # For splitting dataset into training and testing sets
from sklearn.linear_model import LogisticRegression  # For Logistic Regression classifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB  # For Naive Bayes classifiers
from sklearn.neighbors import KNeighborsClassifier  # For K-Nearest Neighbors classifier
from sklearn.svm import SVC  # For Support Vector Machine classifier
from nltk.corpus import stopwords  # For stopwords
from nltk.stem.porter import PorterStemmer  # For stemming words
from sklearn.metrics import accuracy_score  # For calculating accuracy score
import re  # For regular expressions
import nltk  # Natural Language Toolkit

# Make sure the NLTK English stopword list is available locally
# (nltk.download reports when the package is already up to date).
nltk.download('stopwords')

# Load the tab-separated review file.
# quoting=3 is csv.QUOTE_NONE: quote characters inside reviews are kept as plain text.
dataset = pd.read_csv(
    'Restaurant_Review.tsv',
    sep='\t',
    quoting=3,
)

# Show the first rows as the cell output
dataset.head()

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\svhar\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [16]:
# Preprocessing the dataset
# The stemmer and the stopword set do not change between reviews, so they are
# built once here instead of once per review / once per word inside the loop.
ps = PorterStemmer()
english_stopwords = set(stopwords.words('english'))

corpus = []
for review in dataset['Review']:
    # Replace every non-letter with a space, lowercase, and tokenize on whitespace
    words = re.sub('[^a-zA-Z]', ' ', review).lower().split()
    # Drop stopwords, stem what remains, and rebuild a space-separated string
    corpus.append(' '.join(ps.stem(word) for word in words if word not in english_stopwords))



In [15]:
# Creating Bag of Words model
# Cap the vocabulary at 1500 features
cv = CountVectorizer(max_features=1500)
# Dense feature matrix: one row per review in `corpus`, one column per vocabulary term
X = cv.fit_transform(corpus).toarray()
# Select the target by column name rather than by position so the right column
# is used even if the file's column order changes or an extra column is added.
y = dataset['Liked'].values


In [13]:
# Hold out 20% of the samples for testing; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)

# Models to compare, keyed by the label used when reporting results
classifiers = dict([
    ("Logistic Regression", LogisticRegression()),
    ("SVC", SVC()),
    ("GaussianNB", GaussianNB()),
    ("MultinomialNB", MultinomialNB()),
    ("KNeighborsClassifier", KNeighborsClassifier(n_neighbors=5)),
])

# Fit each model on the training split and report its accuracy on the test split
for name, classifier in classifiers.items():
    y_pred = classifier.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {accuracy * 100:.2f}%")


Logistic Regression Accuracy: 71.00%
SVC Accuracy: 73.50%
GaussianNB Accuracy: 73.00%
MultinomialNB Accuracy: 76.50%
KNeighborsClassifier Accuracy: 58.50%


In [14]:

# Function to predict sentiment of a sample review
def predict_sentiment(sample_review):
    """Predict the label of one review with every classifier in ``classifiers``.

    The text is reduced to letters only, lowercased, stripped of English
    stopwords and Porter-stemmed, then vectorized with the notebook-level
    ``cv`` before being passed to each model.

    Parameters
    ----------
    sample_review : str
        Raw review text.

    Returns
    -------
    dict
        Maps each key of the notebook-level ``classifiers`` dict to the
        label that model predicts for the review.
    """
    ps = PorterStemmer()
    # Build the stopword set once per call instead of once per word
    english_stopwords = set(stopwords.words('english'))

    # Replace non-letters with spaces, lowercase, and tokenize on whitespace
    words = re.sub('[^a-zA-Z]', ' ', sample_review).lower().split()
    cleaned_review = ' '.join(ps.stem(word) for word in words if word not in english_stopwords)

    # Single-row dense vector in the vocabulary learned by `cv`
    sample_vector = cv.transform([cleaned_review]).toarray()

    # predict() returns one label per row; there is exactly one row here
    return {
        name: classifier.predict(sample_vector)[0]
        for name, classifier in classifiers.items()
    }

# Example input to run through every classifier
sample_review = "The food was amazing and the service was great!"

# One predicted label per classifier, keyed by classifier name
predictions = predict_sentiment(sample_review)

print("Sentiment Predictions for Sample Review:")
for name, prediction in predictions.items():
    # A predicted label of 1 is reported as positive; anything else as negative
    if prediction == 1:
        label = 'Positive'
    else:
        label = 'Negative'
    print(f"{name}: {label}")


Sentiment Predictions for Sample Review:
Logistic Regression: Positive
SVC: Positive
GaussianNB: Positive
MultinomialNB: Positive
KNeighborsClassifier: Positive
