# **Restaurant Review Sentiment Analyzer**

# Import Libraries and Load Data

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from wordcloud import WordCloud
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import torch
import torch.nn as nn
import torch.optim as optim

# Download necessary NLTK data
nltk.download('stopwords')
nltk.download('wordnet')

# Load the dataset
dataset = pd.read_csv('Restaurant_Reviews.tsv', delimiter='\t', quoting=3)

ModuleNotFoundError: No module named 'pandas'

# Preprocess the Data

In [None]:
lemmatizer = WordNetLemmatizer()
corpus = []
for i in range(len(dataset)):
    review = re.sub('[^a-zA-Z]', ' ', dataset['Review'][i])
    review = review.lower()
    review = review.split()
    review = [lemmatizer.lemmatize(word) for word in review if not word in set(stopwords.words('english'))]
    review = ' '.join(review)
    corpus.append(review)

# Create the TF-IDF model
tfidf = TfidfVectorizer(max_features=1500, ngram_range=(1, 2))
X = tfidf.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Naive Bayes Model with Hyperparameter Tuning

In [None]:
# Hyperparameter tuning for Naive Bayes
parameters = {'alpha': [0.1, 0.5, 1.0]}
nb_classifier = MultinomialNB()
grid_search = GridSearchCV(estimator=nb_classifier, param_grid=parameters, scoring='accuracy', cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_nb_classifier = grid_search.best_estimator_

y_pred_nb = best_nb_classifier.predict(X_test)

print("Naive Bayes Results:")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_nb))
print("Accuracy:", round(accuracy_score(y_test, y_pred_nb) * 100, 2), "%")
print("Precision:", round(precision_score(y_test, y_pred_nb), 2))
print("Recall:", round(recall_score(y_test, y_pred_nb), 2))

Naive Bayes Results:
Confusion Matrix:
 [[77 19]
 [28 76]]
Accuracy: 76.5 %
Precision: 0.8
Recall: 0.73


# TensorFlow Deep Learning Model

In [None]:
model_tf = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model_tf.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history_tf = model_tf.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

y_pred_tf = (model_tf.predict(X_test) > 0.5).astype("int32")

print("\nTensorFlow Deep Learning Results:")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_tf))
print("Accuracy:", round(accuracy_score(y_test, y_pred_tf) * 100, 2), "%")
print("Precision:", round(precision_score(y_test, y_pred_tf), 2))
print("Recall:", round(recall_score(y_test, y_pred_tf), 2))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# PyTorch Deep Learning Model

In [None]:
class SentimentNet(nn.Module):
    def __init__(self, input_dim):
        super(SentimentNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 16)
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(16, 8)
        self.dropout2 = nn.Dropout(0.2)
        self.fc3 = nn.Linear(8, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.dropout1(x)
        x = torch.relu(self.fc2(x))
        x = self.dropout2(x)
        x = torch.sigmoid(self.fc3(x))
        return x

model_torch = SentimentNet(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model_torch.parameters())

X_train_torch = torch.FloatTensor(X_train)
y_train_torch = torch.FloatTensor(y_train)

for epoch in range(50):
    optimizer.zero_grad()
    outputs = model_torch(X_train_torch).squeeze()
    loss = criterion(outputs, y_train_torch)
    loss.backward()
    optimizer.step()

X_test_torch = torch.FloatTensor(X_test)
y_pred_torch = (model_torch(X_test_torch).squeeze() > 0.5).int().numpy()

print("\nPyTorch Deep Learning Results:")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_torch))
print("Accuracy:", round(accuracy_score(y_test, y_pred_torch) * 100, 2), "%")
print("Precision:", round(precision_score(y_test, y_pred_torch), 2))
print("Recall:", round(recall_score(y_test, y_pred_torch), 2))

# Visualize Positive and Negative Reviews

In [None]:
# Visualize the number of positive and negative reviews
positive_reviews = sum(y)
negative_reviews = len(y) - positive_reviews

plt.figure(figsize=(8, 6))
plt.bar(['Positive Reviews', 'Negative Reviews'], [positive_reviews, negative_reviews], color=['green', 'red'])
plt.title('Number of Positive and Negative Reviews')
plt.xlabel('Review Type')
plt.ylabel('Count')
plt.show()

# Word Cloud for Positive Reviews

In [None]:
positive_corpus = ' '.join([corpus[i] for i in range(len(corpus)) if y[i] == 1])

wordcloud_positive = WordCloud(width=800, height=400, background_color='white').generate(positive_corpus)

plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plt.imshow(wordcloud_positive, interpolation='bilinear')
plt.title('Word Cloud for Positive Reviews')
plt.axis('off')

plt.show()

# Word Cloud for Negative Reviews

In [None]:
negative_corpus = ' '.join([corpus[i] for i in range(len(corpus)) if y[i] == 0])

wordcloud_negative = WordCloud(width=800, height=400, background_color='black').generate(negative_corpus)

plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 2)
plt.imshow(wordcloud_negative, interpolation='bilinear')
plt.title('Word Cloud for Negative Reviews')
plt.axis('off')

plt.show()

# Model Comparison Visualization

In [None]:
models = ['Naive Bayes', 'TensorFlow DL', 'PyTorch DL']
accuracies = [
    accuracy_score(y_test, y_pred_nb),
    accuracy_score(y_test, y_pred_tf),
    accuracy_score(y_test, y_pred_torch)
]

plt.figure(figsize=(10, 6))
plt.bar(models, accuracies, color=['blue', 'green', 'red'])
plt.title('Model Comparison - Accuracy')
plt.xlabel('Models')
plt.ylabel('Accuracy')
plt.ylim(0, 1)
for i, v in enumerate(accuracies):
    plt.text(i, v, f'{v:.2f}', ha='center', va='bottom')
plt.show()