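# **Food Review Sentiment Analysis**

Trains a small Keras embedding classifier on restaurant reviews to predict whether a review is positive (`Liked = 1`), then inspects the learned word embeddings with cosine similarity.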

#### **Connect Google Drive**

In [232]:
# Mount Google Drive into the Colab filesystem; the dataset is read from
# a path under /content/drive further down.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#### **Import Libraries**

In [387]:
import numpy as np
import random
import matplotlib.pyplot as plt
import pandas as pd
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Dropout,Embedding

#### **Load Dataset**

In [234]:
# Read the tab-separated restaurant reviews file from the mounted Drive.
dataset = pd.read_csv(
    "/content/drive/MyDrive/Colab Notebooks/NLP Projects/NLP Dataset/Restaurant_Reviews.tsv",
    sep="\t",
)

In [235]:
# Preview the first rows of the raw frame (text in "Review", label in "Liked").
dataset.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


#### **Text Cleaning and Normalization**

In [236]:
# Delete every character that is neither an ASCII letter nor whitespace
# (punctuation, digits, symbols) from the review text.
dataset["Review"] = dataset["Review"].str.replace(pat=r'[^a-zA-Z\s]', repl='', regex=True)

In [237]:
# Lowercase the cleaned text so differently-cased spellings of a word collapse
# into a single vocabulary entry.
dataset["Review"] = dataset["Review"].str.lower()

In [238]:
# Preview again to confirm punctuation was removed and the text is lowercase.
dataset.head()

Unnamed: 0,Review,Liked
0,wow loved this place,1
1,crust is not good,0
2,not tasty and the texture was just nasty,0
3,stopped by during the late may bank holiday of...,1
4,the selection on the menu was great and so wer...,1


In [239]:
# Plain Python list of the cleaned review strings, in dataset row order.
reviews = dataset["Review"].tolist()

In [240]:
# Number of reviews in the corpus.
len(reviews)

1000

#### **Tokenization and Vocabulary Building**

In [241]:
# Build the word -> integer-id vocabulary from the cleaned reviews.
# NOTE(review): the tokenizer is fitted on every review, i.e. before the
# train/test split performed later, so test-set words are in the vocabulary.
# NOTE(review): no oov_token is configured; words unseen here are presumably
# dropped at prediction time - confirm against the Keras Tokenizer docs.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(reviews)


# Encode each review as a list of word ids and keep the word -> id mapping.
tokenized_data = tokenizer.texts_to_sequences(reviews)
tokenized_vocab = tokenizer.word_index

In [242]:
# One slot more than the number of known words, so id 0 (the value that
# pad_sequences fills with by default) also has an embedding row.
vocab_size = 1 + len(tokenized_vocab)
# Length of the longest encoded review; used as the fixed sequence length.
max_review_len = max(map(len, tokenized_data))

In [243]:
# Report the two sizes that determine the embedding layer's input shape.
print(f"vocab size = {vocab_size}")
print(f"maximum review length = {max_review_len}")

vocab size = 2051
maximum review length = 32


In [244]:
# Pad every encoded review at the end (padding='post') up to max_review_len so
# all sequences share one length.
padded_tokenized_data = pad_sequences(tokenized_data,maxlen=max_review_len,padding='post')

#### **Model Architecture**

In [245]:
# Dimensionality of each learned word vector.
embedded_vector_size = 5

# Embedding -> Flatten -> Dropout -> single sigmoid unit (binary classifier).
model = Sequential()
# `input_length` is intentionally not passed: Keras 3 deprecates that argument
# on Embedding (it is ignored with a warning). The sequence length is fixed by
# the model.build((None, max_review_len)) call in the compilation cell.
model.add(Embedding(input_dim=vocab_size,
                    output_dim=embedded_vector_size,
                    name="embedding"))
# Concatenate the per-token vectors of a review into one flat feature vector.
model.add(Flatten())
# Randomly drop half of the flattened features during training.
model.add(Dropout(0.5))
# One probability output; prediction_test thresholds it at 0.5.
model.add(Dense(1, activation="sigmoid"))



In [246]:
# Features are the padded id sequences; the target is the 0/1 "Liked" column.
X = padded_tokenized_data
y = dataset["Liked"]

#### **Model Compilation**

In [247]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as the reported metric.
model.compile(optimizer="adam",loss="binary_crossentropy",metrics=["accuracy"])
# Build with an explicit (batch, max_review_len) input shape so that
# summary() can report layer output shapes and parameter counts.
model.build((None, max_review_len))
model.summary()

#### **Splitting Data into Training and Testing Sets**

In [248]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### **Model Training**

In [249]:
# Train for 50 epochs and keep the History object (per-epoch loss/accuracy in
# `history.history`) rather than leaking its repr as the cell output.
# NOTE(review): no validation data is monitored here; the evaluation cell
# reports a much lower test accuracy than the training log, which suggests
# overfitting - consider validation_split / early stopping.
history = model.fit(X_train, y_train, epochs=50)

Epoch 1/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5283 - loss: 0.6930
Epoch 2/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5728 - loss: 0.6897
Epoch 3/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6286 - loss: 0.6849 
Epoch 4/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6599 - loss: 0.6819 
Epoch 5/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7326 - loss: 0.6749
Epoch 6/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7587 - loss: 0.6678 
Epoch 7/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8183 - loss: 0.6591
Epoch 8/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8402 - loss: 0.6510 
Epoch 9/50
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[

<keras.src.callbacks.history.History at 0x7c68fc658a40>

#### **Model Evaluation**

In [250]:
# Score the trained model on the held-out 20% split; evaluate() returns the
# loss followed by the compiled metric (accuracy).
loss,accuracy=model.evaluate(X_test,y_test)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6649 - loss: 0.5840  


#### **Review Preprocessing for Prediction**

In [251]:
def preprocess_review(review):
  """Convert one review string into a padded id sequence for the model.

  Mirrors the cleaning applied to the training text: everything except ASCII
  letters and whitespace is removed, the text is lowercased, then it is
  encoded with the fitted `tokenizer` and post-padded to `max_review_len`.

  Args:
    review: The review text as a string.

  Returns:
    The `pad_sequences` result for a batch containing this single review.
  """
  letters_only = re.sub(r'[^a-zA-Z\s]','',review)
  # texts_to_sequences expects a list of texts, hence the one-item list.
  batch_of_one = tokenizer.texts_to_sequences([letters_only.lower()])
  return pad_sequences(batch_of_one, maxlen=max_review_len , padding='post')

In [252]:
def prediction_test(idx=None):
  """Predict the sentiment of one review and compare it with its true label.

  Prints the review text, the predicted sentiment, the true sentiment and
  whether the prediction was correct.

  Note: the review is drawn from the full `reviews` list, which also contains
  the rows the model was trained on, so this is a spot-check rather than a
  held-out evaluation.

  Args:
    idx: Position of the review to check. When None (the default) a random
      position is chosen, matching the original behaviour.

  Returns:
    None. All results are printed.
  """
  if idx is None:
    idx = random.randint(0,len(reviews)-1)

  review = reviews[idx]
  predicted_sentiment = model.predict(preprocess_review(review))[0][0]

  print(f"review text : {review}")
  # Threshold the sigmoid output at 0.5 to obtain a hard 0/1 prediction.
  predicted_value = 1 if predicted_sentiment >= 0.5 else 0
  if predicted_value == 1:
    print("Predicted Sentiment : Liked")
  else:
    print("Predicted Sentiment : Not Liked")

  # Positional lookup keeps the label aligned with reviews[idx] regardless of
  # the Series index labels (and for negative positions).
  true_sentiment = y.iloc[idx]
  if true_sentiment == 0:
    print("True Sentiment : Not Liked")
  else:
    print("True Sentiment : Liked")

  if predicted_value == 0 and true_sentiment == 0:
    print("Correct Prediction - Not Liked")
  elif predicted_value == 1 and true_sentiment == 1:
    print("Correct Prediction - Liked")
  else:
    print("Wrong Prediction")



#### **Sentiment Prediction on Random Reviews**

In [271]:
# Spot-check one randomly chosen review (no seed is set, so the pick varies per run).
prediction_test()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
review text : the staff is always super friendly and helpful which is especially cool when you bring two small boys and a baby
Predicted Sentiment : Liked
True Sentiment : Liked
Correct Prediction - Liked


In [291]:
# Second spot-check on another randomly chosen review.
prediction_test()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
review text : if that bug never showed up i would have given a  for sure but on the other side of the wall where this bug was climbing was the kitchen
Predicted Sentiment : Not Liked
True Sentiment : Not Liked
Correct Prediction - Not Liked


In [349]:
# Third spot-check on another randomly chosen review.
prediction_test()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
review text : they could serve it with just the vinaigrette and it may make for a better overall dish but it was still very good
Predicted Sentiment : Liked
True Sentiment : Liked
Correct Prediction - Liked


# **Word Embeddings**

In [374]:
# First weight array of the trained "embedding" layer, which was created with
# input_dim=vocab_size and output_dim=embedded_vector_size.
# find_word_similarity reads the layer weights itself, so it does not depend
# on this variable.
embedding_matrix = model.get_layer('embedding').get_weights()[0]

In [419]:
# Show a few cleaned reviews to pick words for the similarity checks.
reviews[:10]

['wow loved this place',
 'crust is not good',
 'not tasty and the texture was just nasty',
 'stopped by during the late may bank holiday off rick steve recommendation and loved it',
 'the selection on the menu was great and so were the prices',
 'now i am getting angry and i want my damn pho',
 'honeslty it didnt taste that fresh',
 'the potatoes were like rubber and you could tell they had been made up ahead of time being kept under a warmer',
 'the fries were great too',
 'a great touch']

#### **Word Similarity using Cosine Similarity**

In [409]:
def find_word_similarity(first_word, second_word):
    """Print the cosine similarity between the learned embeddings of two words.

    The words are looked up in the fitted tokenizer's vocabulary after being
    stripped and lowercased, because the corpus was lowercased before the
    tokenizer was fitted. If either word is unknown, a message naming the first
    missing word is printed and nothing else happens.

    Args:
        first_word: First word to compare.
        second_word: Second word to compare.

    Returns:
        None. The result (or the not-found message) is printed.
    """
    vocab = tokenizer.word_index

    # Normalise the queries the same way the training text was normalised so
    # that e.g. "Great" resolves to the vocabulary entry "great".
    first_key = first_word.strip().lower()
    second_key = second_word.strip().lower()

    # Check both words, in order, and stop at the first one that is missing.
    for word, key in ((first_word, first_key), (second_word, second_key)):
        if key not in vocab:
            print(f"'{word}' not found in vocabulary")
            return

    # Row i of the embedding weights is the vector for vocabulary id i.
    embedding_matrix = model.get_layer("embedding").get_weights()[0]
    first_vector = embedding_matrix[vocab[first_key]]
    second_vector = embedding_matrix[vocab[second_key]]

    # cosine_similarity expects 2-D inputs, so each vector becomes a 1-row matrix.
    similarity = cosine_similarity(
        first_vector.reshape(1, -1),
        second_vector.reshape(1, -1)
    )[0][0]

    print(f"Cosine similarity between '{first_word}' and '{second_word}': {similarity}")


In [415]:
# Two negative words.
find_word_similarity("worst","poor")

Cosine similarity between 'worst' and 'poor': 0.9760047197341919


In [416]:
# Two positive words.
find_word_similarity("best","great")

Cosine similarity between 'best' and 'great': 0.6989485621452332


In [417]:
# Another pair of positive words.
find_word_similarity("nice","great")

Cosine similarity between 'nice' and 'great': 0.6568199992179871


In [418]:
# Another pair of positive words.
find_word_similarity("good","great")

Cosine similarity between 'good' and 'great': 0.9135222434997559
