In [62]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split as tts
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import nltk
from nltk.corpus import stopwords
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from google.colab import drive
# Mount Google Drive (Colab only) so the CSV under /content/gdrive/MyDrive can be read below.
drive.mount("/content/gdrive")

# Preprocessing
# Fetch the NLTK stop-word list used by the stop-word filter further down.
nltk.download('stopwords')

# Load the raw export and clean it in a single, non-mutating chain so the
# cell yields the same frame every time it is re-run.
df = (
  pd.read_csv("/content/gdrive/MyDrive/Restaurant_reviews.csv")
  .drop(columns='7514')  # column literally named '7514'; not used by this notebook
  .dropna()              # discard rows with a missing value in any remaining column
  .assign(
    # The rating column contains the text value 'Like'; treat it as a 5
    # so the whole column can be cast to float.
    Rating=lambda frame: frame['Rating'].replace(['Like'], '5').astype(float),
    Review=lambda frame: frame['Review'].astype(str),
  )
)
df.info()

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
<class 'pandas.core.frame.DataFrame'>
Int64Index: 9955 entries, 0 to 9999
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Restaurant  9955 non-null   object 
 1   Reviewer    9955 non-null   object 
 2   Review      9955 non-null   object 
 3   Rating      9955 non-null   float64
 4   Metadata    9955 non-null   object 
 5   Time        9955 non-null   object 
 6   Pictures    9955 non-null   int64  
dtypes: float64(1), int64(1), object(5)
memory usage: 622.2+ KB


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [63]:
# Filtering the required data
# Only the review text and its rating are needed from here on; .copy() makes
# the result an independent frame rather than a slice of the full one.
df = df[['Review','Rating']].copy()
def conv(row):
  """Map a row's numeric rating to a sentiment class.

  Args:
    row: Mapping-like object (for example a DataFrame row) whose 'Rating'
      entry is a number.

  Returns:
    2 (positive) when the rating is >= 3.5,
    1 (neutral) when 2.5 <= rating < 3.5,
    0 (negative) when the rating is < 2.5.

  Raises:
    ValueError: If the rating compares false against every threshold
      (for example NaN), so no class can be assigned.
  """
  rating = row['Rating']
  if rating >= 3.5:
    return 2  # Positive
  if rating >= 2.5:
    return 1  # Neutral
  if rating < 2.5:
    return 0  # Negative
  # Only reached when every comparison above is False (NaN). Fail with a
  # clear message instead of an unbound-variable error.
  raise ValueError(f"Cannot map rating {rating!r} to a sentiment class")

# Swap each numeric rating for its sentiment class by running conv on every row.
# NOTE(review): re-running this cell would re-map the already-mapped labels.
df = df.assign(Rating=df.apply(conv, axis=1))
df



Unnamed: 0,Review,Rating
0,"The ambience was good, food was quite good . h...",2
1,Ambience is too good for a pleasant evening. S...,2
2,A must try.. great food great ambience. Thnx f...,2
3,Soumen das and Arun was a great guy. Only beca...,2
4,Food is good.we ordered Kodi drumsticks and ba...,2
...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,1
9996,This place has never disappointed us.. The foo...,2
9997,"Bad rating is mainly because of ""Chicken Bone ...",0
9998,I personally love and prefer Chinese Food. Had...,2


In [64]:
# Removing stopwords
# 'stopwords' supplies the English stop-word list. 'punkt' and 'punkt_tab'
# supply the tokenizer data behind nltk.word_tokenize: recent NLTK releases
# look up 'punkt_tab', older ones 'punkt', so both are requested to keep the
# notebook runnable on a fresh kernel with either.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [65]:
def rem_stopwords(text):
  """Remove English stop words from a row's review text.

  Args:
    text: Row-like object whose 'Review' entry is the review string
      (the notebook applies this function row-wise with axis=1).

  Returns:
    The tokens that are not English stop words, joined by single spaces.
    Matching is case-insensitive; kept tokens retain their original casing.
  """
  # Build the stop-word collection once per call and keep it in a set.
  # Evaluating stopwords.words('english') inside the comprehension would
  # rebuild the list for every token and test membership against a list.
  stop_set = set(stopwords.words('english'))
  tokens = nltk.word_tokenize(text['Review'])
  return ' '.join(token for token in tokens if token.lower() not in stop_set)

In [66]:
# Replace every review with its stop-word-free version (rem_stopwords runs once per row).
df = df.assign(Review=df.apply(rem_stopwords, axis=1))
df

Unnamed: 0,Review,Rating
0,"ambience good , food quite good . Saturday lun...",2
1,Ambience good pleasant evening . Service promp...,2
2,must try .. great food great ambience . Thnx s...,2
3,Soumen das Arun great guy . behavior sincerety...,2
4,Food good.we ordered Kodi drumsticks basket mu...,2
...,...,...
9995,Madhumathi Mahajan Well start nice courteous s...,1
9996,"place never disappointed us .. food , courteou...",2
9997,Bad rating mainly `` Chicken Bone found Veg fo...,0
9998,personally love prefer Chinese Food . couple t...,2


In [77]:
# Vectorizing text
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB, GaussianNB

# Hold out 20% of the reviews for evaluation. The fixed random_state makes the
# split, and therefore every metric computed from it, reproducible across runs.
x_train, x_test, y_train, y_test = tts(
  df["Review"].astype(str),
  df["Rating"].astype(str),
  test_size=0.2,
  random_state=42,
)

# Learn the vocabulary from the training text only, then encode the test text
# with that same vocabulary so no test information leaks into the features.
v = CountVectorizer(analyzer='word')
x_train = v.fit_transform(x_train)
x_test = v.transform(x_test)

In [78]:
# Multinomial naive Bayes classifier, trained on the vectorised training reviews.
model = MultinomialNB()
# Kept as the cell's last expression, so the notebook displays whatever fit() returns.
model.fit(x_train,y_train)


In [79]:
# Predict sentiment classes for the held-out reviews.
y_pred = model.predict(x_test)

# Accuracy takes no averaging option; the other three scores are computed
# with average='weighted' across the classes.
accuracy = accuracy_score(y_test, y_pred)
precision, recall, f1 = (
  metric(y_test, y_pred, average='weighted')
  for metric in (precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

Accuracy: 0.82
Precision: 0.79
Recall: 0.82
F1-Score: 0.79
