In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score,classification_report


In [2]:
#a="Restaurant_Reviews.tsv"
data=pd.read_csv("Restaurant_Reviews.tsv",sep="\t")

In [3]:
import random

In [4]:
data.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


random assign

In [5]:
categories = ['service', 'food', 'ambiance', 'value', 'overall']


In [6]:
data['Category'] = [random.choice(categories) for _ in range(len(data))]


In [7]:
df_labeled = pd.DataFrame(data)


In [8]:
df_labeled

Unnamed: 0,Review,Liked,Category
0,Wow... Loved this place.,1,service
1,Crust is not good.,0,food
2,Not tasty and the texture was just nasty.,0,value
3,Stopped by during the late May bank holiday of...,1,service
4,The selection on the menu was great and so wer...,1,service
...,...,...,...
995,I think food should have flavor and texture an...,0,service
996,Appetite instantly gone.,0,food
997,Overall I was not impressed and would not go b...,0,ambiance
998,"The whole experience was underwhelming, and I ...",0,value


In [9]:
X = data['Review']
y = data['Category']

In [10]:
vectorizer = TfidfVectorizer(stop_words='english', lowercase=True)
X_tfidf = vectorizer.fit_transform(X)


In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_tfidf, y, test_size=0.5, random_state=12)


In [12]:
model =  MultinomialNB()
model.fit(X_train, y_train)


In [13]:
y_pred = model.predict(X_test)


In [14]:
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))


Classification Report:
              precision    recall  f1-score   support

    ambiance       0.25      0.17      0.20       102
        food       0.20      0.14      0.16       102
     overall       0.14      0.13      0.13        94
     service       0.15      0.16      0.15        95
       value       0.28      0.45      0.34       107

    accuracy                           0.21       500
   macro avg       0.20      0.21      0.20       500
weighted avg       0.21      0.21      0.20       500

Accuracy: 0.212


In [18]:
df_labeled

Unnamed: 0,Review,Liked,Category
0,Wow... Loved this place.,1,service
1,Crust is not good.,0,food
2,Not tasty and the texture was just nasty.,0,value
3,Stopped by during the late May bank holiday of...,1,service
4,The selection on the menu was great and so wer...,1,service
...,...,...,...
995,I think food should have flavor and texture an...,0,service
996,Appetite instantly gone.,0,food
997,Overall I was not impressed and would not go b...,0,ambiance
998,"The whole experience was underwhelming, and I ...",0,value


In [16]:
print(data.head())

                                              Review  Liked Category
0                           Wow... Loved this place.      1  service
1                                 Crust is not good.      0     food
2          Not tasty and the texture was just nasty.      0    value
3  Stopped by during the late May bank holiday of...      1  service
4  The selection on the menu was great and so wer...      1  service


2

In [24]:

import nltk
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans


In [25]:
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [26]:
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

In [27]:
def preprocess_text(text):
    t = re.sub(r'\W', ' ', text)
    t = re.sub(r'\s+', ' ', text)
    t = text.lower()
    words = t.split()
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    return ' '.join(words)

In [29]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [30]:
data['Review'] = data['Review'].apply(preprocess_text)


In [31]:
vectorizer = TfidfVectorizer(max_features=500)
X = vectorizer.fit_transform(data['Review'])

In [32]:
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X)

In [33]:
labels = kmeans.labels_
cluster_names = {0: 'Food', 1: 'Service', 2: 'Ambiance'}

In [34]:
data['cluster'] = [cluster_names[label] for label in labels]
print(data[['Review', 'cluster']])

                                                Review   cluster
0                                  wow... loved place.  Ambiance
1                                          crust good.  Ambiance
2                                 tasty texture nasty.   Service
3    stopped late may bank holiday rick steve recom...   Service
4                         selection menu great prices.   Service
..                                                 ...       ...
995                 think food flavor texture lacking.   Service
996                           appetite instantly gone.   Service
997                   overall impressed would go back.   Service
998  whole experience underwhelming, think we'll go...   Service
999  then, wasted enough life there, poured salt wo...   Service

[1000 rows x 2 columns]


In [35]:
for cluster in data['cluster'].unique():
    print("Cluster:",cluster)
    print(data[data['cluster'] == cluster]['Review'].tolist())
    print("\n")

Cluster: Ambiance
['wow... loved place.', 'crust good.', 'place worth time, let alone vegas.', "that's right....the red velvet cake.....ohhh stuff good.", 'found place accident could happier.', 'seems like good quick place grab bite familiar pub food, favor look elsewhere.', 'overall, like place lot.', 'ample portion good prices.', 'deal good enough would drag establishment again.', 'hard judge whether side good grossed melted styrofoam want eat fear getting sick.', 'burger good beef, cooked right.', 'chow mein good!', 'place receives star appetizers!!!', 'glad found place.', '- really, really good rice, time.', 'guess known place would suck, inside excalibur, use common sense.', 'sweet potato fry good seasoned well.', "today second time i've lunch buffet pretty good.", 'much good food vega feel cheated wasting eating opportunity going rice company.', 'walked place smelled like old grease trap 2 others eating.', 'place it!', 'good.', 'side, cafe serf really good food.', "good thing wai