In [4]:
import nltk
import pickle
import string
import pandas as pd
from random import shuffle

from nltk import FreqDist
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tag import pos_tag

In [5]:
# Make sure the NLTK data packages used by this notebook are installed:
# 'punkt', 'averaged_perceptron_tagger', 'stopwords' and 'wordnet'.
# Packages that are already present are reported as up to date.
# The value of the final call is what the cell displays as its result.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\norbe\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [6]:
# Shared text-normalisation helpers, created once and reused by every call
# that preprocesses a review.
stemmer = PorterStemmer()
wnl = WordNetLemmatizer()

# Kept as a set rather than the list NLTK returns: the notebook only ever
# tests tokens for membership, and a set lookup is constant-time instead of
# a linear scan for every token of every review.
eng_stopwords = set(stopwords.words('english'))

# Labelled reviews; the code below reads the 'restaurant', 'text' and 'label'
# columns.
dataset = pd.read_csv('updated_dataset.csv')
dataset.head()

Unnamed: 0,restaurant,text,label
0,Restaurant A,"Contrary to other reviews, I have zero complai...",positive
1,Restaurant B,Last summer I had an appointment to get new ti...,negative
2,Restaurant C,"Friendly staff, same starbucks fair you get an...",positive
3,Restaurant D,The food is good. Unfortunately the service is...,negative
4,Restaurant E,Even when we didn't have a car Filene's Baseme...,positive


In [7]:
def preprocessing(text):
    """Turn raw review text into a bag-of-words feature dict.

    The text is lower-cased and tokenised; tokens that are not purely
    alphabetic or that are English stop words are discarded, and the
    survivors are lemmatised and then stemmed.

    Stop words are removed *before* normalisation. Comparing only the
    stemmed form against the stop-word list lets stop words through whenever
    stemming alters them (for example "this" is stemmed to "thi"), which
    pollutes the feature set. The normalised form is checked as well, so
    everything the post-stemming filter used to drop is still dropped.

    Args:
        text: The review as a string.

    Returns:
        dict mapping each kept, normalised word to ``True``.
    """
    features = {}
    for token in word_tokenize(text.lower()):
        # isalpha() already rejects punctuation and numeric tokens, so no
        # separate punctuation test is needed.
        if not token.isalpha() or token in eng_stopwords:
            continue

        normalised = stemmer.stem(wnl.lemmatize(token))
        if normalised.isalpha() and normalised not in eng_stopwords:
            features[normalised] = True

    return features

In [8]:
def trainModel():
    """Train a Naive Bayes sentiment classifier and cache it in model.pickle.

    Every row of ``dataset`` is converted to a ``(features, label)`` pair via
    ``preprocessing``. The pairs are shuffled, the first 85% are used for
    training and the remainder for evaluation. The accuracy on the held-out
    part and the five most informative features are printed.

    Returns:
        The trained ``nltk.NaiveBayesClassifier``.
    """
    feature_sets = [
        (preprocessing(text), label)
        for text, label in zip(dataset['text'], dataset['label'])
    ]

    # Shuffle so the train/test split does not depend on the row order of
    # the CSV file.
    shuffle(feature_sets)

    idx = int(len(feature_sets) * 0.85)
    train_set, test_set = feature_sets[:idx], feature_sets[idx:]

    classifier = nltk.NaiveBayesClassifier.train(train_set)

    accuracy = nltk.classify.accuracy(classifier, test_set)
    print(f"Accuracy: {accuracy}")

    classifier.show_most_informative_features(5)

    # The context manager closes the handle even if pickling raises, so a
    # failed dump cannot leak an open file.
    with open('model.pickle', 'wb') as file:
        pickle.dump(classifier, file)

    return classifier

In [9]:
def readModel():
    """Return the cached classifier, training a fresh one if loading fails.

    Tries to unpickle ``model.pickle`` from the working directory. If the
    file cannot be opened or its contents cannot be unpickled, falls back to
    ``trainModel()``, which trains a classifier and rewrites the cache.

    Returns:
        The classifier object loaded from disk or newly trained.
    """
    try:
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # a model.pickle that was written by this notebook itself.
        # The context manager closes the handle exactly once, including
        # when unpickling raises.
        with open('model.pickle', 'rb') as file:
            print("Model available, loading")
            classifier = pickle.load(file)
        print("success")
    except Exception:
        # Best-effort fallback for a missing, unreadable or corrupt cache.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate instead of silently starting a training run.
        print("file unavail")
        classifier = trainModel()
    return classifier

In [10]:
def writeReview():
    """Ask for a review until the answer has at least two words.

    Words are counted by splitting the answer on whitespace. Each rejected
    answer prints a reminder and the prompt is shown again.

    Returns:
        The accepted review, exactly as it was typed.
    """
    answer = input("Write Review [>= 2 words]: ")
    while len(answer.split()) <= 1:
        print("more than 2")
        answer = input("Write Review [>= 2 words]: ")

    print("added")
    return answer

In [16]:
def analyzeReview(review, classifier):
    """Classify the sentiment of a review and print the outcome.

    The review is converted to features with ``preprocessing`` - the same
    function used to build the training data - so the words presented to
    the classifier are normalised exactly like the ones it was trained on.
    Feeding it raw, unstemmed word counts instead means most words never
    match a trained feature.

    Args:
        review: The review text. Empty or whitespace-only text is rejected.
        classifier: Object with a ``classify(featureset)`` method, such as
            the classifier returned by ``readModel()``.

    Returns:
        The label returned by ``classifier.classify``, or ``None`` when no
        review text was supplied.
    """
    if not review or not review.strip():
        print("add review pls")
        return None

    result = classifier.classify(preprocessing(review))

    print(f"Review: {review}")
    print(f"Category: {result}")

    return result


In [31]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def recommend_restaurant(review, top_n=10, min_similarity=0.0):
    """Rank dataset reviews by TF-IDF cosine similarity to ``review``.

    A TF-IDF vectoriser is fitted on ``dataset['text']``; the query review
    is projected into the same space and compared with every dataset row.

    Args:
        review: The query text.
        top_n: Maximum number of rows to return. Defaults to 10.
        min_similarity: Only rows scoring strictly above this value are
            kept. The default of 0.0 drops rows that share no vocabulary
            with the query, so an unrelated query yields an empty frame
            instead of arbitrary rows.

    Returns:
        DataFrame with columns ``Restaurant`` and ``Similarity``, sorted by
        descending similarity and re-indexed from 0. There is one row per
        matching dataset review, so a restaurant can appear more than once.
    """
    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform(dataset['text'])
    query = tfidf.transform([review])

    # cosine_similarity gives one row per dataset review and a single
    # column for the query; flatten it to one score per review.
    similarities = cosine_similarity(tfidf_matrix, query).flatten()

    scored = pd.DataFrame({
        'Restaurant': dataset['restaurant'],
        'Similarity': similarities,
    })

    matches = scored[scored['Similarity'] > min_similarity]
    top_recommendations = matches.sort_values(by='Similarity', ascending=False).head(top_n)

    return top_recommendations[['Restaurant', 'Similarity']].reset_index(drop=True)

In [13]:
import spacy
import spacy.cli
from collections import defaultdict

# spacy.cli.download('en_core_web_sm')

# Load the spaCy pipeline 'en_core_web_sm' once; it is reused for every
# document processed below.
nlp = spacy.load('en_core_web_sm')
# Entity labels exposed by the pipeline's "ner" component.
ner_labels = nlp.get_pipe("ner").labels

def extract_ner():
    """Print the GPE, LANGUAGE and ORG entities found in the dataset reviews.

    Every text in ``dataset['text']`` is run through the spaCy pipeline.
    The distinct entity strings are grouped by label and printed as one
    comma-separated, alphabetically sorted line per label. Labels with no
    matches are not printed.

    Returns:
        None. The result is written to standard output only.
    """
    wanted_labels = {"GPE", "LANGUAGE", "ORG"}
    categorized_entities = defaultdict(set)

    # nlp.pipe streams the texts through the pipeline in batches, which is
    # the recommended way to process many documents and avoids the
    # per-call overhead of invoking nlp() once per review.
    for doc in nlp.pipe(dataset['text']):
        for ent in doc.ents:
            if ent.label_ in wanted_labels:
                categorized_entities[ent.label_].add(ent.text)

    print("Categorized Named Entities")
    for category, entities in categorized_entities.items():
        print(f"{category}: {','.join(sorted(entities))}")


In [33]:
# Display every entity label of the pipeline's NER component together with
# the description returned by spacy.explain, one "LABEL: description" line
# per label.
for label in ner_labels:
    print(f"{label}: {spacy.explain(label)}")


CARDINAL: Numerals that do not fall under another type
DATE: Absolute or relative dates or periods
EVENT: Named hurricanes, battles, wars, sports events, etc.
FAC: Buildings, airports, highways, bridges, etc.
GPE: Countries, cities, states
LANGUAGE: Any named language
LAW: Named documents made into laws.
LOC: Non-GPE locations, mountain ranges, bodies of water
MONEY: Monetary values, including unit
NORP: Nationalities or religious or political groups
ORDINAL: "first", "second", etc.
ORG: Companies, agencies, institutions, etc.
PERCENT: Percentage, including "%"
PERSON: People, including fictional
PRODUCT: Objects, vehicles, foods, etc. (not services)
QUANTITY: Measurements, as of weight or distance
TIME: Times smaller than a day
WORK_OF_ART: Titles of books, songs, etc.


In [32]:
if __name__ == "__main__":
    # Load the cached classifier, or train one if no usable cache exists.
    classifier = readModel()

    # Text of the current review; empty until the user writes one.
    review = ""

    # Sentiment label of the current review; empty until it is analysed.
    sentiment = ""

    while True:
        print("Food Review Sentiment Analysis")
        print("Your Review: ", review if review else "No Review")
        print("Your Sentiment: ", sentiment if sentiment else "No Sentiment")
        print("1. Write your review")
        print("2. Analyze your review")
        print("3. View restaurant recommendation")
        print("4. View NER")
        print("5. Exit")

        # Non-numeric input is treated like any other invalid menu choice
        # instead of crashing the loop with a ValueError.
        try:
            choice = int(input(">> "))
        except ValueError:
            print("Please only choose the available menu [1-5]!")
            continue

        if choice == 1:
            review = writeReview()
            # The previous label belonged to the old text, so clear it.
            sentiment = ""
        elif choice == 2:
            # analyzeReview returns None when there is no review yet; keep
            # `sentiment` a string so the status line above keeps working.
            sentiment = analyzeReview(review, classifier) or ""
        elif choice == 3:
            if review:
                recommendations = recommend_restaurant(review)
                print("\nTop Restaurant Based on Your Review:")
                if recommendations.empty:
                    print("No recommendations!")
                else:
                    for idx, row in recommendations.iterrows():
                        print(f"{idx+1}: {row['Restaurant']}")
                        print(f"Similarity: {row['Similarity']:.4f}\n")
            else:
                print("Please write a review first.")
        elif choice == 4:
            print("NER Menu")
            # extract_ner works on the whole dataset, so the current review
            # is not passed to it.
            extract_ner()
        elif choice == 5:
            print("Thanks for using this application!")
            break
        else:
            print("Please only choose the available menu [1-5]!")

Model available, loading
success
Food Review Sentiment Analysis
Your Review:  No Review
Your Sentiment:  No Sentiment
1. Write your review
2. Analyze your review
3. View restaurant recommendation
4. View NER
5. Exit
>> 
added
Food Review Sentiment Analysis
Your Review:  idk man
Your Sentiment:  No Sentiment
1. Write your review
2. Analyze your review
3. View restaurant recommendation
4. View NER
5. Exit
>> 

Top Restaurant Based on Your Review:
1: Restaurant E
Similarity: 0.1706

2: Restaurant E
Similarity: 0.1529

3: Restaurant E
Similarity: 0.1242

4: Restaurant C
Similarity: 0.1048

5: Restaurant C
Similarity: 0.0950

6: Restaurant A
Similarity: 0.0948

7: Restaurant E
Similarity: 0.0940

8: Restaurant C
Similarity: 0.0930

9: Restaurant C
Similarity: 0.0861

10: Restaurant D
Similarity: 0.0735

Food Review Sentiment Analysis
Your Review:  idk man
Your Sentiment:  No Sentiment
1. Write your review
2. Analyze your review
3. View restaurant recommendation
4. View NER
5. Exit
>> 
Thank