In [1]:
# Import all the libraries needed
import nltk # Natural Language Processing Library
import pickle # For The Model
import string # For String Operations
import pandas as pd # For DataFrame

# Functions from the libraries
from nltk import FreqDist
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tag import pos_tag
from random import shuffle

In [2]:
# Download the NLTK data packages used by this notebook. The captured output
# of this cell reports each package as "already up-to-date" when it is
# already installed locally.
# Stop-word lists, read later through stopwords.words("english").
nltk.download("stopwords")
# Presumably the tokenizer data behind word_tokenize -- confirm against the NLTK docs.
nltk.download("punkt")
# Presumably for pos_tag, which is imported above but not called in the visible cells.
nltk.download("averaged_perceptron_tagger")
# Presumably the WordNet data that WordNetLemmatizer looks words up in -- confirm.
nltk.download("wordnet")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Shared, module-level helpers used by the functions in the cells below.
stemmer = PorterStemmer()
wnl = WordNetLemmatizer()
# Keep the stop words in a set: they are probed once per token with `in`,
# which is a linear scan on the list that stopwords.words() returns.
eng_stopwords = set(stopwords.words("english"))

# Review data; later cells read its "text", "label" and "restaurant" columns.
dataset = pd.read_csv("./dataset.csv")

In [4]:
def preprocessing(document):
    """Turn a raw review into the feature dict the classifier is trained on.

    The text is lower-cased and tokenized, stop words are dropped, and each
    remaining token is lemmatized and then stemmed. Only purely alphabetic
    stems are kept.

    Stop words are filtered on the raw tokens as well as on the stems.
    Checking the stems alone lets most stop words through, because stemming
    rewrites words such as "this" or "very" into forms ("thi", "veri") that
    no longer match the stop-word list.

    Args:
        document: Review text as a string.

    Returns:
        dict mapping every kept stem to ``True``.
    """
    features = {}
    for token in word_tokenize(document.lower()):
        # Drop stop words while they are still in their dictionary form.
        if token in eng_stopwords:
            continue

        stem = stemmer.stem(wnl.lemmatize(token))

        # Keep the original post-stem check too, so nothing that used to be
        # rejected (non-alphabetic stems, stems equal to a stop word) gets in.
        if stem.isalpha() and stem not in eng_stopwords:
            features[stem] = True
    return features

def trainModel(seed=None):
    """Train, evaluate and save the Naive Bayes sentiment classifier.

    Builds one ``(features, label)`` pair per row of the module-level
    ``dataset`` (columns ``text`` and ``label``), shuffles the pairs, trains
    on the first 85 % and prints the accuracy measured on the remaining 15 %.
    The trained classifier is written to ``model.pickle`` in the current
    working directory.

    Args:
        seed: Optional seed for the shuffle. ``None`` (the default) keeps the
            original unseeded shuffle; any other value makes the train/test
            split repeatable from run to run.

    Returns:
        The trained classifier.
    """
    features_sets = [
        (preprocessing(text), label)
        for text, label in zip(dataset["text"], dataset["label"])
    ]

    # Shuffle so the split below does not depend on the row order of the CSV.
    if seed is None:
        shuffle(features_sets)
    else:
        from random import Random
        Random(seed).shuffle(features_sets)

    split_index = int(len(features_sets) * .85)
    train_set, test_set = features_sets[:split_index], features_sets[split_index:]

    # Training the model (Naive Bayes)
    classifier = nltk.NaiveBayesClassifier.train(train_set)

    # Testing accuracy on the held-out part
    accuracy = nltk.classify.accuracy(classifier, test_set)
    print("Accuracy: ", accuracy)

    # Print 5 most informative features
    classifier.show_most_informative_features(5)

    # Save the trained model; `with` closes the file even if dump() raises.
    with open("model.pickle", "wb") as file:
        pickle.dump(classifier, file)

    return classifier

def readModel():
    """Return the sentiment classifier, loading it from disk when possible.

    Tries to unpickle ``model.pickle`` from the current working directory.
    When the file is missing, unreadable or cannot be unpickled, a new model
    is trained (and saved) through ``trainModel()`` instead.

    Returns:
        The loaded or freshly trained classifier.
    """
    try:
        # `with` closes the handle even when unpickling fails part-way.
        with open("model.pickle", "rb") as file:  # Read Binary
            print("Model is available!")
            print("Loading the model...")
            # NOTE: pickle.load can run arbitrary code from the file. Only
            # load a model.pickle that this notebook produced itself.
            classifier = pickle.load(file)
    except (OSError, pickle.UnpicklingError, EOFError,
            AttributeError, ImportError, IndexError):
        # Only "no usable model on disk" lands here. A bare `except:` would
        # also swallow KeyboardInterrupt and silently start a retrain.
        print("Model is not available!")
        print("Preparing for model training!")
        return trainModel()

    print("Model load succesfully!")
    classifier.show_most_informative_features(5)
    return classifier

def writeReview():
    """Prompt until the user enters a review of at least two words.

    Words are counted by splitting the input on whitespace. A too-short
    entry is rejected with a message and the prompt is shown again.

    Returns:
        The accepted review, exactly as it was typed.
    """
    while True:
        review = input("Input your review [>= 2 words]: ")
        word_count = len(review.split())

        # Reject and ask again until there are two or more words.
        if word_count <= 1:
            print("Your review must consisst of at least 2 words!")
            continue

        print("Review added!")
        return review
     
def analyzeReview(review, classifier):
    """Classify the sentiment of ``review`` and print the outcome.

    The review is converted to features by ``preprocessing``, the same
    function that builds the training features, so the classifier sees
    features in the form it was trained on. The earlier inline pipeline
    differed from training: it checked stop words before lower-casing,
    stemmed before lemmatizing, and passed word counts where the model was
    trained on ``True`` flags.

    Args:
        review: The review text to classify.
        classifier: Trained classifier exposing ``classify(featureset)``.

    Returns:
        The predicted label, or an empty string when the review is empty or
        only whitespace. A string (rather than ``None``) keeps callers that
        take ``len()`` of the result working.
    """
    if not review or not review.strip():
        print("Review is empty!")
        return ""

    # Predict the review from the same features used at training time.
    result = classifier.classify(preprocessing(review))

    print(f"Your Review: {review}")
    print(f"Review Category: {result}")

    return result

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def recommend_restaurant(review, threshold=0.3, top_n=10):
    """Find the dataset rows whose review text is most similar to ``review``.

    A TF-IDF vectorizer is fitted on ``dataset["text"]``, ``review`` is
    projected into the same space, and every dataset row is scored by cosine
    similarity against it.

    Args:
        review: Query text written by the user.
        threshold: Rows must score strictly above this similarity to be
            returned. Defaults to 0.3, the value that used to be hard-coded.
        top_n: Maximum number of rows returned. Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        DataFrame with columns ``Restaurant`` and ``Similarity``, sorted by
        descending similarity and keeping the dataset's row index. It is
        empty when no row clears ``threshold``.

    NOTE(review): rows are scored per review text, so a restaurant that has
    several matching rows in the dataset can presumably appear more than once.
    """
    corpus = dataset["text"]

    # The vectorizer is refitted on every call.
    tfidf_vectorizer = TfidfVectorizer()
    tfidf_matrix = tfidf_vectorizer.fit_transform(corpus)
    query_tfidf_v = tfidf_vectorizer.transform([review])

    # One similarity per dataset row against the single query; flatten to 1-D.
    tfidf_similarities = cosine_similarity(tfidf_matrix, query_tfidf_v).flatten()

    scored = pd.DataFrame({
        'Restaurant': dataset["restaurant"],
        'Similarity': tfidf_similarities,
    })

    matches = scored[scored['Similarity'] > threshold]
    top_recommendations = matches.sort_values(by='Similarity', ascending=False).head(top_n)

    return top_recommendations[['Restaurant', 'Similarity']]

In [6]:
import spacy
import spacy.cli

# Run the download below once if the en_core_web_sm model is not installed yet
# spacy.cli.download('en_core_web_sm')

from collections import defaultdict
# Load the spaCy pipeline once at module level; extract_ner() below reuses it.
nlp = spacy.load("en_core_web_sm")
# Labels exposed by the pipeline's "ner" component. In the visible cells this
# is only referenced by the commented-out debug print below.
ner_labels = nlp.get_pipe("ner").labels
# Display the labels
# print("Named Entity Labels:")
# for label in ner_labels:
#     print(label)

# Note: named entities are extracted from the review texts in the dataset, not from the user's own review.
def extract_ner():
    """Print the named entities found in the dataset reviews, grouped by label.

    Every text in ``dataset['text']`` is run through the spaCy pipeline
    ``nlp``. Entities labelled GPE, LANGUAGE or ORG are collected without
    duplicates, and the grouped listing is printed once after all reviews
    have been processed.
    """
    wanted_labels = {"GPE", "LANGUAGE", "ORG"}
    categorized_entities = defaultdict(set)

    # nlp.pipe processes the texts as a stream instead of one call per review.
    for doc in nlp.pipe(dataset['text']):
        for ent in doc.ents:
            if ent.label_ in wanted_labels:
                categorized_entities[ent.label_].add(ent.text)

    # Report once, after the loop. Printing inside the loop repeated the whole
    # accumulated listing for every single review.
    print("CATEGORIZED NAMED ENTITIES:")
    for category, entities in categorized_entities.items():
        print(f"{category}: {', '.join(sorted(entities))}")

In [7]:
if __name__ == "__main__":
    # Interactive console menu: write a review, classify its sentiment, get
    # restaurant recommendations for it, or list the dataset's named entities.

    # Load the saved model, or train one when none is available.
    classifier = readModel()

    # Current review and the sentiment predicted for it ("" = not set yet).
    review = ""
    sentiment = ""

    while True:
        print("Food Review Sentiment Analysis")
        print("Your Review: ", review if review else "No Review")
        # Truthiness instead of len(): the analysis step may not yield a string.
        print("Your Sentiment: ", sentiment if sentiment else "No Sentiment")
        print("1. Write your review")
        print("2. Analyze your review")
        print("3. View restaurant recommendation")
        print("4. View NER")
        print("5. Exit")

        # input() already shows the ">> " prompt. Non-numeric input is treated
        # as an invalid choice instead of crashing with ValueError.
        try:
            choice = int(input(">> "))
        except ValueError:
            choice = None

        if choice == 1:
            review = writeReview()
            # The old sentiment belonged to the previous review.
            sentiment = ""
        elif choice == 2:
            # analyzeReview may return None for an empty review; normalise it.
            sentiment = analyzeReview(review, classifier) or ""
        elif choice == 3:
            if review:
                recommendations = recommend_restaurant(review)
                print("\nTop Restaurant Based on Your Review:")
                if recommendations.empty:
                    print("No recommendations!")
                else:
                    # Number the results 1..n. The frame's own index is the
                    # dataset row label, not the position in the ranking.
                    for rank, (_, row) in enumerate(recommendations.iterrows(), start=1):
                        print(f"{rank}: {row['Restaurant']}")
                        print(f"Similarity: {row['Similarity']:.4f}\n")
            else:
                print("Please write a review first.")
        elif choice == 4:
            print("NER Menu")
            # NER runs over the dataset reviews, so no review is passed in.
            extract_ner()
        elif choice == 5:
            print("Thanks for using this application!")
            break
        else:
            # The menu has five entries, so the valid range is 1-5.
            print("Please only choose the available menu [1-5]!")

Model is not available!
Preparing for model training!
Accuracy:  0.7349397590361446
Most Informative Features
                 terribl = True           negati : positi =     12.8 : 1.0
                 horribl = True           negati : positi =     10.2 : 1.0
                 fantast = True           positi : negati =      9.8 : 1.0
                 perfect = True           positi : negati =      8.3 : 1.0
                     low = True           negati : positi =      7.6 : 1.0
Food Review Sentiment Analysis
Your Review:  No Review
Your Sentiment:  No Sentiment
1. Write your review
2. Analyze your review
3. View restaurant recommendation
4. View NER
5. Exit
>> 
Review added!
Food Review Sentiment Analysis
Your Review:  Hello World
Your Sentiment:  No Sentiment
1. Write your review
2. Analyze your review
3. View restaurant recommendation
4. View NER
5. Exit
>> 
Review added!
Food Review Sentiment Analysis
Your Review:  Very nice
Your Sentiment:  No Sentiment
1. Write your review
2. An